1098 lines
33 KiB
Bash
1098 lines
33 KiB
Bash
#!/usr/bin/env bash
# End-to-end test: install UltraCloud control-plane and worker nodes from the
# canonical ISO inside QEMU VMs, driven by host-side Chainfire + Deployer
# services and a host-local Nix binary cache.
# Most settings below are overridable via ULTRACLOUD_* environment variables.
set -euo pipefail

# Repository root; defaults to two directories above this script.
ROOT="${ULTRACLOUD_REPO_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)}"
# Scratch root; when empty it is resolved later by resolve_default_work_root.
DEFAULT_WORK_ROOT="${ULTRACLOUD_WORK_ROOT:-}"

CLUSTER_ID="${ULTRACLOUD_BAREMETAL_CLUSTER_ID:-baremetal-iso-canonical}"
# Host-side service endpoints (fixed loopback ports; checked free in main()).
CHAINFIRE_ENDPOINT="http://127.0.0.1:2379"
DEPLOYER_ENDPOINT="http://127.0.0.1:8088"
BINARY_CACHE_ENDPOINT="http://127.0.0.1:8090"
BOOTSTRAP_TOKEN="${ULTRACLOUD_BAREMETAL_BOOTSTRAP_TOKEN:-baremetal-iso-bootstrap-token}"
# Node classes and the install contract each class is expected to resolve to.
CONTROL_NODE_CLASS="${ULTRACLOUD_BAREMETAL_CONTROL_NODE_CLASS:-iso-control-plane}"
WORKER_NODE_CLASS="${ULTRACLOUD_BAREMETAL_WORKER_NODE_CLASS:-iso-worker}"
CONTROL_NIXOS_CONFIGURATION="${ULTRACLOUD_BAREMETAL_CONTROL_CONFIGURATION:-baremetal-qemu-control-plane}"
WORKER_NIXOS_CONFIGURATION="${ULTRACLOUD_BAREMETAL_WORKER_CONFIGURATION:-baremetal-qemu-worker}"
CONTROL_DISKO_CONFIG_PATH="${ULTRACLOUD_BAREMETAL_CONTROL_DISKO_CONFIG_PATH:-nix/nodes/baremetal-qemu/control-plane/disko.nix}"
WORKER_DISKO_CONFIG_PATH="${ULTRACLOUD_BAREMETAL_WORKER_DISKO_CONFIG_PATH:-nix/nodes/baremetal-qemu/worker/disko.nix}"
# Stable by-id paths derived from the virtio disk serials set at VM launch.
CONTROL_TARGET_DISK_BY_ID="${ULTRACLOUD_BAREMETAL_CONTROL_TARGET_DISK_BY_ID:-/dev/disk/by-id/virtio-uc-control-root}"
WORKER_TARGET_DISK_BY_ID="${ULTRACLOUD_BAREMETAL_WORKER_TARGET_DISK_BY_ID:-/dev/disk/by-id/virtio-uc-worker-root}"
CONTROL_DISK_SERIAL="${ULTRACLOUD_BAREMETAL_CONTROL_DISK_SERIAL:-uc-control-root}"
WORKER_DISK_SERIAL="${ULTRACLOUD_BAREMETAL_WORKER_DISK_SERIAL:-uc-worker-root}"
# Files whose presence the desired-system health check asserts on the node.
CONTROL_HEALTH_CHECK_PATH="/etc/ultracloud-role-control-plane"
WORKER_HEALTH_CHECK_PATH="/etc/ultracloud-role-worker"
CONTROL_NODE_ID="${ULTRACLOUD_BAREMETAL_CONTROL_NODE_ID:-iso-control-plane-01}"
WORKER_NODE_ID="${ULTRACLOUD_BAREMETAL_WORKER_NODE_ID:-iso-worker-01}"
# Host ports forwarded to each guest's sshd (QEMU user-mode networking).
CONTROL_SSH_PORT="${ULTRACLOUD_BAREMETAL_CONTROL_SSH_PORT:-22231}"
WORKER_SSH_PORT="${ULTRACLOUD_BAREMETAL_WORKER_SSH_PORT:-22232}"
CONTROL_DHCP_START="${ULTRACLOUD_BAREMETAL_CONTROL_DHCP_START:-10.0.2.15}"
WORKER_DHCP_START="${ULTRACLOUD_BAREMETAL_WORKER_DHCP_START:-10.0.2.16}"
# qcow2 system-disk sizes (qemu-img size syntax, e.g. "18G").
CONTROL_DISK_GIB="${ULTRACLOUD_BAREMETAL_CONTROL_DISK_GIB:-18G}"
WORKER_DISK_GIB="${ULTRACLOUD_BAREMETAL_WORKER_DISK_GIB:-18G}"
|
|
|
|
log() {
  # Emit a prefixed progress line on stdout.
  printf '[baremetal-iso-e2e] %s\n' "$*"
}
|
|
|
|
marker() {
  # Emit a machine-readable milestone line for log scrapers.
  printf 'ULTRACLOUD_MARKER %s\n' "$*"
}
|
|
|
|
die() {
  # Print an error message to stderr and abort the script with status 1.
  printf '[baremetal-iso-e2e] ERROR: %s\n' "$*" >&2
  exit 1
}
|
|
|
|
require_cmd() {
  # Abort unless the tool named in $1 is available on PATH.
  if ! command -v "$1" >/dev/null 2>&1; then
    die "required command not found: $1"
  fi
}
|
|
|
|
host_cpu_count() {
  # Best-effort online CPU count; anything non-numeric or < 1 becomes 1.
  local detected
  detected="$(getconf _NPROCESSORS_ONLN 2>/dev/null || nproc 2>/dev/null || echo 1)"
  case "${detected}" in
    ''|*[!0-9]*) detected=1 ;;
  esac
  if (( detected < 1 )); then
    detected=1
  fi
  printf '%s\n' "${detected}"
}
|
|
|
|
default_local_nix_max_jobs() {
  # One builder on tiny hosts; otherwise about half the CPUs, rounded up.
  local cpus="$1"
  if (( cpus > 2 )); then
    printf '%s\n' "$(( (cpus + 1) / 2 ))"
  else
    printf '1\n'
  fi
}
|
|
|
|
default_local_nix_build_cores() {
  # Split the host CPUs evenly across the parallel Nix jobs, never below 1.
  local cpus="$1"
  local jobs="$2"
  local cores=1

  if (( jobs > 0 )); then
    cores=$(( cpus / jobs ))
  fi
  (( cores >= 1 )) || cores=1

  printf '%s\n' "${cores}"
}
|
|
|
|
default_baremetal_vm_vcpus() {
  # Scale guest vCPUs with the host: 1 (<4 CPUs), 2 (4-7), 4 (8+).
  local cpus="$1"
  local vcpus=1
  if (( cpus >= 8 )); then
    vcpus=4
  elif (( cpus >= 4 )); then
    vcpus=2
  fi
  printf '%s\n' "${vcpus}"
}
|
|
|
|
default_baremetal_vm_memory_mib() {
  # 3 GiB of guest RAM on large hosts (8+ CPUs), 2 GiB otherwise.
  local cpus="$1"
  local mem=2048
  if (( cpus >= 8 )); then
    mem=3072
  fi
  printf '%s\n' "${mem}"
}
|
|
|
|
append_nix_config_line() {
  # Append one settings line to $NIX_CONFIG, newline-separating entries.
  local entry="$1"
  if [[ -z "${NIX_CONFIG:-}" ]]; then
    NIX_CONFIG="${entry}"
  else
    NIX_CONFIG="${NIX_CONFIG}"$'\n'"${entry}"
  fi
}
|
|
|
|
configure_local_nix_execution() {
  # Force purely local, flake-enabled Nix builds with explicit parallelism,
  # then export the accumulated NIX_CONFIG for all child nix invocations.
  local setting
  for setting in \
    "builders =" \
    "max-jobs = ${LOCAL_NIX_MAX_JOBS}" \
    "cores = ${LOCAL_NIX_BUILD_CORES}" \
    "experimental-features = nix-command flakes" \
    "warn-dirty = false"; do
    append_nix_config_line "${setting}"
  done
  export NIX_CONFIG
}
|
|
|
|
host_kvm_access() {
  # Succeed only when /dev/kvm exists and is both readable and writable.
  [[ -r /dev/kvm ]] && [[ -w /dev/kvm ]]
}
|
|
|
|
qemu_machine_args() {
  # Emit QEMU machine/accelerator flags one per line; callers word-split
  # the output via an unquoted $(qemu_machine_args) expansion.
  local -a flags
  if [[ "${BAREMETAL_VM_ACCELERATOR_MODE}" == "kvm" ]]; then
    flags=("-machine" "pc,accel=kvm:tcg" "-enable-kvm" "-cpu" "host")
  else
    flags=("-machine" "pc" "-accel" "tcg,thread=multi" "-cpu" "max")
  fi
  printf '%s\n' "${flags[@]}"
}
|
|
|
|
# Run `nix build` strictly on the local machine: remote builders are disabled
# and job/core parallelism is pinned to the values computed in main().
# All arguments are forwarded verbatim to `nix build`.
nix_build_local() {
  NIX_BUILD_CORES="${LOCAL_NIX_BUILD_CORES}" nix \
    --option builders '' \
    --option warn-dirty false \
    --max-jobs "${LOCAL_NIX_MAX_JOBS}" \
    build "$@"
}
|
|
|
|
resolve_default_work_root() {
  # Pick a writable scratch root, in priority order: explicit override,
  # repo-local ./work, $TMPDIR, then /tmp as a last resort.
  if [[ -n "${DEFAULT_WORK_ROOT}" ]]; then
    printf '%s\n' "${DEFAULT_WORK_ROOT}"
  elif [[ -w "${ROOT}" ]]; then
    printf '%s\n' "${ROOT}/work"
  elif [[ -n "${TMPDIR:-}" ]]; then
    printf '%s\n' "${TMPDIR}/ultracloud"
  else
    printf '%s\n' "/tmp/ultracloud"
  fi
}
|
|
|
|
resolve_store_path() {
  # Print the store path from the env var named in $1 when set; otherwise
  # build flake attribute $2 locally and print its output path.
  local override_var="$1"
  local flake_attr="$2"
  local override="${!override_var:-}"
  if [[ -n "${override}" ]]; then
    printf '%s\n' "${override}"
  else
    nix_build_local "$ROOT#${flake_attr}" --no-link --print-out-paths
  fi
}
|
|
|
|
resolve_binary() {
  # Locate a tool, in priority order: env override (var name in $1), a PATH
  # lookup of $2, then a local Nix build of flake attribute $3 with bin/$2
  # appended to its output path.
  local override_var="$1"
  local tool="$2"
  local flake_attr="$3"
  local override="${!override_var:-}"

  if [[ -n "${override}" ]]; then
    printf '%s\n' "${override}"
    return 0
  fi

  local found
  if found="$(command -v "$tool")"; then
    printf '%s\n' "${found}"
    return 0
  fi

  local store_out
  store_out="$(nix_build_local "$ROOT#${flake_attr}" --no-link --print-out-paths)"
  printf '%s/bin/%s\n' "${store_out}" "${tool}"
}
|
|
|
|
# Resolve a bootable ISO from $1, which is either the ISO file itself or a
# Nix isoImage output directory containing an iso/ subdirectory.
# Prints the resolved path; dies when no ISO can be found.
resolve_iso_image() {
  local candidate="$1"
  if [[ -f "$candidate" ]]; then
    printf '%s\n' "$candidate"
    return 0
  fi

  local iso_dir="$candidate/iso"
  if [[ -d "$iso_dir" ]]; then
    local iso_path
    # Sort so the choice is deterministic when several ISOs are present;
    # bare `find | head -1` order is filesystem-dependent.
    iso_path="$(find "$iso_dir" -maxdepth 1 -type f -name '*.iso' | LC_ALL=C sort | head -n 1)"
    if [[ -n "$iso_path" ]]; then
      printf '%s\n' "$iso_path"
      return 0
    fi
  fi

  die "unable to resolve a bootable ISO file from $candidate"
}
|
|
|
|
resolve_ovmf_firmware() {
  # Resolve an OVMF firmware file: the env override named in $1 wins;
  # otherwise build nixpkgs#OVMF.fd locally and append relative path $2.
  local override_var="$1"
  local rel="$2"
  local override="${!override_var:-}"

  if [[ -n "${override}" ]]; then
    printf '%s\n' "${override}"
    return 0
  fi

  local fw_root
  fw_root="$(nix_build_local nixpkgs#OVMF.fd --no-link --print-out-paths)"
  printf '%s/%s\n' "${fw_root}" "${rel}"
}
|
|
|
|
# Snapshot the effective run configuration (paths, sizing, KVM availability,
# Nix builder settings) into $TMP_DIR/environment.txt for post-mortem
# debugging; cleanup() later appends finished_at/exit_status to the same file.
capture_environment() {
  {
    printf 'started_at=%s\n' "$(date -Is)"
    printf 'pwd=%s\n' "$PWD"
    printf 'user=%s\n' "$(id -un)"
    printf 'uid=%s\n' "$(id -u)"
    printf 'gid=%s\n' "$(id -g)"
    printf 'work_root=%s\n' "${DEFAULT_WORK_ROOT}"
    printf 'state_dir=%s\n' "$TMP_DIR"
    printf 'iso_image=%s\n' "$ISO_IMAGE"
    printf 'flake_bundle=%s\n' "$FLAKE_BUNDLE"
    # Record only whether a token is set, never the token itself.
    printf 'bootstrap_token_set=%s\n' "$([[ -n "${BOOTSTRAP_TOKEN}" ]] && echo yes || echo no)"
    printf 'control_node_class=%s\n' "$CONTROL_NODE_CLASS"
    printf 'worker_node_class=%s\n' "$WORKER_NODE_CLASS"
    printf 'control_nixos_configuration=%s\n' "$CONTROL_NIXOS_CONFIGURATION"
    printf 'worker_nixos_configuration=%s\n' "$WORKER_NIXOS_CONFIGURATION"
    printf 'control_disko_config_path=%s\n' "$CONTROL_DISKO_CONFIG_PATH"
    printf 'worker_disko_config_path=%s\n' "$WORKER_DISKO_CONFIG_PATH"
    printf 'control_target_disk_by_id=%s\n' "$CONTROL_TARGET_DISK_BY_ID"
    printf 'worker_target_disk_by_id=%s\n' "$WORKER_TARGET_DISK_BY_ID"
    printf 'control_target=%s\n' "$CONTROL_TARGET_SYSTEM"
    printf 'worker_target=%s\n' "$WORKER_TARGET_SYSTEM"
    printf 'tmpdir=%s\n' "${TMPDIR:-}"
    printf 'host_cpu_count=%s\n' "${HOST_CPU_COUNT}"
    printf 'local_nix_max_jobs=%s\n' "${LOCAL_NIX_MAX_JOBS}"
    printf 'local_nix_build_cores=%s\n' "${LOCAL_NIX_BUILD_CORES}"
    printf 'vm_accelerator_mode=%s\n' "${BAREMETAL_VM_ACCELERATOR_MODE}"
    printf 'vm_vcpus=%s\n' "${BAREMETAL_VM_VCPUS}"
    printf 'vm_memory_mib=%s\n' "${BAREMETAL_VM_MEMORY_MIB}"
    printf 'kvm_present=%s\n' "$([[ -e /dev/kvm ]] && echo yes || echo no)"
    printf 'kvm_access=%s\n' "$([[ -r /dev/kvm && -w /dev/kvm ]] && echo rw || echo no)"
    # First 'builders = ...' line from the effective nix config, if any.
    printf 'nix_builders=%s\n' "$(nix config show builders 2>/dev/null | awk -F' = ' 'NR==1 { print $2 }')"
  } >"$TMP_DIR/environment.txt"
}
|
|
|
|
# Poll URL $1 once per second until it answers successfully or $2 seconds
# elapse. Returns 0 on success, 1 on timeout.
wait_for_http() {
  local url="$1"
  local timeout_secs="$2"
  local deadline=$((SECONDS + timeout_secs))
  while (( SECONDS < deadline )); do
    # Cap each attempt so a hung connection cannot overshoot the deadline.
    if curl -fsS --connect-timeout 5 --max-time 10 "$url" >/dev/null 2>&1; then
      return 0
    fi
    sleep 1
  done
  return 1
}
|
|
|
|
wait_for_log_marker() {
  # Poll file $2 every 2s for extended-regex $3, giving up after $4 seconds.
  # $1 is a label for log output. Returns 0 when found, 1 on timeout.
  local label="$1" log_file="$2" needle="$3" timeout_secs="$4"
  local deadline=$(( SECONDS + timeout_secs ))
  until (( SECONDS >= deadline )); do
    if [[ ! -f "$log_file" ]] || ! grep -Eq "$needle" "$log_file"; then
      sleep 2
      continue
    fi
    log "${label}: observed ${needle}"
    return 0
  done
  return 1
}
|
|
|
|
# Run ssh against the VM forwarded on localhost port $1 as root, with a
# throwaway host-key policy suitable for ephemeral test VMs (keys churn on
# every reinstall). Remaining arguments are passed straight to ssh,
# typically the remote command. Uses the run-local key in $SSH_KEY.
ssh_base() {
  local port="$1"
  shift
  ssh \
    -F /dev/null \
    -i "$SSH_KEY" \
    -o BatchMode=yes \
    -o ConnectTimeout=5 \
    -o ConnectionAttempts=1 \
    -o StrictHostKeyChecking=no \
    -o UserKnownHostsFile=/dev/null \
    -o LogLevel=ERROR \
    -p "$port" \
    root@127.0.0.1 "$@"
}
|
|
|
|
wait_for_ssh() {
  # Retry a no-op SSH command every 2s until it succeeds or $3 seconds pass.
  # $1 is a label for log output, $2 the forwarded host port.
  local label="$1" port="$2" timeout_secs="$3"
  local deadline=$(( SECONDS + timeout_secs ))
  until (( SECONDS >= deadline )); do
    if ssh_base "$port" true >/dev/null 2>&1; then
      log "${label}: SSH is reachable on port ${port}"
      return 0
    fi
    sleep 2
  done
  return 1
}
|
|
|
|
ssh_shell() {
  # Run script $2 on the VM behind port $1 inside a remote login bash,
  # shell-quoting it so it survives SSH's argument re-joining.
  local port="$1"
  local script="$2"
  local escaped
  printf -v escaped '%q' "$script"
  ssh_base "$port" "bash -lc ${escaped}"
}
|
|
|
|
current_system_path() {
  # Print the store path of the system currently running on the VM behind
  # port $1 (resolves the /run/current-system symlink).
  ssh_shell "$1" 'readlink -f /run/current-system'
}
|
|
|
|
remote_boot_id() {
  # Print the kernel boot_id of the VM behind port $1; the value changes on
  # every boot, which wait_for_reboot_transition relies on.
  ssh_shell "$1" 'cat /proc/sys/kernel/random/boot_id'
}
|
|
|
|
# Succeed iff the current-boot journal on the VM behind port $1 contains the
# fixed string $2. Any further arguments are systemd unit names used to
# filter the journal (-u). The remote command string is composed with %q
# quoting so unit names and the needle survive the remote shell unmodified.
remote_journal_has_marker() {
  local port="$1"
  local needle="$2"
  shift 2

  local remote_cmd="journalctl -b -o cat --no-pager"
  local unit
  for unit in "$@"; do
    printf -v remote_cmd '%s -u %q' "$remote_cmd" "$unit"
  done
  # grep runs remotely so the whole journal never crosses the SSH link.
  printf -v remote_cmd '%s | grep -Fq %q' "$remote_cmd" "$needle"

  ssh_shell "$port" "$remote_cmd"
}
|
|
|
|
wait_for_remote_journal_marker() {
  # Poll (every 2s, up to $4 seconds) for marker string $3 in the remote
  # journal of the VM behind port $2; arguments after the fourth select the
  # systemd units to search. $1 is a label for log output.
  local label="$1" port="$2" needle="$3" timeout_secs="$4"
  shift 4

  local deadline=$(( SECONDS + timeout_secs ))
  until (( SECONDS >= deadline )); do
    if remote_journal_has_marker "$port" "$needle" "$@" >/dev/null 2>&1; then
      log "${label}: observed ${needle} via remote journal"
      return 0
    fi
    sleep 2
  done
  return 1
}
|
|
|
|
wait_for_remote_unit_active() {
  # Wait (2s poll, $4 second budget) until systemd unit $3 reports active
  # on the VM behind port $2. $1 is a label for log output.
  local label="$1" port="$2" unit_name="$3" timeout_secs="$4"
  local deadline=$(( SECONDS + timeout_secs ))

  until (( SECONDS >= deadline )); do
    if ssh_shell "$port" "systemctl is-active ${unit_name} >/dev/null" >/dev/null 2>&1; then
      log "${label}: ${unit_name} is active"
      return 0
    fi
    sleep 2
  done
  return 1
}
|
|
|
|
wait_for_reboot_transition() {
  # Wait until the VM behind port $2 reports a non-empty boot_id different
  # from $3, i.e. it has completed a reboot. Polls every 2s for up to $4
  # seconds; $1 is a label for log output.
  local label="$1" port="$2" previous_boot_id="$3" timeout_secs="$4"
  local deadline=$(( SECONDS + timeout_secs ))
  local seen

  until (( SECONDS >= deadline )); do
    if seen="$(remote_boot_id "$port" 2>/dev/null)" \
        && [[ -n "$seen" && "$seen" != "$previous_boot_id" ]]; then
      log "${label}: reboot completed with boot_id=${seen}"
      return 0
    fi
    sleep 2
  done
  return 1
}
|
|
|
|
# Print the observed-system status for node $1 as recorded via deployer-ctl.
# Prints "missing" both when the node cannot be inspected at all and when
# the inspected payload has no .observed_system.status field.
observed_status() {
  local node_id="$1"
  local payload
  if ! payload="$(
    "$DEPLOYER_CTL_BIN" \
      --chainfire-endpoint "$CHAINFIRE_ENDPOINT" \
      --cluster-id "$CLUSTER_ID" \
      --cluster-namespace ultracloud \
      --deployer-namespace deployer \
      node inspect \
      --node-id "$node_id" \
      --include-observed-system \
      --format json 2>/dev/null
  )"; then
    printf 'missing\n'
    return 0
  fi

  jq -r '.observed_system.status // "missing"' <<<"$payload"
}
|
|
|
|
wait_for_observed_active() {
  # Poll deployer state every 5s until node $1 reports observed-system
  # status "active", or $2 seconds elapse. Returns 1 on timeout.
  local node_id="$1"
  local timeout_secs="$2"
  local deadline=$(( SECONDS + timeout_secs ))
  until (( SECONDS >= deadline )); do
    if [[ "$(observed_status "$node_id")" == "active" ]]; then
      log "${node_id}: observed-system reached active"
      return 0
    fi
    sleep 5
  done
  return 1
}
|
|
|
|
# Print the full JSON inspection payload (including the desired system) for
# node $1 via deployer-ctl. Unlike observed_status, errors are not swallowed
# so the caller can detect an uninspectable node.
inspect_node_payload() {
  local node_id="$1"

  "$DEPLOYER_CTL_BIN" \
    --chainfire-endpoint "$CHAINFIRE_ENDPOINT" \
    --cluster-id "$CLUSTER_ID" \
    --cluster-namespace ultracloud \
    --deployer-namespace deployer \
    node inspect \
    --node-id "$node_id" \
    --include-desired-system \
    --format json
}
|
|
|
|
# Assert that the install contract the deployer resolved for node $1 matches
# the values expected from its node class:
#   $2 node class                 $3 nixos configuration
#   $4 disko config path          $5 target disk by-id path
#   $6 health-check file path     $7 desired target system store path
# Dies when the node is not inspectable or any field deviates.
assert_node_contract() {
  local node_id="$1"
  local expected_node_class="$2"
  local expected_nixos_configuration="$3"
  local expected_disko_config_path="$4"
  local expected_target_disk_by_id="$5"
  local expected_health_check_path="$6"
  local expected_target_system="$7"
  local payload

  payload="$(inspect_node_payload "$node_id")" \
    || die "${node_id} install contract is not inspectable through deployer-ctl"

  # jq -e exits non-zero when the whole conjunction is not true.
  jq -e \
    --arg node_id "$node_id" \
    --arg node_class "$expected_node_class" \
    --arg nixos_configuration "$expected_nixos_configuration" \
    --arg disko_config_path "$expected_disko_config_path" \
    --arg target_disk_by_id "$expected_target_disk_by_id" \
    --arg health_check_path "$expected_health_check_path" \
    --arg target_system "$expected_target_system" \
    '
      .node.node_id == $node_id
      and .node.node_class == $node_class
      and .node.install_plan.nixos_configuration == $nixos_configuration
      and .node.install_plan.disko_config_path == $disko_config_path
      and (.node.install_plan.target_disk_by_id // "") == $target_disk_by_id
      and (.node.install_plan.target_disk // "") == ""
      and .desired_system.nixos_configuration == $nixos_configuration
      and (.desired_system.target_system // "") == $target_system
      and (.desired_system.switch_action // "switch") == "switch"
      and (.desired_system.rollback_on_failure // true) == true
      and ((.desired_system.health_check_command | if length == 0 then "" else .[-1] end) == $health_check_path)
    ' <<<"$payload" >/dev/null \
    || die "${node_id} install contract did not resolve to the expected class/profile defaults"

  log "${node_id}: install contract resolved via node class ${expected_node_class}"
}
|
|
|
|
assert_port_free() {
  # Abort when something is already listening on local TCP port $1.
  local port="$1"
  ! ss -ltn "( sport = :$port )" | grep -Fq ":$port" \
    || die "port $port is already in use"
}
|
|
|
|
# Write the Chainfire and Deployer configuration files, launch both services
# in the background (pids kept in CHAINFIRE_PID/DEPLOYER_PID for cleanup),
# and block until each answers its /health endpoint. Dies on timeout.
start_host_services() {
  # Single-node Chainfire bootstrapping its own cluster on fixed ports.
  cat >"$TMP_DIR/chainfire.toml" <<EOF
[node]
id = 1
name = "baremetal-iso-chainfire"
role = "control_plane"

[storage]
data_dir = "$TMP_DIR/chainfire-data"

[network]
api_addr = "0.0.0.0:2379"
http_addr = "0.0.0.0:8081"
raft_addr = "0.0.0.0:2380"
gossip_addr = "0.0.0.0:2381"

[cluster]
id = 1
initial_members = []
bootstrap = true

[raft]
role = "voter"
EOF

  # Deployer wired to the local Chainfire; unauthenticated API is acceptable
  # here because everything binds to the test host only.
  cat >"$TMP_DIR/deployer.toml" <<EOF
bind_addr = "0.0.0.0:8088"
cluster_id = "${CLUSTER_ID}"
cluster_namespace = "ultracloud"
heartbeat_timeout_secs = 300
local_state_path = "$TMP_DIR/deployer-state"
bootstrap_flake_bundle_path = "$FLAKE_BUNDLE"
bootstrap_token = "${BOOTSTRAP_TOKEN}"
require_chainfire = true
allow_unknown_nodes = false
allow_unauthenticated = true
allow_test_mappings = false
tls_self_signed = false

[chainfire]
endpoints = ["${CHAINFIRE_ENDPOINT}"]
namespace = "deployer"
EOF

  log "Starting host-side Chainfire"
  # NO_COLOR/CLICOLOR/RUST_LOG_STYLE keep the log files free of ANSI codes.
  NO_COLOR=1 CLICOLOR=0 RUST_LOG_STYLE=never \
    "$CHAINFIRE_BIN" --config "$TMP_DIR/chainfire.toml" >"$CHAINFIRE_LOG" 2>&1 &
  CHAINFIRE_PID="$!"

  wait_for_http "http://127.0.0.1:8081/health" 120 \
    || die "host Chainfire did not become healthy"

  log "Starting host-side Deployer"
  NO_COLOR=1 CLICOLOR=0 RUST_LOG_STYLE=never \
    "$DEPLOYER_SERVER_BIN" --config "$TMP_DIR/deployer.toml" >"$DEPLOYER_LOG" 2>&1 &
  DEPLOYER_PID="$!"

  wait_for_http "http://127.0.0.1:8088/health" 120 \
    || die "host Deployer did not become healthy"
}
|
|
|
|
# Populate $NIX_CACHE_DIR as a flat-file Nix binary cache (nix-cache-info,
# <hash>.narinfo, nar/*.nar) covering the runtime closure of both target
# systems and both disko scripts, so installer VMs substitute from the host
# instead of rebuilding. NARs are stored uncompressed, which is why
# FileHash/FileSize mirror NarHash/NarSize below.
seed_binary_cache() {
  local path
  local nar_rel
  local nar_path
  local store_base
  local store_hash
  local nar_hash
  local nar_size
  local refs
  local deriver

  mkdir -p "$NIX_CACHE_DIR/nar"
  cat >"$NIX_CACHE_DIR/nix-cache-info" <<'EOF'
StoreDir: /nix/store
WantMassQuery: 1
Priority: 30
EOF

  log "Seeding host-local Nix binary cache"
  # Optionally pre-load store DB registration (e.g. from a CI cache artifact).
  if [[ -n "${ULTRACLOUD_BAREMETAL_CACHE_REGISTRATION:-}" && -f "${ULTRACLOUD_BAREMETAL_CACHE_REGISTRATION}/registration" ]]; then
    nix-store --load-db <"${ULTRACLOUD_BAREMETAL_CACHE_REGISTRATION}/registration"
  fi
  while IFS= read -r path; do
    [[ -n "$path" ]] || continue

    store_base="$(basename "$path")"
    # narinfo files are keyed by the store-path hash prefix.
    store_hash="${store_base%%-*}"
    nar_rel="nar/${store_base}.nar"
    nar_path="$NIX_CACHE_DIR/$nar_rel"

    # NARs persist across runs (nix-cache dir is preserved); only dump new ones.
    if [[ ! -f "$nar_path" ]]; then
      nix-store --dump "$path" >"$nar_path"
    fi

    nar_size="$(stat -c%s "$nar_path")"
    nar_hash="$(nix hash file --type sha256 --base32 "$nar_path")"
    # References are listed as bare store basenames, space-separated.
    refs="$(nix-store --query --references "$path" | xargs -r -n1 basename | tr '\n' ' ' | sed 's/ $//')"
    deriver="$(nix-store --query --deriver "$path" 2>/dev/null || true)"
    deriver="$(basename "$deriver" 2>/dev/null || true)"

    {
      echo "StorePath: $path"
      echo "URL: $nar_rel"
      echo "Compression: none"
      echo "FileHash: sha256:$nar_hash"
      echo "FileSize: $nar_size"
      echo "NarHash: sha256:$nar_hash"
      echo "NarSize: $nar_size"
      echo "References: $refs"
      if [[ -n "$deriver" && "$deriver" != "unknown-deriver" ]]; then
        echo "Deriver: $deriver"
      fi
    } >"$NIX_CACHE_DIR/${store_hash}.narinfo"
  done < <(
    nix-store --query --requisites \
      "$CONTROL_TARGET_SYSTEM" \
      "$WORKER_TARGET_SYSTEM" \
      "$CONTROL_DISKO_SCRIPT" \
      "$WORKER_DISKO_SCRIPT" \
      | sort -u
  )
}
|
|
|
|
# Seed the cache directory, then serve it over HTTP on port 8090 with a
# background python http.server (pid in NIX_CACHE_PID for cleanup). Blocks
# until the cache answers /nix-cache-info; dies on timeout.
start_binary_cache() {
  seed_binary_cache

  log "Starting host-local Nix binary cache"
  # 0.0.0.0 so the QEMU user-network guests can reach it via the host.
  python3 -m http.server 8090 --bind 0.0.0.0 --directory "$NIX_CACHE_DIR" \
    >"$NIX_CACHE_LOG" 2>&1 &
  NIX_CACHE_PID="$!"

  wait_for_http "${BINARY_CACHE_ENDPOINT}/nix-cache-info" 120 \
    || die "host-local Nix binary cache did not become reachable"
}
|
|
|
|
# Render the declarative cluster state (node classes, pools, the two test
# nodes with their desired systems, and hostname-prefix enrollment rules)
# and apply it through deployer-ctl with --prune so stale objects from any
# previous run are removed.
apply_cluster_state() {
  cat >"$TMP_DIR/cluster-state.yaml" <<EOF
cluster:
  cluster_id: ${CLUSTER_ID}
  environment: qemu

node_classes:
  - name: ${CONTROL_NODE_CLASS}
    description: Canonical ISO-installed QEMU control-plane target
    roles:
      - control-plane
    labels:
      tier: control-plane
      canonical_install_path: iso
      install.ultracloud.io/profile: ${CONTROL_NIXOS_CONFIGURATION}
    install_plan:
      nixos_configuration: ${CONTROL_NIXOS_CONFIGURATION}
      disko_config_path: ${CONTROL_DISKO_CONFIG_PATH}
      target_disk_by_id: ${CONTROL_TARGET_DISK_BY_ID}
  - name: ${WORKER_NODE_CLASS}
    description: Canonical ISO-installed QEMU worker target
    roles:
      - worker
    labels:
      tier: worker
      canonical_install_path: iso
      install.ultracloud.io/profile: ${WORKER_NIXOS_CONFIGURATION}
    install_plan:
      nixos_configuration: ${WORKER_NIXOS_CONFIGURATION}
      disko_config_path: ${WORKER_DISKO_CONFIG_PATH}
      target_disk_by_id: ${WORKER_TARGET_DISK_BY_ID}

pools:
  - name: control
    description: ISO bare-metal control-plane pool
    node_class: ${CONTROL_NODE_CLASS}
    labels:
      pool.ultracloud.io/name: control
  - name: workers
    description: ISO bare-metal worker pool
    node_class: ${WORKER_NODE_CLASS}
    labels:
      pool.ultracloud.io/name: workers

nodes:
  - node_id: ${CONTROL_NODE_ID}
    hostname: ${CONTROL_NODE_ID}
    ip: ${CONTROL_DHCP_START}
    pool: control
    desired_system:
      nixos_configuration: ${CONTROL_NIXOS_CONFIGURATION}
      target_system: ${CONTROL_TARGET_SYSTEM}
      health_check_command:
        - test
        - -f
        - ${CONTROL_HEALTH_CHECK_PATH}
      rollback_on_failure: true
    state: pending
  - node_id: ${WORKER_NODE_ID}
    hostname: ${WORKER_NODE_ID}
    ip: ${WORKER_DHCP_START}
    pool: workers
    desired_system:
      nixos_configuration: ${WORKER_NIXOS_CONFIGURATION}
      target_system: ${WORKER_TARGET_SYSTEM}
      health_check_command:
        - test
        - -f
        - ${WORKER_HEALTH_CHECK_PATH}
      rollback_on_failure: true
    state: pending

enrollment_rules:
  - name: ${CONTROL_NODE_CLASS}
    priority: 200
    match_hostname_prefix: iso-control-plane
    pool: control
    ssh_authorized_keys:
      - ${SSH_PUBKEY}
  - name: ${WORKER_NODE_CLASS}
    priority: 190
    match_hostname_prefix: iso-worker
    pool: workers
    ssh_authorized_keys:
      - ${SSH_PUBKEY}
EOF

  "$DEPLOYER_CTL_BIN" \
    --chainfire-endpoint "$CHAINFIRE_ENDPOINT" \
    --cluster-id "$CLUSTER_ID" \
    --cluster-namespace ultracloud \
    --deployer-namespace deployer \
    apply --config "$TMP_DIR/cluster-state.yaml" --prune
}
|
|
|
|
# Create a fresh qcow2 system disk plus a private copy of the OVMF vars
# file, then boot a detached QEMU VM from the installer ISO. -no-reboot
# makes QEMU exit when the installer reboots, which wait_for_pid_exit
# detects; the VM is then relaunched from disk via launch_installed_vm.
# The QEMU pid is written to "${log_path}.pid" for later wait/kill.
# Args: $1 VM label, $2 node id (becomes the SMBIOS serial the guest
# reports), $3 host SSH-forward port, $4 user-net dhcpstart IP, $5 MAC,
# $6 virtio disk serial (drives the /dev/disk/by-id name), $7 disk size,
# $8 qcow2 path, $9 serial-log path.
launch_iso_vm() {
  local label="$1"
  local node_id="$2"
  local ssh_port="$3"
  local dhcp_start="$4"
  local mac="$5"
  local disk_serial="$6"
  local disk_size="$7"
  local disk_path="$8"
  local log_path="$9"
  local ovmf_vars_path="${disk_path}.ovmf-vars.fd"

  "$QEMU_IMG_BIN" create -f qcow2 "$disk_path" "$disk_size" >/dev/null
  rm -f "$ovmf_vars_path"
  cp "$OVMF_VARS_TEMPLATE" "$ovmf_vars_path"
  # The template may come from a read-only Nix store path.
  chmod u+w "$ovmf_vars_path"

  # NOTE: $(qemu_machine_args) is intentionally unquoted so its
  # newline-separated output word-splits into individual QEMU arguments.
  nohup "$QEMU_BIN" \
    -name "$label" \
    -smp "${BAREMETAL_VM_VCPUS}" \
    -m "${BAREMETAL_VM_MEMORY_MIB}" \
    -nographic \
    -no-reboot \
    -boot order=dc,once=d,menu=off \
    $(qemu_machine_args) \
    -drive if=pflash,format=raw,readonly=on,file="$OVMF_CODE_FD" \
    -drive if=pflash,format=raw,file="$ovmf_vars_path" \
    -drive id=systemdisk,if=none,file="$disk_path",format=qcow2 \
    -device virtio-blk-pci,bootindex=1,drive=systemdisk,serial="$disk_serial" \
    -cdrom "$ISO_IMAGE" \
    -netdev user,id=user0,hostfwd=tcp:127.0.0.1:${ssh_port}-:22,dhcpstart=${dhcp_start} \
    -device virtio-net-pci,netdev=user0,mac="${mac}" \
    -smbios type=1,product=UltraCloudQEMUBaremetal,serial="${node_id}" \
    >"$log_path" 2>&1 &
  echo "$!" >"${log_path}.pid"
}
|
|
|
|
# Relaunch a VM from its already-installed system disk: no ISO, no
# -no-reboot (post-install reboots must work), reusing the OVMF vars file
# created at ISO launch so UEFI boot entries survive. The serial log is
# appended (>>); the new QEMU pid overwrites "${log_path}.pid".
# Args: $1 VM label, $2 host SSH-forward port, $3 user-net dhcpstart IP,
# $4 MAC, $5 virtio disk serial, $6 qcow2 path, $7 serial-log path.
launch_installed_vm() {
  local label="$1"
  local ssh_port="$2"
  local dhcp_start="$3"
  local mac="$4"
  local disk_serial="$5"
  local disk_path="$6"
  local log_path="$7"
  local ovmf_vars_path="${disk_path}.ovmf-vars.fd"

  [[ -f "$ovmf_vars_path" ]] || die "missing OVMF vars file for relaunch: $ovmf_vars_path"

  # NOTE: $(qemu_machine_args) is intentionally unquoted for word splitting.
  nohup "$QEMU_BIN" \
    -name "$label" \
    -smp "${BAREMETAL_VM_VCPUS}" \
    -m "${BAREMETAL_VM_MEMORY_MIB}" \
    -nographic \
    $(qemu_machine_args) \
    -drive if=pflash,format=raw,readonly=on,file="$OVMF_CODE_FD" \
    -drive if=pflash,format=raw,file="$ovmf_vars_path" \
    -drive id=systemdisk,if=none,file="$disk_path",format=qcow2 \
    -device virtio-blk-pci,bootindex=1,drive=systemdisk,serial="$disk_serial" \
    -netdev user,id=user0,hostfwd=tcp:127.0.0.1:${ssh_port}-:22,dhcpstart=${dhcp_start} \
    -device virtio-net-pci,netdev=user0,mac="${mac}" \
    >>"$log_path" 2>&1 &
  echo "$!" >"${log_path}.pid"
}
|
|
|
|
# Wait (2s poll, $3 second budget) for the process whose pid is recorded in
# file $2 to exit; used to detect the installer VM powering off after its
# reboot marker. $1 is a label for messages. Dies when the pid file is
# missing or does not hold a pid; returns 1 on timeout.
wait_for_pid_exit() {
  local label="$1"
  local pid_file="$2"
  local timeout_secs="$3"
  local deadline=$((SECONDS + timeout_secs))
  local pid

  [[ -f "$pid_file" ]] || die "${label} is missing pid file $pid_file"
  pid="$(cat "$pid_file")"
  # An empty or garbled pid file would make `kill -0` fail and be
  # misreported as a clean QEMU exit; treat it as a hard error instead.
  [[ "$pid" =~ ^[0-9]+$ ]] || die "${label} pid file $pid_file does not contain a pid: '$pid'"
  while (( SECONDS < deadline )); do
    if ! kill -0 "$pid" >/dev/null 2>&1; then
      log "${label}: QEMU exited after installer-triggered reboot"
      return 0
    fi
    sleep 2
  done
  return 1
}
|
|
|
|
# Drive one node through the full install lifecycle and assert every phase:
# phone-home registration, contract resolution, pre-install markers, bundle
# download, disko, nixos-install, reboot into the installed system, service
# health, and finally observed-system "active" matching the expected target.
# Args: $1 node id, $2 SSH port, $3 qcow2 path, $4 serial-log path,
# $5 expected role, $6 expected target-system store path, $7 expected nixos
# configuration, $8 expected node class, $9 expected disko config path,
# $10 expected target disk by-id, $11 expected health-check file,
# $12 dhcpstart IP, $13 MAC, $14 disk serial (the last three are needed to
# relaunch the VM from disk after the installer reboot).
verify_node() {
  local node_id="$1"
  local ssh_port="$2"
  local disk_path="$3"
  local log_path="$4"
  local expected_role="$5"
  local expected_system="$6"
  local expected_nixos_configuration="$7"
  local expected_node_class="$8"
  local expected_disko_config_path="$9"
  local expected_target_disk_by_id="${10}"
  local expected_health_check_path="${11}"
  local dhcp_start="${12}"
  local mac="${13}"
  local disk_serial="${14}"

  # Phase 1: registration and contract checks against the host deployer.
  wait_for_log_marker "$node_id" "$TMP_DIR/deployer.log" "Node registered successfully.*node_id=${node_id}" 900 \
    || die "${node_id} never completed /api/v1/phone-home registration"
  assert_node_contract \
    "$node_id" \
    "$expected_node_class" \
    "$expected_nixos_configuration" \
    "$expected_disko_config_path" \
    "$expected_target_disk_by_id" \
    "$expected_health_check_path" \
    "$expected_system"
  wait_for_ssh "$node_id" "$ssh_port" 900 \
    || die "${node_id} never exposed SSH during the installer boot"
  wait_for_remote_journal_marker "$node_id" "$ssh_port" "ULTRACLOUD_MARKER pre-install.boot.${node_id}" 120 \
    ultracloud-bootstrap.service ultracloud-install.service \
    || die "${node_id} never recorded the pre-install boot marker"
  wait_for_remote_journal_marker "$node_id" "$ssh_port" "ULTRACLOUD_MARKER pre-install.phone-home.complete.${node_id}" 120 \
    ultracloud-bootstrap.service ultracloud-install.service \
    || die "${node_id} never recorded the phone-home completion marker"
  marker "pre-install.${node_id}"

  # Phase 2: installer progression (bundle -> disko -> nixos-install).
  wait_for_remote_journal_marker "$node_id" "$ssh_port" "ULTRACLOUD_MARKER install.bundle-downloaded.${node_id}" 1200 \
    ultracloud-install.service \
    || die "${node_id} never downloaded the flake bundle"
  wait_for_remote_journal_marker "$node_id" "$ssh_port" "ULTRACLOUD_MARKER install.disko.complete.${node_id}" 2400 \
    ultracloud-install.service \
    || die "${node_id} never completed disko"
  wait_for_remote_journal_marker "$node_id" "$ssh_port" "ULTRACLOUD_MARKER install.nixos-install.complete.${node_id}" 3600 \
    ultracloud-install.service \
    || die "${node_id} never finished nixos-install"
  marker "install.${node_id}"

  wait_for_remote_journal_marker "$node_id" "$ssh_port" "ULTRACLOUD_MARKER reboot.${node_id}" 3600 \
    ultracloud-install.service \
    || die "${node_id} never emitted reboot marker"
  marker "reboot.${node_id}"

  # Phase 3: installer VM exits (-no-reboot), relaunch from the system disk.
  wait_for_pid_exit "$node_id" "${log_path}.pid" 300 \
    || die "${node_id} installer VM did not exit after the reboot marker"
  launch_installed_vm \
    "ultracloud-baremetal-${node_id}-installed" \
    "$ssh_port" \
    "$dhcp_start" \
    "$mac" \
    "$disk_serial" \
    "$disk_path" \
    "$log_path"
  wait_for_ssh "$node_id" "$ssh_port" 1800 \
    || die "${node_id} did not come back over SSH after reboot"
  wait_for_remote_journal_marker "$node_id" "$ssh_port" "ULTRACLOUD_MARKER post-install.boot.${node_id}.${expected_role}" 1800 \
    ultracloud-baremetal-postinstall-marker.service \
    || die "${node_id} never emitted post-install marker"
  marker "post-install.${node_id}"

  # Phase 4: installed-system sanity checks over SSH.
  ssh_shell "$ssh_port" 'test -f /etc/ultracloud/node-config.json'
  ssh_shell "$ssh_port" 'test -d /var/lib/photon-src/.bundle-inputs/nixpkgs'
  wait_for_remote_unit_active "$node_id" "$ssh_port" "nix-agent.service" 180 \
    || die "${node_id} never started nix-agent.service after install"
  ssh_shell "$ssh_port" "grep -Fx '${expected_role}' /etc/ultracloud-role"
  ssh_shell "$ssh_port" "test -b '${expected_target_disk_by_id}'"
  if [[ "$expected_role" == "control-plane" ]]; then
    wait_for_remote_unit_active "$node_id" "$ssh_port" "chainfire.service" 180 \
      || die "${node_id} never started chainfire.service after install"
  fi

  # Phase 5: deployer convergence and running-system verification.
  wait_for_observed_active "$node_id" 1200 \
    || die "${node_id} never reached observed-system active"
  [[ "$(current_system_path "$ssh_port")" == "$expected_system" ]] \
    || die "${node_id} current system does not match expected target"
  marker "desired-system-active.${node_id}"
}
|
|
|
|
# EXIT-trap handler: records the final status in environment.txt, stops the
# QEMU VMs and host services, dumps log tails on failure, optionally removes
# the state directory, and re-exits with the original status.
# Installed in main() only after TMP_DIR and the *_LOG globals exist.
cleanup() {
  local status="$?"
  # Best-effort teardown: never let a cleanup failure mask the real status.
  set +e

  if [[ -n "${TMP_DIR:-}" && -d "${TMP_DIR}" ]]; then
    {
      printf 'finished_at=%s\n' "$(date -Is)"
      printf 'exit_status=%s\n' "$status"
    } >>"$TMP_DIR/environment.txt"
  fi

  # Stop the two QEMU VMs via their pid files (installer or relaunched).
  for pid_file in "$CONTROL_LOG.pid" "$WORKER_LOG.pid"; do
    if [[ -f "$pid_file" ]]; then
      pid="$(cat "$pid_file")"
      kill "$pid" 2>/dev/null || true
      wait "$pid" 2>/dev/null || true
    fi
  done

  if [[ -n "${DEPLOYER_PID:-}" ]]; then
    kill "$DEPLOYER_PID" 2>/dev/null || true
    wait "$DEPLOYER_PID" 2>/dev/null || true
  fi
  if [[ -n "${CHAINFIRE_PID:-}" ]]; then
    kill "$CHAINFIRE_PID" 2>/dev/null || true
    wait "$CHAINFIRE_PID" 2>/dev/null || true
  fi
  if [[ -n "${NIX_CACHE_PID:-}" ]]; then
    kill "$NIX_CACHE_PID" 2>/dev/null || true
    wait "$NIX_CACHE_PID" 2>/dev/null || true
  fi

  if (( status != 0 )); then
    log "control-plane serial log tail:"
    tail -n 120 "$CONTROL_LOG" 2>/dev/null || true
    log "worker serial log tail:"
    tail -n 120 "$WORKER_LOG" 2>/dev/null || true
    log "deployer log tail:"
    tail -n 120 "$DEPLOYER_LOG" 2>/dev/null || true
    log "chainfire log tail:"
    tail -n 120 "$CHAINFIRE_LOG" 2>/dev/null || true
    log "binary cache log tail:"
    tail -n 120 "$NIX_CACHE_LOG" 2>/dev/null || true
  fi

  # NOTE(review): main() sets KEEP_STATE_DIR=1 on both branches, so this
  # removal currently never runs — confirm whether that is intentional.
  if [[ "${KEEP_STATE_DIR:-0}" != "1" ]]; then
    rm -rf "$TMP_DIR"
  fi
  exit "$status"
}
|
|
|
|
# Orchestrate the whole run: size local Nix/VM resources, resolve all
# artifacts and binaries, prepare the state directory, start the binary
# cache and host services, declare cluster state, then install and verify
# the control-plane node followed by the worker node.
main() {
  # --- Resource sizing (env overrides win over host-derived defaults) ---
  DEFAULT_WORK_ROOT="$(resolve_default_work_root)"
  HOST_CPU_COUNT="$(host_cpu_count)"
  LOCAL_NIX_MAX_JOBS="${ULTRACLOUD_BAREMETAL_NIX_MAX_JOBS:-${ULTRACLOUD_LOCAL_NIX_MAX_JOBS:-$(default_local_nix_max_jobs "${HOST_CPU_COUNT}")}}"
  LOCAL_NIX_BUILD_CORES="${ULTRACLOUD_BAREMETAL_NIX_BUILD_CORES:-${ULTRACLOUD_LOCAL_NIX_BUILD_CORES:-$(default_local_nix_build_cores "${HOST_CPU_COUNT}" "${LOCAL_NIX_MAX_JOBS}")}}"
  BAREMETAL_VM_VCPUS="${ULTRACLOUD_BAREMETAL_VM_VCPUS:-$(default_baremetal_vm_vcpus "${HOST_CPU_COUNT}")}"
  BAREMETAL_VM_MEMORY_MIB="${ULTRACLOUD_BAREMETAL_VM_MEMORY_MIB:-$(default_baremetal_vm_memory_mib "${HOST_CPU_COUNT}")}"
  # KVM when usable, TCG otherwise (or when forced for CI portability).
  if [[ "${ULTRACLOUD_BAREMETAL_FORCE_TCG:-0}" == "1" ]]; then
    BAREMETAL_VM_ACCELERATOR_MODE="tcg"
  elif host_kvm_access; then
    BAREMETAL_VM_ACCELERATOR_MODE="kvm"
  else
    BAREMETAL_VM_ACCELERATOR_MODE="tcg"
  fi
  configure_local_nix_execution

  # --- Host tool prerequisites ---
  require_cmd curl
  require_cmd jq
  require_cmd nix
  require_cmd python3
  require_cmd qemu-img
  require_cmd qemu-system-x86_64
  require_cmd ssh
  require_cmd ssh-keygen
  require_cmd ss

  # --- Resolve artifacts (env overrides) or build them locally ---
  ISO_IMAGE="$(resolve_iso_image "$(resolve_store_path ULTRACLOUD_BAREMETAL_ISO_IMAGE 'nixosConfigurations.ultracloud-iso.config.system.build.isoImage')")"
  FLAKE_BUNDLE="$(resolve_store_path ULTRACLOUD_BAREMETAL_FLAKE_BUNDLE 'packages.x86_64-linux.ultracloudFlakeBundle')"
  CONTROL_TARGET_SYSTEM="$(resolve_store_path ULTRACLOUD_BAREMETAL_CONTROL_TARGET 'nixosConfigurations.baremetal-qemu-control-plane.config.system.build.toplevel')"
  WORKER_TARGET_SYSTEM="$(resolve_store_path ULTRACLOUD_BAREMETAL_WORKER_TARGET 'nixosConfigurations.baremetal-qemu-worker.config.system.build.toplevel')"
  CONTROL_DISKO_SCRIPT="$(resolve_store_path ULTRACLOUD_BAREMETAL_CONTROL_DISKO_SCRIPT 'nixosConfigurations.baremetal-qemu-control-plane.config.system.build.formatMount')"
  WORKER_DISKO_SCRIPT="$(resolve_store_path ULTRACLOUD_BAREMETAL_WORKER_DISKO_SCRIPT 'nixosConfigurations.baremetal-qemu-worker.config.system.build.formatMount')"
  CHAINFIRE_BIN="$(resolve_binary ULTRACLOUD_CHAINFIRE_SERVER_BIN chainfire 'packages.x86_64-linux.chainfire-server')"
  DEPLOYER_SERVER_BIN="$(resolve_binary ULTRACLOUD_DEPLOYER_SERVER_BIN deployer-server 'packages.x86_64-linux.deployer-server')"
  DEPLOYER_CTL_BIN="$(resolve_binary ULTRACLOUD_DEPLOYER_CTL_BIN deployer-ctl 'packages.x86_64-linux.deployer-ctl')"
  OVMF_CODE_FD="$(resolve_ovmf_firmware ULTRACLOUD_OVMF_CODE 'FV/OVMF_CODE.fd')"
  OVMF_VARS_TEMPLATE="$(resolve_ovmf_firmware ULTRACLOUD_OVMF_VARS 'FV/OVMF_VARS.fd')"
  QEMU_BIN="${ULTRACLOUD_QEMU_BIN:-$(command -v qemu-system-x86_64)}"
  QEMU_IMG_BIN="${ULTRACLOUD_QEMU_IMG_BIN:-$(command -v qemu-img)}"

  # --- State directory: wipe previous run but keep the nix-cache NARs ---
  if [[ -n "${ULTRACLOUD_BAREMETAL_STATE_DIR:-}" ]]; then
    TMP_DIR="$ULTRACLOUD_BAREMETAL_STATE_DIR"
    KEEP_STATE_DIR=1
    mkdir -p "$TMP_DIR"
    find "$TMP_DIR" -mindepth 1 -maxdepth 1 \
      ! -name nix-cache \
      -exec rm -rf {} +
  else
    TMP_DIR="${DEFAULT_WORK_ROOT}/baremetal-iso"
    KEEP_STATE_DIR=1
    mkdir -p "$TMP_DIR"
    find "$TMP_DIR" -mindepth 1 -maxdepth 1 \
      ! -name nix-cache \
      -exec rm -rf {} +
  fi
  export TMPDIR="${TMPDIR:-${DEFAULT_WORK_ROOT}/tmp}"
  export XDG_CACHE_HOME="${XDG_CACHE_HOME:-${DEFAULT_WORK_ROOT}/xdg-cache}"
  mkdir -p "$TMPDIR"
  mkdir -p "$XDG_CACHE_HOME"
  NIX_CACHE_DIR="$TMP_DIR/nix-cache"
  CONTROL_LOG="$TMP_DIR/control-plane.serial.log"
  WORKER_LOG="$TMP_DIR/worker.serial.log"
  DEPLOYER_LOG="$TMP_DIR/deployer.log"
  CHAINFIRE_LOG="$TMP_DIR/chainfire.log"
  NIX_CACHE_LOG="$TMP_DIR/nix-cache.log"
  # Install the trap only once everything cleanup() references exists.
  trap cleanup EXIT

  # Ephemeral SSH keypair for this run; the pubkey goes into enrollment rules.
  SSH_KEY="$TMP_DIR/id_ed25519"
  ssh-keygen -q -t ed25519 -N "" -f "$SSH_KEY" >/dev/null
  SSH_PUBKEY="$(tr -d '\n' <"$SSH_KEY.pub")"
  capture_environment

  # Fail early when another process already holds one of our fixed ports.
  assert_port_free 2379
  assert_port_free 8081
  assert_port_free 8088
  assert_port_free 8090
  assert_port_free "$CONTROL_SSH_PORT"
  assert_port_free "$WORKER_SSH_PORT"

  start_binary_cache
  start_host_services
  apply_cluster_state

  # Control plane first: the worker depends on cluster services it provides.
  launch_iso_vm \
    "ultracloud-baremetal-control-plane" \
    "$CONTROL_NODE_ID" \
    "$CONTROL_SSH_PORT" \
    "$CONTROL_DHCP_START" \
    "52:54:00:11:22:31" \
    "$CONTROL_DISK_SERIAL" \
    "$CONTROL_DISK_GIB" \
    "$TMP_DIR/control-plane.qcow2" \
    "$CONTROL_LOG"

  verify_node \
    "$CONTROL_NODE_ID" \
    "$CONTROL_SSH_PORT" \
    "$TMP_DIR/control-plane.qcow2" \
    "$CONTROL_LOG" \
    "control-plane" \
    "$CONTROL_TARGET_SYSTEM" \
    "$CONTROL_NIXOS_CONFIGURATION" \
    "$CONTROL_NODE_CLASS" \
    "$CONTROL_DISKO_CONFIG_PATH" \
    "$CONTROL_TARGET_DISK_BY_ID" \
    "$CONTROL_HEALTH_CHECK_PATH" \
    "$CONTROL_DHCP_START" \
    "52:54:00:11:22:31" \
    "$CONTROL_DISK_SERIAL"

  launch_iso_vm \
    "ultracloud-baremetal-worker" \
    "$WORKER_NODE_ID" \
    "$WORKER_SSH_PORT" \
    "$WORKER_DHCP_START" \
    "52:54:00:11:22:32" \
    "$WORKER_DISK_SERIAL" \
    "$WORKER_DISK_GIB" \
    "$TMP_DIR/worker.qcow2" \
    "$WORKER_LOG"

  verify_node \
    "$WORKER_NODE_ID" \
    "$WORKER_SSH_PORT" \
    "$TMP_DIR/worker.qcow2" \
    "$WORKER_LOG" \
    "worker" \
    "$WORKER_TARGET_SYSTEM" \
    "$WORKER_NIXOS_CONFIGURATION" \
    "$WORKER_NODE_CLASS" \
    "$WORKER_DISKO_CONFIG_PATH" \
    "$WORKER_TARGET_DISK_BY_ID" \
    "$WORKER_HEALTH_CHECK_PATH" \
    "$WORKER_DHCP_START" \
    "52:54:00:11:22:32" \
    "$WORKER_DISK_SERIAL"

  log "Canonical ISO bare-metal QEMU verification succeeded"
}
|
|
|
|
main "$@"
|