# PlasmaCloud Bootstrap ISO
# Minimal ISO with DHCP + Phone Home to Deployer + Auto-Install
# For VM cluster deployment: boots, phones home, partitions disk, installs NixOS
|
|
|
|
{ config, lib, pkgs, modulesPath, ... }:
|
|
|
|
{
|
|
imports = [
|
|
"${modulesPath}/installer/cd-dvd/installation-cd-minimal.nix"
|
|
];
|
|
|
|
# ISO image settings: output file name, boot modes, and extra files placed
# on the image.
isoImage = {
  isoName = "plasmacloud-bootstrap.iso";
  makeEfiBootable = true;
  makeUsbBootable = true;

  # Embed the repository (two directories above this file) into the ISO for
  # offline flake install.
  contents = [
    {
      target = "/opt/plasmacloud-src";
      source = ../../.;
    }
  ];
};
|
|
|
|
# Minimal network: systemd-networkd manages links, NetworkManager is forced off.
networking = {
  useNetworkd = true;
  networkmanager.enable = lib.mkForce false;
};

# Request DHCP on every interface (wildcard name match).
systemd.network.networks."10-dhcp" = {
  matchConfig = {
    Name = "*";
  };
  DHCP = "yes";
};
|
|
|
|
# Phone Home service — registers this node with the Deployer and stores the
# returned material on the live system.
#
# Inputs (all optional, read from the unit environment or the filesystem):
#   DEPLOYER_URL              base URL of the Deployer (default http://192.168.100.1:8080)
#   /etc/plasmacloud/bootstrap-token or DEPLOYER_BOOTSTRAP_TOKEN
#                             token sent as the X-Deployer-Token header
#   DEPLOYER_CA_CERT          path to a CA bundle passed to curl --cacert
#
# Outputs on success:
#   /etc/plasmacloud/node-config.json, /root/.ssh/authorized_keys,
#   /etc/ssh/ssh_host_ed25519_key{,.pub} and, if provided,
#   /etc/plasmacloud/tls/node.{crt,key}.
#
# The unit exits 0 after the first accepted response and 1 after five failed
# attempts (exponential back-off between attempts).
systemd.services.plasmacloud-bootstrap = {
  description = "PlasmaCloud Bootstrap via Phone Home";
  wantedBy = [ "multi-user.target" ];
  after = [ "network-online.target" ];
  wants = [ "network-online.target" ];

  serviceConfig = {
    Type = "oneshot";
    RemainAfterExit = true;
  };

  script = ''
    set -euo pipefail

    # Deployer endpoint: DEPLOYER_URL from the environment, else a fixed default
    DEPLOYER_URL="''${DEPLOYER_URL:-http://192.168.100.1:8080}"

    # Get machine identity
    MACHINE_ID=$(cat /etc/machine-id)

    echo "PlasmaCloud Bootstrap starting..."
    echo "Machine ID: $MACHINE_ID"
    echo "Deployer URL: $DEPLOYER_URL"

    # Optional bootstrap token (from file or environment)
    TOKEN_FILE="/etc/plasmacloud/bootstrap-token"
    DEPLOYER_TOKEN=""
    if [ -s "$TOKEN_FILE" ]; then
      DEPLOYER_TOKEN=$(cat "$TOKEN_FILE")
    elif [ -n "''${DEPLOYER_BOOTSTRAP_TOKEN:-}" ]; then
      DEPLOYER_TOKEN="''${DEPLOYER_BOOTSTRAP_TOKEN}"
    fi

    CURL_ARGS=(-sf --connect-timeout 5 --max-time 15)
    if [ -n "$DEPLOYER_TOKEN" ]; then
      CURL_ARGS+=(-H "X-Deployer-Token: $DEPLOYER_TOKEN")
    fi
    if [ -n "''${DEPLOYER_CA_CERT:-}" ] && [ -f "''${DEPLOYER_CA_CERT}" ]; then
      CURL_ARGS+=(--cacert "''${DEPLOYER_CA_CERT}")
    fi

    # Determine the node's IPv4 address, trying three probes in order.
    # Each probe ends in "|| true": with errexit + pipefail a failing probe
    # (e.g. "ip route get" on a network without a default route) would
    # otherwise abort the script before the next fallback is tried.
    NODE_IP=$(${pkgs.iproute2}/bin/ip -4 route get 1.1.1.1 2>/dev/null | ${pkgs.gawk}/bin/awk '{for(i=1;i<=NF;i++) if ($i=="src") {print $(i+1); exit}}' || true)
    if [ -z "$NODE_IP" ]; then
      NODE_IP=$(${pkgs.iproute2}/bin/ip -4 addr show scope global 2>/dev/null | ${pkgs.gawk}/bin/awk '/inet / {sub("/.*","",$2); print $2; exit}' || true)
    fi
    if [ -z "$NODE_IP" ]; then
      # Best effort only: "hostname" may not be on the unit's PATH.
      NODE_IP=$(hostname -I 2>/dev/null | ${pkgs.gawk}/bin/awk '{print $1}' || true)
    fi

    # uname ships with coreutils, which is on the default unit PATH;
    # a bare "hostname" is not guaranteed to be.
    NODE_HOSTNAME=$(uname -n)

    # Build the request body with jq so every value is correctly JSON-escaped.
    PAYLOAD=$(${pkgs.jq}/bin/jq -cn \
      --arg machine_id "$MACHINE_ID" \
      --arg hostname "$NODE_HOSTNAME" \
      --arg ip "$NODE_IP" \
      '{machine_id: $machine_id, node_id: $hostname, hostname: $hostname, ip: $ip}')

    # Phone Home request with retry
    for i in 1 2 3 4 5; do
      echo "Attempt $i/5: Contacting Deployer..."

      if RESPONSE=$(${pkgs.curl}/bin/curl "''${CURL_ARGS[@]}" -X POST \
        -H "Content-Type: application/json" \
        -d "$PAYLOAD" \
        "$DEPLOYER_URL/api/v1/phone-home"); then

        echo "✓ Phone Home successful"

        # Create directories
        mkdir -p /etc/ssh /etc/plasmacloud /root/.ssh

        # Validate success flag
        SUCCESS=$(echo "$RESPONSE" | ${pkgs.jq}/bin/jq -r '.success // false' || echo "false")
        if [ "$SUCCESS" != "true" ]; then
          MESSAGE=$(echo "$RESPONSE" | ${pkgs.jq}/bin/jq -r '.message // empty' || true)
          echo "✗ Phone Home rejected: $MESSAGE"
          sleep $((2 ** i))
          continue
        fi

        # Extract and apply secrets
        NODE_CONFIG=$(echo "$RESPONSE" | ${pkgs.jq}/bin/jq -c '.node_config // empty' || true)
        if [ -z "$NODE_CONFIG" ] || [ "$NODE_CONFIG" = "null" ]; then
          echo "✗ Phone Home response missing node_config"
          sleep $((2 ** i))
          continue
        fi
        echo "$NODE_CONFIG" > /etc/plasmacloud/node-config.json
        echo "$RESPONSE" | ${pkgs.jq}/bin/jq -r '.node_config.ssh_authorized_keys[]?' > /root/.ssh/authorized_keys

        # Apply SSH host key if provided
        SSH_HOST_KEY=$(echo "$RESPONSE" | ${pkgs.jq}/bin/jq -r '.ssh_host_key // empty')
        if [ -n "$SSH_HOST_KEY" ]; then
          # Private key material: create the file without group/other access
          umask 077
          echo "$SSH_HOST_KEY" > /etc/ssh/ssh_host_ed25519_key
          ${pkgs.openssh}/bin/ssh-keygen -y -f /etc/ssh/ssh_host_ed25519_key > /etc/ssh/ssh_host_ed25519_key.pub
        fi

        # Apply TLS material if provided (only when both cert and key are present)
        TLS_CERT=$(echo "$RESPONSE" | ${pkgs.jq}/bin/jq -r '.tls_cert // empty')
        TLS_KEY=$(echo "$RESPONSE" | ${pkgs.jq}/bin/jq -r '.tls_key // empty')
        if [ -n "$TLS_CERT" ] && [ -n "$TLS_KEY" ]; then
          umask 077
          mkdir -p /etc/plasmacloud/tls
          echo "$TLS_CERT" > /etc/plasmacloud/tls/node.crt
          echo "$TLS_KEY" > /etc/plasmacloud/tls/node.key
        fi

        # Generate host keys locally if missing
        if [ ! -s /etc/ssh/ssh_host_ed25519_key ]; then
          ${pkgs.openssh}/bin/ssh-keygen -A
        fi

        # Set permissions (files that were not created are skipped silently)
        chmod 644 /etc/plasmacloud/node-config.json 2>/dev/null || true
        chmod 700 /root/.ssh 2>/dev/null || true
        chmod 600 /root/.ssh/authorized_keys 2>/dev/null || true
        chmod 600 /etc/ssh/ssh_host_ed25519_key 2>/dev/null || true
        chmod 644 /etc/ssh/ssh_host_ed25519_key.pub 2>/dev/null || true
        chmod 600 /etc/plasmacloud/tls/node.key 2>/dev/null || true
        chmod 644 /etc/plasmacloud/tls/node.crt 2>/dev/null || true

        # Signal success
        NODE_ID=$(echo "$RESPONSE" | ${pkgs.jq}/bin/jq -r '.node_id // "unknown"')
        echo "✓ Bootstrap complete: $NODE_ID"
        exit 0
      else
        echo "✗ Phone Home failed, attempt $i/5"
        sleep $((2 ** i))
      fi
    done

    echo "✗ Bootstrap failed after 5 attempts"
    exit 1
  '';
};
|
|
|
|
# Auto-install service - partitions the disk with disko and runs nixos-install.
#
# Runs after (and requires) plasmacloud-bootstrap.service and reads
# /etc/plasmacloud/node-config.json written by it:
#   .hostname, .ip                        required
#   .install_plan.nixos_configuration     flake output name (falls back to .hostname)
#   .install_plan.disko_config_path       path inside the source tree (falls back to
#                                         nix/nodes/vm-cluster/<hostname>/disko.nix)
#
# The flake source is taken from a bundle downloaded from the Deployer when
# available, otherwise from the tree embedded in the ISO.
# On success the machine reboots; if an existing NixOS install is detected the
# unit exits 0 without touching the disk.
systemd.services.plasmacloud-install = {
  description = "PlasmaCloud Auto-Install to Disk";
  wantedBy = [ "multi-user.target" ];
  after = [ "plasmacloud-bootstrap.service" ];
  requires = [ "plasmacloud-bootstrap.service" ];

  # The script calls nix, nixos-install, mount and umount by bare name; none
  # of them are on the default unit PATH, so expose the system profile.
  path = [ "/run/current-system/sw" ];

  serviceConfig = {
    Type = "oneshot";
    RemainAfterExit = true;
    StandardOutput = "journal+console";
    StandardError = "journal+console";
  };

  script = ''
    set -euo pipefail

    # Flake-style nix commands are used below; enable them before the first
    # nix invocation rather than after it.
    export NIX_CONFIG="experimental-features = nix-command flakes"

    if [ ! -s /etc/plasmacloud/node-config.json ]; then
      echo "ERROR: node-config.json missing (bootstrap not complete?)"
      exit 1
    fi

    NODE_ID=$(${pkgs.jq}/bin/jq -r '.hostname // empty' /etc/plasmacloud/node-config.json)
    NODE_IP=$(${pkgs.jq}/bin/jq -r '.ip // empty' /etc/plasmacloud/node-config.json)
    NIXOS_CONFIGURATION=$(${pkgs.jq}/bin/jq -r '.install_plan.nixos_configuration // .hostname // empty' /etc/plasmacloud/node-config.json)
    DISKO_PATH=$(${pkgs.jq}/bin/jq -r '.install_plan.disko_config_path // empty' /etc/plasmacloud/node-config.json)
    DEPLOYER_URL="''${DEPLOYER_URL:-http://192.168.100.1:8080}"
    SRC_ROOT="/opt/plasmacloud-src"
    # isoImage.contents targets are paths on the ISO filesystem, which the
    # live system mounts at /iso, so the embedded tree is expected here.
    EMBEDDED_SRC="/iso/opt/plasmacloud-src"

    if [ -z "$NODE_ID" ] || [ -z "$NODE_IP" ]; then
      echo "ERROR: node-config.json missing hostname/ip"
      exit 1
    fi

    if [ -z "$NIXOS_CONFIGURATION" ]; then
      echo "ERROR: node-config.json missing install_plan.nixos_configuration"
      exit 1
    fi

    # Optional bootstrap token (from file or environment)
    TOKEN_FILE="/etc/plasmacloud/bootstrap-token"
    DEPLOYER_TOKEN=""
    if [ -s "$TOKEN_FILE" ]; then
      DEPLOYER_TOKEN=$(cat "$TOKEN_FILE")
    elif [ -n "''${DEPLOYER_BOOTSTRAP_TOKEN:-}" ]; then
      DEPLOYER_TOKEN="''${DEPLOYER_BOOTSTRAP_TOKEN}"
    fi

    CURL_ARGS=(-sfL --connect-timeout 5 --max-time 120)
    if [ -n "$DEPLOYER_TOKEN" ]; then
      CURL_ARGS+=(-H "X-Deployer-Token: $DEPLOYER_TOKEN")
    fi
    if [ -n "''${DEPLOYER_CA_CERT:-}" ] && [ -f "''${DEPLOYER_CA_CERT}" ]; then
      CURL_ARGS+=(--cacert "''${DEPLOYER_CA_CERT}")
    fi

    # Prefer a fresh flake bundle from the Deployer; fall back to the
    # source tree shipped with the ISO when the download fails.
    BUNDLE_PATH="/run/plasmacloud/flake-bundle.tar.gz"
    mkdir -p /run/plasmacloud
    if ${pkgs.curl}/bin/curl "''${CURL_ARGS[@]}" \
      "$DEPLOYER_URL/api/v1/bootstrap/flake-bundle" \
      -o "$BUNDLE_PATH"; then
      echo "Downloaded bootstrap flake bundle from deployer"
      rm -rf "$SRC_ROOT"
      mkdir -p "$SRC_ROOT"
      ${pkgs.gzip}/bin/gzip -dc "$BUNDLE_PATH" | ${pkgs.gnutar}/bin/tar -xf - -C "$SRC_ROOT"
    else
      echo "No deployer flake bundle available; using embedded source tree"
      if [ ! -d "$SRC_ROOT" ] && [ -d "$EMBEDDED_SRC" ]; then
        SRC_ROOT="$EMBEDDED_SRC"
      fi
    fi

    # Default Disko path derived from the node's hostname
    if [ -z "$DISKO_PATH" ]; then
      CANDIDATE_DISKO="nix/nodes/vm-cluster/$NODE_ID/disko.nix"
      if [ -f "$SRC_ROOT/$CANDIDATE_DISKO" ]; then
        DISKO_PATH="$CANDIDATE_DISKO"
      fi
    fi

    if [ -z "$DISKO_PATH" ]; then
      echo "ERROR: node-config.json missing install_plan.disko_config_path and no default Disko path exists for $NODE_ID"
      exit 1
    fi

    if [ ! -f "$SRC_ROOT/$DISKO_PATH" ]; then
      echo "ERROR: Disko config not found: $SRC_ROOT/$DISKO_PATH"
      exit 1
    fi

    echo "PlasmaCloud install starting for $NODE_ID (ip=$NODE_IP, nixos_configuration=$NIXOS_CONFIGURATION, disko_path=$DISKO_PATH)"

    # Find disk: the first block device lsblk reports with TYPE "disk".
    # NOTE(review): on USB boot this could be the boot medium itself — confirm
    # this is only used on VMs where the ISO is attached as a CD-ROM.
    DISK=$(${pkgs.util-linux}/bin/lsblk -dpno NAME,TYPE | ${pkgs.gawk}/bin/awk '$2=="disk"{print $1; exit}')
    if [ -z "$DISK" ]; then
      echo "ERROR: No disk found"
      exit 1
    fi

    # Second partition of the disk. Device names that end in a digit
    # (nvme0n1, mmcblk0, ...) use a "p" separator before the partition number.
    case "$DISK" in
      *[0-9]) ROOT_PART="''${DISK}p2" ;;
      *)      ROOT_PART="''${DISK}2" ;;
    esac
    mkdir -p /mnt

    # Skip if already installed
    if ${pkgs.util-linux}/bin/lsblk -no FSTYPE "$ROOT_PART" 2>/dev/null | ${pkgs.gnugrep}/bin/grep -q '^ext4$'; then
      mount "$ROOT_PART" /mnt 2>/dev/null || true
      if [ -e /mnt/etc/NIXOS ]; then
        echo "✓ Existing NixOS detected; skipping install"
        umount /mnt || true
        exit 0
      fi
      umount /mnt || true
    fi

    # Fail before touching the disk if the requested configuration does not evaluate
    echo "Validating NixOS configuration output..."
    nix eval --raw "$SRC_ROOT#nixosConfigurations.$NIXOS_CONFIGURATION.config.system.build.toplevel.drvPath" >/dev/null

    echo "Running disko to partition $DISK..."
    nix run github:nix-community/disko -- --mode disko "$SRC_ROOT/$DISKO_PATH"

    echo "Running nixos-install..."
    nixos-install --flake "$SRC_ROOT#$NIXOS_CONFIGURATION" --no-root-passwd

    sync
    echo "✓ Install complete; rebooting..."
    ${pkgs.systemd}/bin/systemctl reboot
  '';
};
|
|
|
|
# Tools available in the live system for the bootstrap and install steps
# (and for manual debugging).
environment.systemPackages = [
  pkgs.curl
  pkgs.jq
  pkgs.vim
  pkgs.htop
  pkgs.gawk
  pkgs.gnugrep
  pkgs.util-linux
  pkgs.parted
  pkgs.dosfstools
  pkgs.e2fsprogs
  pkgs.gnutar
  pkgs.gzip
];
|
|
|
|
# SSH daemon for non-interactive access; root may log in with keys only.
services.openssh.enable = true;
services.openssh.settings.PermitRootLogin = "prohibit-password";

# No static root keys are baked into the image: the phone-home service writes
# /root/.ssh/authorized_keys at runtime.
users.users.root.openssh.authorizedKeys = {
  keys = [ ];
};
|
|
}
|