photoncloud-monorepo/nix/iso/plasmacloud-iso.nix
centra 54e3a16091 fix(nix): Align service ExecStart with actual binary CLI interfaces
- chainfire: Fix binary name (chainfire-server → chainfire)
- fiberlb: Use --grpc-addr instead of --port
- flaredb: Use --addr instead of --api-addr/--raft-addr
- flashdns: Add --grpc-addr and --dns-addr flags
- iam: Use --addr instead of --port/--data-dir
- k8shost: Add --iam-server-addr for dynamic IAM port connection
- lightningstor: Add --in-memory-metadata for ChainFire fallback
- plasmavmc: Add ChainFire service dependency and endpoint env var
- prismnet: Use --grpc-addr instead of --port

These fixes are required for T039 production deployment. The
plasmavmc change specifically fixes the ChainFire port mismatch
(was hardcoded 50051, now uses chainfire.port = 2379).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-18 22:58:40 +09:00

194 lines
5.9 KiB
Nix

# PlasmaCloud Bootstrap ISO
# Minimal ISO with DHCP + Phone Home to Deployer + Auto-Install
# For VM cluster deployment: boots, phones home, partitions disk, installs NixOS
{ config, lib, pkgs, modulesPath, ... }:
{
imports = [
"${modulesPath}/installer/cd-dvd/installation-cd-minimal.nix"
];
# ISO image settings: output file name, supported boot modes, and extra files
# bundled into the image.
isoImage = {
  isoName = "plasmacloud-bootstrap.iso";

  # Bootable both via EFI and when written to a USB stick.
  makeEfiBootable = true;
  makeUsbBootable = true;

  # Ship a copy of the repository (two directories above this file) with the
  # ISO so the flake can be installed without fetching the sources.
  # NOTE(review): isoImage.contents places files on the ISO filesystem itself;
  # confirm the booted system really sees this at /opt/plasmacloud-src and not
  # underneath the ISO mount point.
  contents = [
    {
      source = ../../.;
      target = "/opt/plasmacloud-src";
    }
  ];
};
# Networking is kept minimal: systemd-networkd is used instead of
# NetworkManager (forced off), and every interface requests a DHCP lease.
networking = {
  useNetworkd = true;
  networkmanager.enable = lib.mkForce false;
};
systemd.network.networks."10-dhcp" = {
  # The wildcard matches every interface name.
  matchConfig = {
    Name = "*";
  };
  DHCP = "yes";
};
# Phone Home service: POSTs this machine's ID to the Deployer and stores the
# returned SSH host key and node configuration under /etc.
#
# Outputs (each written only when the response contains the field):
#   /etc/ssh/ssh_host_ed25519_key       (mode 600)
#   /etc/plasmacloud/node-config.json   (mode 644)
#
# The unit fails (exit 1) when no valid response is obtained in 5 attempts.
systemd.services.plasmacloud-bootstrap = {
  description = "PlasmaCloud Bootstrap via Phone Home";
  wantedBy = [ "multi-user.target" ];
  after = [ "network-online.target" ];
  wants = [ "network-online.target" ];
  serviceConfig = {
    Type = "oneshot";
    RemainAfterExit = true;
  };
  script = ''
    # Deployer endpoint: taken from the environment when set, otherwise the
    # default name below.
    # NOTE(review): secrets travel over plain HTTP with the default URL;
    # confirm the bootstrap network is trusted or switch to HTTPS.
    DEPLOYER_URL="''${DEPLOYER_URL:-http://deployer.local:8080}"

    # Get machine identity
    MACHINE_ID=$(cat /etc/machine-id)

    echo "PlasmaCloud Bootstrap starting..."
    echo "Machine ID: $MACHINE_ID"
    echo "Deployer URL: $DEPLOYER_URL"

    # Build the request body with jq so the value is always correctly
    # JSON-encoded, whatever the machine-id file contains.
    REQUEST_BODY=$(${pkgs.jq}/bin/jq -cn --arg id "$MACHINE_ID" '{machine_id: $id}')

    # Phone Home request with retry
    for i in 1 2 3 4 5; do
      echo "Attempt $i/5: Contacting Deployer..."
      # Each request is time-boxed so a stalled connection cannot hang this
      # oneshot unit forever. A reply only counts as success when it parses
      # as a JSON object; anything else is retried like a transport error.
      if RESPONSE=$(${pkgs.curl}/bin/curl -sf -X POST \
           --connect-timeout 10 --max-time 30 \
           -H "Content-Type: application/json" \
           -d "$REQUEST_BODY" \
           "$DEPLOYER_URL/api/v1/phone-home") \
         && echo "$RESPONSE" | ${pkgs.jq}/bin/jq -e 'type == "object"' >/dev/null 2>&1; then
        echo " Phone Home successful"

        # Create directories
        mkdir -p /etc/ssh /etc/plasmacloud

        # SSH host key: written only when the response carries one, so an
        # existing key is never truncated to an empty file. The file is
        # created under a restrictive umask so it is never readable by
        # other users, not even briefly before the chmod.
        if echo "$RESPONSE" | ${pkgs.jq}/bin/jq -e '.ssh_host_key // empty' >/dev/null 2>&1; then
          (
            umask 077
            echo "$RESPONSE" | ${pkgs.jq}/bin/jq -r '.ssh_host_key // empty' > /etc/ssh/ssh_host_ed25519_key
          )
          chmod 600 /etc/ssh/ssh_host_ed25519_key
        else
          echo " WARNING: response contained no ssh_host_key"
        fi

        # Node configuration: consumed later by the install step, which
        # refuses to run when this file is missing or empty.
        if echo "$RESPONSE" | ${pkgs.jq}/bin/jq -e '.node_config // empty' >/dev/null 2>&1; then
          echo "$RESPONSE" | ${pkgs.jq}/bin/jq -r '.node_config // empty' > /etc/plasmacloud/node-config.json
          chmod 644 /etc/plasmacloud/node-config.json
        else
          echo " WARNING: response contained no node_config"
        fi

        # Signal success
        NODE_ID=$(echo "$RESPONSE" | ${pkgs.jq}/bin/jq -r '.node_id // "unknown"')
        echo " Bootstrap complete: $NODE_ID"
        exit 0
      fi

      echo " Phone Home failed, attempt $i/5"
      # Exponential back-off between attempts; sleeping after the final
      # attempt would only delay the failure report.
      if [ "$i" -lt 5 ]; then
        sleep $((2 ** i))
      fi
    done

    echo " Bootstrap failed after 5 attempts"
    exit 1
  '';
};
# Auto-install service: partitions the first disk with disko, runs
# nixos-install from the repository copy shipped with the ISO, then reboots.
#
# Runs only after plasmacloud-bootstrap succeeded. It exits 0 without touching
# the disk when the node is not one of the known cluster nodes, or when an
# existing NixOS installation is detected on the root partition.
systemd.services.plasmacloud-install = {
  description = "PlasmaCloud Auto-Install to Disk";
  wantedBy = [ "multi-user.target" ];
  after = [ "plasmacloud-bootstrap.service" ];
  requires = [ "plasmacloud-bootstrap.service" ];
  # NixOS unit scripts run with a minimal PATH that does not include nix,
  # nixos-install or mount/umount, so the tools the script calls by bare name
  # are added explicitly.
  path = [
    config.nix.package
    config.system.build.nixos-install
    pkgs.util-linux
  ];
  serviceConfig = {
    Type = "oneshot";
    RemainAfterExit = true;
    StandardOutput = "journal+console";
    StandardError = "journal+console";
  };
  script = ''
    set -euo pipefail

    NODE_CONFIG=/etc/plasmacloud/node-config.json

    if [ ! -s "$NODE_CONFIG" ]; then
      echo "ERROR: node-config.json missing (bootstrap not complete?)"
      exit 1
    fi

    NODE_ID=$(${pkgs.jq}/bin/jq -r '.hostname // empty' "$NODE_CONFIG")
    NODE_IP=$(${pkgs.jq}/bin/jq -r '.ip // empty' "$NODE_CONFIG")
    if [ -z "$NODE_ID" ] || [ -z "$NODE_IP" ]; then
      echo "ERROR: node-config.json missing hostname/ip"
      exit 1
    fi

    # Safety guard: only install for known VM cluster nodes
    case "$NODE_ID" in
      node01|node02|node03) ;;
      *)
        echo "Skipping install: unexpected node_id '$NODE_ID'"
        exit 0
        ;;
    esac

    # Accept 10.0.1.x (cluster config) or 192.168.100.x (T036 config)
    case "$NODE_IP" in
      10.0.1.*|192.168.100.*) ;;
      *)
        echo "Skipping install: unexpected ip '$NODE_IP'"
        exit 0
        ;;
    esac

    echo "PlasmaCloud install starting for $NODE_ID (ip=$NODE_IP)"

    # Locate the repository copy shipped with the ISO. Files added through
    # isoImage.contents live on the ISO filesystem, so also look below the
    # ISO mount point when the plain path does not exist.
    SRC=/opt/plasmacloud-src
    if [ ! -d "$SRC" ] && [ -d /iso/opt/plasmacloud-src ]; then
      SRC=/iso/opt/plasmacloud-src
    fi
    if [ ! -d "$SRC" ]; then
      echo "ERROR: repository copy not found at /opt/plasmacloud-src"
      exit 1
    fi

    # Fail before touching the disk if this node has no disk layout.
    DISKO_CONFIG="$SRC/docs/por/T036-vm-cluster-deployment/$NODE_ID/disko.nix"
    if [ ! -f "$DISKO_CONFIG" ]; then
      echo "ERROR: disko config not found: $DISKO_CONFIG"
      exit 1
    fi

    # Find disk: the first block device of type "disk" reported by lsblk.
    # NOTE(review): when booted from a USB stick the boot medium is itself a
    # "disk" and could be picked here; confirm this cannot happen for the
    # guarded node set above.
    DISK=$(${pkgs.util-linux}/bin/lsblk -dpno NAME,TYPE | ${pkgs.gawk}/bin/awk '$2=="disk"{print $1; exit}')
    if [ -z "$DISK" ]; then
      echo "ERROR: No disk found"
      exit 1
    fi

    # The second partition is taken to be the root filesystem. Disks whose
    # name ends in a digit (nvme0n1, mmcblk0) separate the partition number
    # with "p"; others (sda, vda) append it directly.
    case "$DISK" in
      *[0-9]) ROOT_PART="''${DISK}p2" ;;
      *)      ROOT_PART="''${DISK}2" ;;
    esac

    mkdir -p /mnt

    # Skip if already installed: probe an existing ext4 root partition
    # read-only for the /etc/NIXOS marker file.
    if ${pkgs.util-linux}/bin/lsblk -no FSTYPE "$ROOT_PART" 2>/dev/null | ${pkgs.gnugrep}/bin/grep -q '^ext4$'; then
      if ${pkgs.util-linux}/bin/mount -o ro "$ROOT_PART" /mnt 2>/dev/null; then
        if [ -e /mnt/etc/NIXOS ]; then
          echo " Existing NixOS detected; skipping install"
          ${pkgs.util-linux}/bin/umount /mnt || true
          exit 0
        fi
        ${pkgs.util-linux}/bin/umount /mnt || true
      fi
    fi

    echo "Running disko to partition $DISK..."
    export NIX_CONFIG="experimental-features = nix-command flakes"
    # NOTE(review): disko is fetched unpinned from GitHub at install time, so
    # this step needs network access despite the embedded repository.
    nix run github:nix-community/disko -- --mode disko "$DISKO_CONFIG"

    echo "Running nixos-install..."
    nixos-install --flake "$SRC#$NODE_ID" --no-root-passwd
    sync

    echo " Install complete; rebooting..."
    ${pkgs.systemd}/bin/systemctl reboot
  '';
};
# Tools available in the live environment for the bootstrap and install steps
# and for manual debugging.
environment.systemPackages = [
  # HTTP client and JSON processing
  pkgs.curl
  pkgs.jq
  # Interactive troubleshooting
  pkgs.vim
  pkgs.htop
  # Text processing
  pkgs.gawk
  pkgs.gnugrep
  # Disk, partition and filesystem utilities
  pkgs.util-linux
  pkgs.parted
  pkgs.dosfstools
  pkgs.e2fsprogs
];
# OpenSSH server for non-interactive access. Root may log in, but not with a
# password ("prohibit-password").
services.openssh.enable = true;
services.openssh.settings.PermitRootLogin = "prohibit-password";
# Root account for the live environment.
users.users.root = {
  # VM cluster SSH key (same as T036 nodes)
  openssh.authorizedKeys.keys = [
    "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICaSw8CP4Si0Cn0WpYMhgdYNvsR3qFO0ZFiRjpGZXd6S centra@cn-nixos-think"
  ];

  # Fallback password for emergency VNC access.
  # NOTE(review): this is a fixed, publicly visible password; confirm it is
  # acceptable for every environment this ISO boots in.
  # NOTE(review): the imported installer profile may define its own initial
  # root password setting; verify which one takes effect.
  initialPassword = "bootstrap";
};
}