photoncloud-monorepo/nix/test-cluster/common.nix

282 lines
8.3 KiB
Nix

# PhotonCloud 6-Node Test Cluster
#
# Common configuration shared by all nodes
#
# Usage: Import this from individual node configurations
{ config, lib, pkgs, modulesPath, ... }:
let
cfg = config.photonTestCluster;
in
{
imports = [
(modulesPath + "/virtualisation/qemu-vm.nix")
../modules/plasmacloud-cluster.nix
];
options.photonTestCluster =
  let
    inherit (lib) mkOption types;
  in
  {
    # Each node's forwarded SSH port on the host is this value plus the
    # node index derived from its hostname.
    sshBasePort = mkOption {
      type = types.port;
      default = 2200;
      description = "Base host port used for guest SSH forwarding.";
    };

    # Passed to QEMU as the `sock=` argument of the VDE netdev.
    vdeSock = mkOption {
      type = types.str;
      default = "/tmp/photoncloud-test-cluster-vde.sock";
      description = "VDE control socket path used for the east-west cluster NIC.";
    };

    # Declared here for node configurations to read; not consumed in this file.
    # The defaults list the node01-node03 addresses on port 2379.
    chainfireControlPlaneAddrs = mkOption {
      type = types.str;
      default = "10.100.0.11:2379,10.100.0.12:2379,10.100.0.13:2379";
      description = "Comma-separated ChainFire client endpoints for multi-endpoint failover.";
    };

    # Declared here for node configurations to read; not consumed in this file.
    # The defaults list the node01-node03 addresses on port 2479.
    flaredbControlPlaneAddrs = mkOption {
      type = types.str;
      default = "10.100.0.11:2479,10.100.0.12:2479,10.100.0.13:2479";
      description = "Comma-separated FlareDB client endpoints for multi-endpoint failover.";
    };
  };
config = {
virtualisation =
  let
    hostName = config.networking.hostName;

    # Hostnames must look like "node<digits>" (e.g. "node01" -> 1, "node10" -> 10).
    # builtins.match returns null unless the whole string matches.
    indexMatch = builtins.match "node([0-9]+)" hostName;

    # toIntBase10 accepts zero-padded input such as "01"; plain toInt rejects it.
    nodeIndex =
      if indexMatch == null then
        throw "photonTestCluster: cannot derive node index from hostName '${hostName}'; expected 'node<N>' such as 'node01'"
      else
        lib.strings.toIntBase10 (builtins.head indexMatch);

    # Last octet of the cluster NIC MAC: the decimal index left-padded to two
    # characters. fixedWidthString refuses inputs longer than the width, so
    # indices above 99 fail evaluation instead of yielding a malformed MAC.
    macSuffix = lib.strings.fixedWidthString 2 "0" (toString nodeIndex);

    vdeSock = cfg.vdeSock;
  in
  {
    graphics = false;
    cores = 2;

    # Every node forwards host port (sshBasePort + nodeIndex) to guest SSH.
    # node06 additionally forwards 8080, 8443, 9090 and 3000 one-to-one.
    forwardPorts =
      [
        { from = "host"; host.port = cfg.sshBasePort + nodeIndex; guest.port = 22; }
      ]
      ++ lib.optionals (hostName == "node06") [
        { from = "host"; host.port = 8080; guest.port = 8080; }
        { from = "host"; host.port = 8443; guest.port = 8443; }
        { from = "host"; host.port = 9090; guest.port = 9090; }
        { from = "host"; host.port = 3000; guest.port = 3000; }
      ];

    qemu.options = [
      # Nested KVM validation requires hardware acceleration and host CPU flags.
      "-enable-kvm"
      "-cpu host"
      # eth1: Cluster network shared across all VMs. VDE is materially faster
      # than multicast sockets for this nested-QEMU storage lab.
      "-netdev vde,id=n1,sock=${vdeSock}"
      "-device virtio-net-pci,netdev=n1,mac=52:54:00:10:00:${macSuffix}"
    ];
  };
networking = {
  firewall.enable = false;
  # qemu-vm.nix provides the default SLiRP NIC as eth0.
  # The extra VDE NIC added through virtualisation.qemu.options becomes eth1
  # and carries intra-cluster traffic.
  interfaces.eth0.useDHCP = true;
};

# SSH accepts password logins, including root.
services.openssh.enable = true;
services.openssh.settings = {
  PermitRootLogin = "yes";
  PasswordAuthentication = true;
  KbdInteractiveAuthentication = false;
};

# Accounts are fully declarative; root's password comes from the fixed hash below.
users = {
  mutableUsers = false;
  users.root.hashedPassword = "$6$photoncloud$aUJCEE5wm/b5O.9KIKGm84qUWdWXwnebsFEiMBF7u9Y7AOWodaMrjbbKGMOf0X59VJyJeMRsgbT7VWeqMHpUe.";
};

boot = {
  loader.grub.device = "nodev";
  # Load the nbd module with 16 devices and up to 8 partitions per device.
  kernelModules = [ "nbd" ];
  extraModprobeConfig = ''
    options nbd nbds_max=16 max_part=8
  '';
};

fileSystems = {
  "/" = {
    device = "/dev/disk/by-label/nixos";
    fsType = "ext4";
  };
};

system.stateVersion = "24.05";
# One-shot unit that waits up to 30 seconds for eth1, then turns off its
# offload features and raises its transmit queue length. A missing interface
# or a failing ethtool/ip call does not fail the unit.
systemd.services.photon-test-cluster-net-tuning = {
  description = "Tune cluster NIC offloads for nested-QEMU storage tests";

  # Start after the network is reported online, as part of normal boot.
  wants = [ "network-online.target" ];
  after = [ "network-online.target" ];
  wantedBy = [ "multi-user.target" ];

  # Runs once; RemainAfterExit keeps the unit "active" after the script ends.
  serviceConfig.Type = "oneshot";
  serviceConfig.RemainAfterExit = true;

  # Tools the script calls: ethtool, ip, and seq/sleep from coreutils.
  path = with pkgs; [ ethtool iproute2 coreutils ];

  script = ''
    set -eu
    iface="eth1"
    for _ in $(seq 1 30); do
    if ip link show "$iface" >/dev/null 2>&1; then
    break
    fi
    sleep 1
    done
    if ! ip link show "$iface" >/dev/null 2>&1; then
    echo "photon-test-cluster-net-tuning: $iface not present, skipping" >&2
    exit 0
    fi
    # Nested QEMU over VDE is sensitive to guest-side offloads; disabling
    # them reduces retransmits and keeps the storage benchmarks closer to
    # raw TCP throughput.
    ethtool -K "$iface" tso off gso off gro off tx off rx off sg off || true
    ip link set dev "$iface" txqueuelen 10000 || true
  '';
};
# Tools installed on every node. List order is kept as in the original.
environment.systemPackages = [
  pkgs.awscli2
  pkgs.curl
  pkgs.dnsutils
  pkgs.ethtool
  pkgs.fio
  pkgs.jq
  pkgs.grpcurl
  pkgs.htop
  pkgs.iperf3
  # Python interpreter bundled with boto3.
  (pkgs.python3.withPackages (pyPkgs: [ pyPkgs.boto3 ]))
  pkgs.vim
  pkgs.netcat
  pkgs.iproute2
  pkgs.tcpdump
  pkgs.pciutils # lspci for debugging
  pkgs.qemu
];
plasmacloud.cluster =
  let
    # Every node entry carries the same Raft/API port pair.
    ports = {
      raftPort = 2380;
      apiPort = 2379;
    };

    # Services common to node01, node02 and node03.
    coreServices = [ "chainfire" "flaredb" "iam" ];

    # Builds a node entry in the "control" pool. `extraLabels` is merged over
    # the default tier label.
    controlNode = { ip, failureDomain, services, extraLabels ? { } }:
      ports // {
        inherit ip failureDomain services;
        role = "control-plane";
        pool = "control";
        nodeClass = "control-plane";
        labels = { tier = "control-plane"; } // extraLabels;
      };

    # Builds a node entry in the "general" pool; all workers run the same services.
    workerNode = { ip, failureDomain }:
      ports // {
        inherit ip failureDomain;
        role = "worker";
        pool = "general";
        nodeClass = "worker-linux";
        services = [ "plasmavmc-agent" "lightningstor-data" "node-agent" ];
        labels = { runtime = "native"; };
      };
  in
  {
    enable = true;
    name = "photoncloud-test";

    nodes = {
      node01 = controlNode {
        ip = "10.100.0.11";
        failureDomain = "zone-a";
        services = coreServices ++ [ "prismnet" "flashdns" "fiberlb" "k8shost" "plasmavmc" "lightningstor" "coronafs" ];
      };
      node02 = controlNode {
        ip = "10.100.0.12";
        failureDomain = "zone-b";
        services = coreServices;
      };
      node03 = controlNode {
        ip = "10.100.0.13";
        failureDomain = "zone-c";
        services = coreServices;
      };

      node04 = workerNode { ip = "10.100.0.21"; failureDomain = "zone-b"; };
      node05 = workerNode { ip = "10.100.0.22"; failureDomain = "zone-c"; };

      # node06 is also labelled ingress = "true".
      node06 = controlNode {
        ip = "10.100.0.100";
        failureDomain = "zone-a";
        services = [ "apigateway" "nightlight" "creditservice" "deployer" "fleet-scheduler" ];
        extraLabels = { ingress = "true"; };
      };
    };

    deployer = {
      clusterId = "test-cluster";
      environment = "test";

      # Node classes referenced by the `nodeClass` fields above and by the pools below.
      nodeClasses.control-plane = {
        description = "Control-plane services and management endpoints";
        roles = [ "control-plane" ];
        labels.tier = "control-plane";
      };
      nodeClasses.worker-linux = {
        description = "General-purpose native runtime workers";
        roles = [ "worker" ];
        labels.tier = "general";
        labels.runtime = "native";
      };

      # Pools referenced by the `pool` fields above.
      pools.control = {
        description = "Control-plane pool";
        nodeClass = "control-plane";
        labels.plane = "control";
      };
      pools.general = {
        description = "General-purpose native worker pool";
        nodeClass = "worker-linux";
        labels."pool.photoncloud.io/name" = "general";
      };
    };

    bootstrap.initialPeers = [ "node01" "node02" "node03" ];
    bgp.asn = 64512;
  };
};
}