# PhotonCloud 6-Node Test Cluster
#
# Common configuration shared by all nodes.
#
# Usage: Import this from individual node configurations.
#
# Each node gets:
#   * eth0: the default qemu-vm.nix SLiRP NIC (DHCP, host SSH forward)
#   * eth1: a VDE-backed east-west cluster NIC with a deterministic MAC
#     derived from the node index, so the cluster module can assign
#     stable 10.100.0.x addresses.
{ config, lib, pkgs, modulesPath, ... }:

let
  cfg = config.photonTestCluster;
in
{
  imports = [
    (modulesPath + "/virtualisation/qemu-vm.nix")
    ../modules/plasmacloud-cluster.nix
  ];

  options.photonTestCluster = {
    sshBasePort = lib.mkOption {
      type = lib.types.port;
      default = 2200;
      description = "Base host port used for guest SSH forwarding.";
    };
    vdeSock = lib.mkOption {
      type = lib.types.str;
      default = "/tmp/photoncloud-test-cluster-vde.sock";
      description = "VDE control socket path used for the east-west cluster NIC.";
    };
    chainfireControlPlaneAddrs = lib.mkOption {
      type = lib.types.str;
      default = "10.100.0.11:2379,10.100.0.12:2379,10.100.0.13:2379";
      description = "Comma-separated ChainFire client endpoints for multi-endpoint failover.";
    };
    flaredbControlPlaneAddrs = lib.mkOption {
      type = lib.types.str;
      default = "10.100.0.11:2479,10.100.0.12:2479,10.100.0.13:2479";
      description = "Comma-separated FlareDB client endpoints for multi-endpoint failover.";
    };
  };

  config = {
    virtualisation =
      let
        # Extract the node index from the hostname (e.g. "node01" -> 1).
        # Strip only "node" (not "node0") and parse with toIntBase10, which
        # tolerates leading zeros — this keeps node01..node09 working exactly
        # as before while also supporting node10 and beyond.
        nodeIndex = lib.strings.toIntBase10
          (lib.strings.removePrefix "node" config.networking.hostName);
        # Two-digit, zero-padded suffix for the deterministic cluster MAC.
        macSuffix = lib.strings.fixedWidthString 2 "0" (toString nodeIndex);
        vdeSock = cfg.vdeSock;
      in
      {
        graphics = false;
        cores = 2;
        # SSH: host port sshBasePort + index -> guest 22.
        # node06 additionally exposes the ingress/observability ports.
        forwardPorts = [
          { from = "host"; host.port = cfg.sshBasePort + nodeIndex; guest.port = 22; }
        ] ++ lib.optionals (config.networking.hostName == "node06") [
          { from = "host"; host.port = 8080; guest.port = 8080; }
          { from = "host"; host.port = 8443; guest.port = 8443; }
          { from = "host"; host.port = 9090; guest.port = 9090; }
          { from = "host"; host.port = 3000; guest.port = 3000; }
        ];
        qemu.options = [
          # Nested KVM validation requires hardware acceleration and host CPU flags.
          "-enable-kvm"
          "-cpu host"
          # eth1: Cluster network shared across all VMs. VDE is materially faster
          # than multicast sockets for this nested-QEMU storage lab.
          "-netdev vde,id=n1,sock=${vdeSock}"
          "-device virtio-net-pci,netdev=n1,mac=52:54:00:10:00:${macSuffix}"
        ];
      };

    # Test lab only: the firewall is disabled and root login over SSH with a
    # shared password is permitted. Do not reuse outside this cluster.
    networking.firewall.enable = false;

    services.openssh = {
      enable = true;
      settings = {
        KbdInteractiveAuthentication = false;
        PasswordAuthentication = true;
        PermitRootLogin = "yes";
      };
    };

    users.mutableUsers = false;
    users.users.root.hashedPassword = "$6$photoncloud$aUJCEE5wm/b5O.9KIKGm84qUWdWXwnebsFEiMBF7u9Y7AOWodaMrjbbKGMOf0X59VJyJeMRsgbT7VWeqMHpUe.";

    # qemu-vm.nix provides the default SLiRP NIC as eth0.
    # The extra VDE NIC above becomes eth1 and carries intra-cluster traffic.
    networking.interfaces.eth0.useDHCP = true;

    boot.loader.grub.device = "nodev";
    # nbd is used by the storage tests; raise the device and partition limits.
    boot.kernelModules = [ "nbd" ];
    boot.extraModprobeConfig = ''
      options nbd nbds_max=16 max_part=8
    '';

    fileSystems."/" = {
      device = "/dev/disk/by-label/nixos";
      fsType = "ext4";
    };

    system.stateVersion = "24.05";

    # One-shot tuning of the cluster NIC: waits up to 30s for eth1 to appear
    # (skips gracefully if it never does), then disables guest-side offloads
    # and raises the TX queue length.
    systemd.services.photon-test-cluster-net-tuning = {
      description = "Tune cluster NIC offloads for nested-QEMU storage tests";
      wantedBy = [ "multi-user.target" ];
      after = [ "network-online.target" ];
      wants = [ "network-online.target" ];
      serviceConfig = {
        Type = "oneshot";
        RemainAfterExit = true;
      };
      path = [ pkgs.ethtool pkgs.iproute2 pkgs.coreutils ];
      script = ''
        set -eu
        iface="eth1"
        for _ in $(seq 1 30); do
          if ip link show "$iface" >/dev/null 2>&1; then
            break
          fi
          sleep 1
        done
        if ! ip link show "$iface" >/dev/null 2>&1; then
          echo "photon-test-cluster-net-tuning: $iface not present, skipping" >&2
          exit 0
        fi
        # Nested QEMU over VDE is sensitive to guest-side offloads; disabling
        # them reduces retransmits and keeps the storage benchmarks closer to
        # raw TCP throughput.
        ethtool -K "$iface" tso off gso off gro off tx off rx off sg off || true
        ip link set dev "$iface" txqueuelen 10000 || true
      '';
    };

    environment.systemPackages = with pkgs; [
      awscli2
      curl
      dnsutils
      ethtool
      fio
      jq
      grpcurl
      htop
      iperf3
      (python3.withPackages (ps: [ ps.boto3 ]))
      vim
      netcat
      iproute2
      tcpdump
      pciutils # lspci for debugging
      qemu
    ];

    plasmacloud.cluster = {
      enable = true;
      name = "photoncloud-test";

      # Topology: node01-03 form the control plane (one per failure zone),
      # node04-05 are native workers, node06 hosts ingress/management services.
      nodes = {
        node01 = {
          role = "control-plane";
          ip = "10.100.0.11";
          services = [ "chainfire" "flaredb" "iam" "prismnet" "flashdns" "fiberlb" "k8shost" "plasmavmc" "lightningstor" "coronafs" ];
          labels = { tier = "control-plane"; };
          pool = "control";
          nodeClass = "control-plane";
          failureDomain = "zone-a";
          raftPort = 2380;
          apiPort = 2379;
        };
        node02 = {
          role = "control-plane";
          ip = "10.100.0.12";
          services = [ "chainfire" "flaredb" "iam" ];
          labels = { tier = "control-plane"; };
          pool = "control";
          nodeClass = "control-plane";
          failureDomain = "zone-b";
          raftPort = 2380;
          apiPort = 2379;
        };
        node03 = {
          role = "control-plane";
          ip = "10.100.0.13";
          services = [ "chainfire" "flaredb" "iam" ];
          labels = { tier = "control-plane"; };
          pool = "control";
          nodeClass = "control-plane";
          failureDomain = "zone-c";
          raftPort = 2380;
          apiPort = 2379;
        };
        node04 = {
          role = "worker";
          ip = "10.100.0.21";
          services = [ "plasmavmc-agent" "lightningstor-data" "node-agent" ];
          labels = { runtime = "native"; };
          pool = "general";
          nodeClass = "worker-linux";
          failureDomain = "zone-b";
          state = "provisioning";
          raftPort = 2380;
          apiPort = 2379;
        };
        node05 = {
          role = "worker";
          ip = "10.100.0.22";
          services = [ "plasmavmc-agent" "lightningstor-data" "node-agent" ];
          labels = { runtime = "native"; };
          pool = "general";
          nodeClass = "worker-linux";
          failureDomain = "zone-c";
          state = "provisioning";
          raftPort = 2380;
          apiPort = 2379;
        };
        node06 = {
          role = "control-plane";
          ip = "10.100.0.100";
          services = [ "apigateway" "nightlight" "creditservice" "deployer" "fleet-scheduler" ];
          labels = { tier = "control-plane"; ingress = "true"; };
          pool = "control";
          nodeClass = "control-plane";
          failureDomain = "zone-a";
          raftPort = 2380;
          apiPort = 2379;
        };
      };

      deployer = {
        clusterId = "test-cluster";
        environment = "test";

        nodeClasses = {
          control-plane = {
            description = "Control-plane services and management endpoints";
            roles = [ "control-plane" ];
            labels = { tier = "control-plane"; };
          };
          worker-linux = {
            description = "General-purpose native runtime workers";
            roles = [ "worker" ];
            labels = { tier = "general"; runtime = "native"; };
          };
        };

        pools = {
          control = {
            description = "Control-plane pool";
            nodeClass = "control-plane";
            labels = { plane = "control"; };
          };
          general = {
            description = "General-purpose native worker pool";
            nodeClass = "worker-linux";
            labels = { "pool.photoncloud.io/name" = "general"; };
          };
        };

        services = {
          # Bare-process service: Python http.server spread across the two
          # worker zones, published via DNS + load balancer.
          native-web = {
            protocol = "http";
            ports.http = 18190;
            schedule = {
              replicas = 2;
              placement = {
                roles = [ "worker" ];
                pools = [ "general" ];
                nodeClasses = [ "worker-linux" ];
                matchLabels = { runtime = "native"; };
                spreadByLabel = "failure_domain";
                maxInstancesPerNode = 1;
              };
              instancePort = 18190;
              process = {
                command = "python3";
                # The ''${...} placeholders are expanded by the scheduler at
                # instance launch, not by Nix — hence the escaping.
                args = [ "-m" "http.server" "\${INSTANCE_PORT}" "--bind" "\${INSTANCE_IP}" ];
              };
              healthCheck = {
                type = "http";
                path = "/";
                intervalSecs = 5;
                timeoutSecs = 3;
              };
            };
            publish = {
              dns = {
                zone = "native.cluster.test";
                name = "web";
                ttl = 30;
                mode = "load_balancer";
              };
              loadBalancer = {
                orgId = "native-services";
                projectId = "test-cluster";
                listenerPort = 18191;
                protocol = "http";
                poolProtocol = "http";
              };
            };
          };

          # Container service: single nginx replica under podman, with a
          # generous startup grace for the initial image pull.
          native-container = {
            protocol = "http";
            ports.http = 18192;
            schedule = {
              replicas = 1;
              placement = {
                roles = [ "worker" ];
                pools = [ "general" ];
                nodeClasses = [ "worker-linux" ];
                matchLabels = { runtime = "native"; };
                maxInstancesPerNode = 1;
              };
              instancePort = 18192;
              container = {
                image = "docker.io/library/nginx:1.27-alpine";
                runtime = "podman";
                pullPolicy = "if-not-present";
                ports = [
                  { containerPort = 80; hostPort = 18192; protocol = "tcp"; }
                ];
              };
              healthCheck = {
                type = "http";
                path = "/";
                intervalSecs = 5;
                timeoutSecs = 5;
                startupGraceSecs = 120;
              };
            };
          };
        };
      };

      bootstrap.initialPeers = [ "node01" "node02" "node03" ];
      bgp.asn = 64512;
    };
  };
}