# PhotonCloud 6-Node Test Cluster
#
# Common configuration shared by all nodes.
#
# Usage: Import this from individual node configurations.
|
{ config, lib, pkgs, modulesPath, ... }:
|
|
|
|
let
|
|
cfg = config.photonTestCluster;
|
|
in
|
|
{
|
|
imports = [
  # Build this configuration as a QEMU guest (provides virtualisation.*
  # options such as forwardPorts and qemu.options used below).
  (modulesPath + "/virtualisation/qemu-vm.nix")
  # Shared cluster module; declares the plasmacloud.cluster option tree
  # configured at the bottom of this file.
  ../modules/plasmacloud-cluster.nix
];
|
|
|
|
# Tunables for the test cluster; individual node configs may override these.
options.photonTestCluster = with lib; {
  sshBasePort = mkOption {
    type = types.port;
    default = 2200;
    description = "Base host port used for guest SSH forwarding.";
  };

  vdeSock = mkOption {
    type = types.str;
    default = "/tmp/photoncloud-test-cluster-vde.sock";
    description = "VDE control socket path used for the east-west cluster NIC.";
  };

  chainfireControlPlaneAddrs = mkOption {
    type = types.str;
    default = "10.100.0.11:2379,10.100.0.12:2379,10.100.0.13:2379";
    description = "Comma-separated ChainFire client endpoints for multi-endpoint failover.";
  };

  flaredbControlPlaneAddrs = mkOption {
    type = types.str;
    default = "10.100.0.11:2479,10.100.0.12:2479,10.100.0.13:2479";
    description = "Comma-separated FlareDB client endpoints for multi-endpoint failover.";
  };
};
|
|
|
|
config = {
|
|
virtualisation = let
  # Derive the numeric node index from the hostname (e.g. "node01" -> 1).
  # Strip the "node" prefix, then at most one leading zero, so both
  # zero-padded ("node01".."node09") and unpadded ("node10"+) hostnames
  # work; lib.strings.toInt rejects strings with leading zeros, which is
  # why the zero must be removed before conversion.
  nodeIndex = lib.strings.toInt
    (lib.strings.removePrefix "0"
      (lib.strings.removePrefix "node" config.networking.hostName));
  # Two-digit zero-padded suffix so each node gets a unique, stable MAC.
  macSuffix = lib.strings.fixedWidthString 2 "0" (toString nodeIndex);
  vdeSock = cfg.vdeSock;
in {
  graphics = false;
  cores = 2;

  # Each node gets a unique host SSH port (sshBasePort + index); node06
  # additionally exposes its ingress/observability ports to the host.
  forwardPorts =
    [
      { from = "host"; host.port = cfg.sshBasePort + nodeIndex; guest.port = 22; }
    ]
    ++ lib.optionals (config.networking.hostName == "node06") [
      { from = "host"; host.port = 8080; guest.port = 8080; }
      { from = "host"; host.port = 8443; guest.port = 8443; }
      { from = "host"; host.port = 9090; guest.port = 9090; }
      { from = "host"; host.port = 3000; guest.port = 3000; }
    ];

  qemu.options = [
    # Nested KVM validation requires hardware acceleration and host CPU flags.
    "-enable-kvm"
    "-cpu host"
    # eth1: Cluster network shared across all VMs. VDE is materially faster
    # than multicast sockets for this nested-QEMU storage lab.
    "-netdev vde,id=n1,sock=${vdeSock}"
    "-device virtio-net-pci,netdev=n1,mac=52:54:00:10:00:${macSuffix}"
  ];
};
|
|
|
|
# Test-lab relaxations: no firewall, password SSH, root login. These are
# deliberately insecure and must not be reused outside this VM cluster.
networking.firewall.enable = false;
services.openssh = {
  enable = true;
  settings = {
    KbdInteractiveAuthentication = false;
    PasswordAuthentication = true;
    PermitRootLogin = "yes";
  };
};
# Declarative users only; root's password is the fixed test credential
# hashed below (SHA-512 crypt, salt "photoncloud").
users.mutableUsers = false;
users.users.root.hashedPassword = "$6$photoncloud$aUJCEE5wm/b5O.9KIKGm84qUWdWXwnebsFEiMBF7u9Y7AOWodaMrjbbKGMOf0X59VJyJeMRsgbT7VWeqMHpUe.";

# qemu-vm.nix provides the default SLiRP NIC as eth0.
# The extra VDE NIC configured in qemu.options above becomes eth1 and
# carries intra-cluster (east-west) traffic.
networking.interfaces.eth0.useDHCP = true;
|
|
|
|
# Guest boot/disk configuration for the qemu-vm image.
boot = {
  # No real bootloader device is needed for the qemu-vm guest.
  loader.grub.device = "nodev";
  # Network block device support, used by the storage tests; allow up to
  # 16 devices with 8 partitions each.
  kernelModules = [ "nbd" ];
  extraModprobeConfig = ''
    options nbd nbds_max=16 max_part=8
  '';
};

fileSystems."/" = {
  device = "/dev/disk/by-label/nixos";
  fsType = "ext4";
};

system.stateVersion = "24.05";
|
|
|
|
# One-shot service that waits for the cluster NIC (eth1) to appear, then
# disables its offloads and enlarges its TX queue. Best-effort: missing
# interface or failing ethtool/ip commands never fail the boot.
systemd.services.photon-test-cluster-net-tuning = {
  description = "Tune cluster NIC offloads for nested-QEMU storage tests";
  wantedBy = [ "multi-user.target" ];
  # Order after (and pull in) network-online so eth1 has a chance to exist
  # before the 30-second polling loop below starts.
  after = [ "network-online.target" ];
  wants = [ "network-online.target" ];
  serviceConfig = {
    Type = "oneshot";
    # Stay "active" after the script exits so the unit reads as started.
    RemainAfterExit = true;
  };
  # Put ethtool, ip, and seq/sleep on the unit's PATH for the script.
  path = [ pkgs.ethtool pkgs.iproute2 pkgs.coreutils ];
  script = ''
    set -eu
    iface="eth1"
    for _ in $(seq 1 30); do
      if ip link show "$iface" >/dev/null 2>&1; then
        break
      fi
      sleep 1
    done
    if ! ip link show "$iface" >/dev/null 2>&1; then
      echo "photon-test-cluster-net-tuning: $iface not present, skipping" >&2
      exit 0
    fi

    # Nested QEMU over VDE is sensitive to guest-side offloads; disabling
    # them reduces retransmits and keeps the storage benchmarks closer to
    # raw TCP throughput.
    ethtool -K "$iface" tso off gso off gro off tx off rx off sg off || true
    ip link set dev "$iface" txqueuelen 10000 || true
  '';
};
|
|
|
|
# Debugging and benchmarking toolbox installed on every node.
environment.systemPackages = [
  pkgs.awscli2
  pkgs.curl
  pkgs.dnsutils
  pkgs.ethtool
  pkgs.fio
  pkgs.jq
  pkgs.grpcurl
  pkgs.htop
  pkgs.iperf3
  (pkgs.python3.withPackages (ps: [ ps.boto3 ]))
  pkgs.vim
  pkgs.netcat
  pkgs.iproute2
  pkgs.tcpdump
  pkgs.pciutils # lspci for debugging
  pkgs.qemu
];
|
|
|
|
plasmacloud.cluster = {
|
|
enable = true;
|
|
name = "photoncloud-test";
|
|
|
|
# Cluster membership. Shared per-role attributes are factored into
# templates below and merged (shallowly, via //) into each node entry;
# per-node attributes written after // override the template.
nodes = let
  portDefaults = {
    raftPort = 2380;
    apiPort = 2379;
  };
  # Control-plane template: control pool/class, tier label.
  controlPlane = portDefaults // {
    role = "control-plane";
    pool = "control";
    nodeClass = "control-plane";
    labels = {
      tier = "control-plane";
    };
  };
  # Worker template: general pool, native runtime, still provisioning.
  worker = portDefaults // {
    role = "worker";
    pool = "general";
    nodeClass = "worker-linux";
    labels = {
      runtime = "native";
    };
    state = "provisioning";
  };
in {
  node01 = controlPlane // {
    ip = "10.100.0.11";
    services = [ "chainfire" "flaredb" "iam" "prismnet" "flashdns" "fiberlb" "k8shost" "plasmavmc" "lightningstor" "coronafs" ];
    failureDomain = "zone-a";
  };
  node02 = controlPlane // {
    ip = "10.100.0.12";
    services = [ "chainfire" "flaredb" "iam" ];
    failureDomain = "zone-b";
  };
  node03 = controlPlane // {
    ip = "10.100.0.13";
    services = [ "chainfire" "flaredb" "iam" ];
    failureDomain = "zone-c";
  };
  node04 = worker // {
    ip = "10.100.0.21";
    services = [ "plasmavmc-agent" "lightningstor-data" "node-agent" ];
    failureDomain = "zone-b";
  };
  node05 = worker // {
    ip = "10.100.0.22";
    services = [ "plasmavmc-agent" "lightningstor-data" "node-agent" ];
    failureDomain = "zone-c";
  };
  # Ingress/management node; its host port forwards are set up in the
  # virtualisation section above.
  node06 = controlPlane // {
    ip = "10.100.0.100";
    services = [ "apigateway" "nightlight" "creditservice" "deployer" "fleet-scheduler" ];
    labels = {
      tier = "control-plane";
      ingress = "true";
    };
    failureDomain = "zone-a";
  };
};
|
|
|
|
deployer = {
|
|
clusterId = "test-cluster";
|
|
environment = "test";
|
|
|
|
# Node classes referenced by the node entries and placement rules above.
nodeClasses = {
  control-plane = {
    description = "Control-plane services and management endpoints";
    roles = [ "control-plane" ];
    labels.tier = "control-plane";
  };

  worker-linux = {
    description = "General-purpose native runtime workers";
    roles = [ "worker" ];
    labels = {
      tier = "general";
      runtime = "native";
    };
  };
};
|
|
|
|
# Scheduling pools; each node above declares which pool it belongs to.
pools = {
  control = {
    description = "Control-plane pool";
    nodeClass = "control-plane";
    labels.plane = "control";
  };

  general = {
    description = "General-purpose native worker pool";
    nodeClass = "worker-linux";
    labels."pool.photoncloud.io/name" = "general";
  };
};
|
|
|
|
# Workloads the deployer schedules onto the cluster. Field semantics are
# defined by ../modules/plasmacloud-cluster.nix (not visible here); the
# comments below describe only what this file itself shows.
services = {
  # Plain-process HTTP service: two python3 http.server replicas on the
  # native-runtime workers, published via DNS and a load balancer.
  native-web = {
    protocol = "http";
    ports.http = 18190;
    schedule = {
      replicas = 2;
      placement = {
        roles = [ "worker" ];
        pools = [ "general" ];
        nodeClasses = [ "worker-linux" ];
        matchLabels = {
          runtime = "native";
        };
        # NOTE(review): snake_case here vs. camelCase `failureDomain` on the
        # node entries — presumably the scheduler's label key; confirm
        # against the cluster module.
        spreadByLabel = "failure_domain";
        maxInstancesPerNode = 1;
      };
      instancePort = 18190;
      process = {
        command = "python3";
        # \${...} is escaped so the variables are expanded by the deployer
        # at launch time, not by Nix.
        args = [
          "-m"
          "http.server"
          "\${INSTANCE_PORT}"
          "--bind"
          "\${INSTANCE_IP}"
        ];
      };
      healthCheck = {
        type = "http";
        path = "/";
        intervalSecs = 5;
        timeoutSecs = 3;
      };
    };
    publish = {
      dns = {
        zone = "native.cluster.test";
        name = "web";
        ttl = 30;
        mode = "load_balancer";
      };
      loadBalancer = {
        orgId = "native-services";
        projectId = "test-cluster";
        # LB listener is distinct from the instance port (18190 above).
        listenerPort = 18191;
        protocol = "http";
        poolProtocol = "http";
      };
    };
  };

  # Container-based HTTP service: single nginx instance under podman,
  # exposed on the host at the same port as the instance port.
  native-container = {
    protocol = "http";
    ports.http = 18192;
    schedule = {
      replicas = 1;
      placement = {
        roles = [ "worker" ];
        pools = [ "general" ];
        nodeClasses = [ "worker-linux" ];
        matchLabels = {
          runtime = "native";
        };
        maxInstancesPerNode = 1;
      };
      instancePort = 18192;
      container = {
        image = "docker.io/library/nginx:1.27-alpine";
        runtime = "podman";
        pullPolicy = "if-not-present";
        ports = [
          {
            containerPort = 80;
            hostPort = 18192;
            protocol = "tcp";
          }
        ];
      };
      healthCheck = {
        type = "http";
        path = "/";
        intervalSecs = 5;
        timeoutSecs = 5;
        # Generous grace period to cover the initial image pull.
        startupGraceSecs = 120;
      };
    };
  };
};
|
|
};
|
|
|
|
# Seed membership with the three control-plane nodes (odd count for quorum).
bootstrap.initialPeers = [ "node01" "node02" "node03" ];
# Private ASN (RFC 6996 16-bit private range starts at 64512).
bgp.asn = 64512;
|
|
};
|
|
};
|
|
}
|