photoncloud-monorepo/nix/test-cluster/common.nix
centra 37f5479ab8
Some checks failed
Nix CI / filter (push) Failing after 1s
Nix CI / gate () (push) Has been skipped
Nix CI / gate (shared crates) (push) Has been skipped
Nix CI / build () (push) Has been skipped
Nix CI / ci-status (push) Failing after 1s
Add daemon scheduling for native services
2026-03-30 21:31:32 +09:00

409 lines
12 KiB
Nix

# PhotonCloud 6-Node Test Cluster
#
# Common configuration shared by all nodes
#
# Usage: Import this from individual node configurations
{ config, lib, pkgs, modulesPath, ... }:
let
cfg = config.photonTestCluster;
in
{
imports = [
(modulesPath + "/virtualisation/qemu-vm.nix")
../../nix-nos/modules/default.nix
../modules/plasmacloud-cluster.nix
];
# Tunable knobs shared by all test-cluster nodes.
options.photonTestCluster = let
  inherit (lib) mkOption types;
  # Helper for the two comma-separated endpoint-list options, which differ
  # only in their default value and description.
  mkEndpointsOption = default: description: mkOption {
    type = types.str;
    inherit default description;
  };
in {
  sshBasePort = mkOption {
    type = types.port;
    default = 2200;
    description = "Base host port used for guest SSH forwarding.";
  };
  vdeSock = mkOption {
    type = types.str;
    default = "/tmp/photoncloud-test-cluster-vde.sock";
    description = "VDE control socket path used for the east-west cluster NIC.";
  };
  chainfireControlPlaneAddrs = mkEndpointsOption
    "10.100.0.11:2379,10.100.0.12:2379,10.100.0.13:2379"
    "Comma-separated ChainFire client endpoints for multi-endpoint failover.";
  flaredbControlPlaneAddrs = mkEndpointsOption
    "10.100.0.11:2479,10.100.0.12:2479,10.100.0.13:2479"
    "Comma-separated FlareDB client endpoints for multi-endpoint failover.";
};
config = {
virtualisation = let
  # Derive the node's numeric index from its hostname, e.g. "node01" -> 1.
  # The previous `removePrefix "node0"` form failed for hostnames past
  # "node09" (prefix not stripped, toInt throws); stripping "node" and
  # parsing with toIntBase10 (which tolerates leading zeros) handles both
  # "node01" and "node10".
  nodeIndex = lib.strings.toIntBase10
    (lib.strings.removePrefix "node" config.networking.hostName);
  # Zero-padded two-digit suffix gives each node a stable, unique MAC.
  macSuffix = lib.strings.fixedWidthString 2 "0" (toString nodeIndex);
  vdeSock = cfg.vdeSock;
in {
  graphics = false;
  cores = 2;
  # Host-side forwards: per-node SSH (sshBasePort + index), plus the
  # externally reachable service ports on node06 only.
  forwardPorts =
    [
      { from = "host"; host.port = cfg.sshBasePort + nodeIndex; guest.port = 22; }
    ]
    ++ lib.optionals (config.networking.hostName == "node06") [
      { from = "host"; host.port = 8080; guest.port = 8080; }
      { from = "host"; host.port = 8443; guest.port = 8443; }
      { from = "host"; host.port = 9090; guest.port = 9090; }
      { from = "host"; host.port = 3000; guest.port = 3000; }
    ];
  qemu.options = [
    # Nested KVM validation requires hardware acceleration and host CPU flags.
    "-enable-kvm"
    "-cpu host"
    # eth1: Cluster network shared across all VMs. VDE is materially faster
    # than multicast sockets for this nested-QEMU storage lab.
    "-netdev vde,id=n1,sock=${vdeSock}"
    "-device virtio-net-pci,netdev=n1,mac=52:54:00:10:00:${macSuffix}"
  ];
};
# East-west traffic is unrestricted; the cluster network is host-local only.
networking.firewall.enable = false;
# Deliberately permissive SSH (root + password login): this is a throwaway
# local test cluster reachable only via the host's forwarded ports.
services.openssh = {
enable = true;
settings = {
KbdInteractiveAuthentication = false;
PasswordAuthentication = true;
PermitRootLogin = "yes";
};
};
users.mutableUsers = false;
# Shared static root password hash for all nodes (test-only credential).
users.users.root.hashedPassword = "$6$photoncloud$aUJCEE5wm/b5O.9KIKGm84qUWdWXwnebsFEiMBF7u9Y7AOWodaMrjbbKGMOf0X59VJyJeMRsgbT7VWeqMHpUe.";
# qemu-vm.nix provides the default SLiRP NIC as eth0.
# The extra VDE NIC added via qemu.options above becomes eth1 and carries
# intra-cluster traffic (static addressing is configured per node).
networking.interfaces.eth0.useDHCP = true;
boot.loader.grub.device = "nodev";
# nbd with raised device/partition limits — presumably for attaching
# network block devices during the storage tests (see net-tuning unit).
boot.kernelModules = [ "nbd" ];
boot.extraModprobeConfig = ''
options nbd nbds_max=16 max_part=8
'';
fileSystems."/" = { device = "/dev/disk/by-label/nixos"; fsType = "ext4"; };
system.stateVersion = "24.05";
# One-shot unit that disables NIC offloads on the cluster interface after
# the network comes up; exits successfully even when eth1 never appears.
systemd.services.photon-test-cluster-net-tuning = {
  description = "Tune cluster NIC offloads for nested-QEMU storage tests";
  wantedBy = [ "multi-user.target" ];
  wants = [ "network-online.target" ];
  after = [ "network-online.target" ];
  serviceConfig = {
    # RemainAfterExit keeps the one-shot unit reported as active once done.
    Type = "oneshot";
    RemainAfterExit = true;
  };
  path = [ pkgs.ethtool pkgs.iproute2 pkgs.coreutils ];
  script = ''
    set -eu
    iface="eth1"
    # The NIC can show up slightly after network-online.target in nested
    # QEMU; poll once a second for up to 30 seconds.
    remaining=30
    while [ "$remaining" -gt 0 ] && ! ip link show "$iface" >/dev/null 2>&1; do
      remaining=$((remaining - 1))
      sleep 1
    done
    if ! ip link show "$iface" >/dev/null 2>&1; then
      echo "photon-test-cluster-net-tuning: $iface not present, skipping" >&2
      exit 0
    fi
    # Nested QEMU over VDE is sensitive to guest-side offloads; disabling
    # them reduces retransmits and keeps the storage benchmarks closer to
    # raw TCP throughput.
    ethtool -K "$iface" tso off gso off gro off tx off rx off sg off || true
    ip link set dev "$iface" txqueuelen 10000 || true
  '';
};
# Interactive toolbox baked into every node image: network debugging,
# storage/throughput benchmarking (fio, iperf3), and scripting helpers.
environment.systemPackages = with pkgs; [
awscli2
curl
dnsutils
ethtool
fio
jq
grpcurl
htop
iperf3
(python3.withPackages (ps: [ ps.boto3 ]))
vim
netcat
iproute2
tcpdump
pciutils # lspci for debugging
qemu
];
plasmacloud.cluster = {
enable = true;
# Logical cluster name shared by all member nodes.
name = "photoncloud-test";
nodes = let
  # Every node record carries the same raft/client-API port pair.
  commonPorts = {
    raftPort = 2380;
    apiPort = 2379;
  };
  # A control-plane member in the "control" pool; labels default to the
  # plain control-plane tier (node06 overrides to add the ingress label).
  mkControlPlane = { ip, failureDomain, services, labels ? { tier = "control-plane"; } }:
    commonPorts // {
      role = "control-plane";
      pool = "control";
      nodeClass = "control-plane";
      inherit ip failureDomain services labels;
    };
  # A native-runtime worker in the "general" pool; starts in the
  # "provisioning" state with the standard agent/data service set.
  mkWorker = { ip, failureDomain }:
    commonPorts // {
      role = "worker";
      pool = "general";
      nodeClass = "worker-linux";
      services = [ "plasmavmc-agent" "lightningstor-data" "node-agent" ];
      labels = { runtime = "native"; };
      state = "provisioning";
      inherit ip failureDomain;
    };
in {
  # Primary control-plane node; also hosts the full platform service stack.
  node01 = mkControlPlane {
    ip = "10.100.0.11";
    failureDomain = "zone-a";
    services = [ "chainfire" "flaredb" "iam" "prismnet" "flashdns" "fiberlb" "k8shost" "plasmavmc" "lightningstor" "coronafs" ];
  };
  node02 = mkControlPlane {
    ip = "10.100.0.12";
    failureDomain = "zone-b";
    services = [ "chainfire" "flaredb" "iam" ];
  };
  node03 = mkControlPlane {
    ip = "10.100.0.13";
    failureDomain = "zone-c";
    services = [ "chainfire" "flaredb" "iam" ];
  };
  node04 = mkWorker { ip = "10.100.0.21"; failureDomain = "zone-b"; };
  node05 = mkWorker { ip = "10.100.0.22"; failureDomain = "zone-c"; };
  # Gateway node: carries the ingress label and runs the externally
  # reachable services (host port forwards exist for node06 only).
  node06 = mkControlPlane {
    ip = "10.100.0.100";
    failureDomain = "zone-a";
    labels = {
      tier = "control-plane";
      ingress = "true";
    };
    services = [ "apigateway" "nightlight" "creditservice" "deployer" "fleet-scheduler" ];
  };
};
deployer = {
# Deployer-facing identity for this cluster (test environment).
clusterId = "test-cluster";
environment = "test";
# Node-class catalog; service placement below selects nodes by these
# class names and labels.
nodeClasses = {
control-plane = {
description = "Control-plane services and management endpoints";
roles = [ "control-plane" ];
labels = {
tier = "control-plane";
};
};
worker-linux = {
description = "General-purpose native runtime workers";
roles = [ "worker" ];
labels = {
tier = "general";
runtime = "native";
};
};
};
# Pool definitions; each node above opts into exactly one pool.
pools = {
control = {
description = "Control-plane pool";
nodeClass = "control-plane";
labels = {
plane = "control";
};
};
general = {
description = "General-purpose native worker pool";
nodeClass = "worker-linux";
labels = {
# NOTE(review): namespaced label key, unlike the plain keys used
# elsewhere in this file — presumably surfaced to external tooling.
"pool.photoncloud.io/name" = "general";
};
};
};
services = {
# Replicated native-process service: two python http.server instances
# spread across failure domains, published via DNS and a load balancer.
native-web = {
protocol = "http";
ports.http = 18190;
schedule = {
replicas = 2;
placement = {
roles = [ "worker" ];
pools = [ "general" ];
nodeClasses = [ "worker-linux" ];
matchLabels = {
runtime = "native";
};
# Spread replicas across distinct failure_domain values, one per node.
spreadByLabel = "failure_domain";
maxInstancesPerNode = 1;
};
instancePort = 18190;
process = {
command = "python3";
# The backslash stops Nix interpolation; the literal $-placeholders
# reach the scheduler, which presumably substitutes the per-instance
# port/IP at launch — TODO confirm against the node-agent contract.
args = [
"-m"
"http.server"
"\${INSTANCE_PORT}"
"--bind"
"\${INSTANCE_IP}"
];
};
healthCheck = {
type = "http";
path = "/";
intervalSecs = 5;
timeoutSecs = 3;
};
};
publish = {
dns = {
zone = "native.cluster.test";
name = "web";
ttl = 30;
mode = "load_balancer";
};
loadBalancer = {
orgId = "native-services";
projectId = "test-cluster";
# LB listener is offset by one from the instance port (18190 -> 18191).
listenerPort = 18191;
protocol = "http";
poolProtocol = "http";
};
};
};
# Single-replica containerized service: nginx under podman on one worker.
native-container = {
protocol = "http";
ports.http = 18192;
schedule = {
replicas = 1;
placement = {
roles = [ "worker" ];
pools = [ "general" ];
nodeClasses = [ "worker-linux" ];
matchLabels = {
runtime = "native";
};
maxInstancesPerNode = 1;
};
instancePort = 18192;
container = {
image = "docker.io/library/nginx:1.27-alpine";
runtime = "podman";
pullPolicy = "if-not-present";
ports = [
{
containerPort = 80;
hostPort = 18192;
protocol = "tcp";
}
];
};
healthCheck = {
type = "http";
path = "/";
intervalSecs = 5;
timeoutSecs = 5;
# Generous startup grace — presumably to cover the first image pull
# on a cold node (pullPolicy is "if-not-present").
startupGraceSecs = 120;
};
};
};
# Daemon-mode native service. NOTE(review): mode = "daemon" presumably
# schedules one instance per matching node, in which case `replicas` is
# ignored — confirm against the fleet-scheduler semantics.
native-daemon = {
protocol = "http";
ports.http = 18193;
schedule = {
mode = "daemon";
replicas = 1;
placement = {
roles = [ "worker" ];
pools = [ "general" ];
nodeClasses = [ "worker-linux" ];
matchLabels = {
runtime = "native";
};
maxInstancesPerNode = 1;
};
instancePort = 18193;
process = {
command = "python3";
# Escaped $-placeholders are passed through to the scheduler verbatim
# (same launch-time substitution convention as native-web above).
args = [
"-m"
"http.server"
"\${INSTANCE_PORT}"
"--bind"
"\${INSTANCE_IP}"
];
};
healthCheck = {
type = "http";
path = "/";
intervalSecs = 5;
timeoutSecs = 3;
};
};
};
};
};
# Seed peers for initial cluster formation: the three control-plane nodes.
bootstrap.initialPeers = [ "node01" "node02" "node03" ];
# Private ASN (RFC 6996 range) for intra-cluster BGP.
bgp.asn = 64512;
};
};
}