photoncloud-monorepo/nix/modules/plasmavmc.nix

384 lines
14 KiB
Nix

{ config, lib, pkgs, ... }:
let
# Shorthand for the option values declared by this module.
cfg = config.services.plasmavmc;
# True when services.iam.enable exists in this configuration and is set;
# a missing attribute is treated as "not enabled".
localIamEnabled = config.services.iam.enable or false;
# Extra unit names added to the service's after/wants lists: iam.service when
# IAM is enabled on this host, nothing otherwise.
localIamDeps = if localIamEnabled then [ "iam.service" ] else [ ];
# Loopback health URL polled in preStart when IAM is enabled locally; null otherwise.
localIamHealthUrl =
  if localIamEnabled
  then "http://127.0.0.1:${toString config.services.iam.httpPort}/health"
  else null;
# The configured IAM address, but only when IAM is NOT enabled locally;
# null when IAM is local or no address was configured.
remoteIamEndpoint =
  if localIamEnabled || cfg.iamAddr == null
  then null
  else cfg.iamAddr;
# CoronaFS counts as enabled only when the option path exists in this
# configuration and evaluates to true.
coronafsEnabled = config ? services.coronafs.enable && config.services.coronafs.enable;
# CoronaFS data directory as a string, or null when CoronaFS is disabled or
# the dataDir option is not present.
coronafsDataDir =
  if coronafsEnabled && config ? services.coronafs.dataDir
  then toString config.services.coronafs.dataDir
  else null;
# Loopback endpoint of the CoronaFS instance on this host; null when it is not enabled.
coronafsLocalEndpoint =
  if coronafsEnabled
  then "http://127.0.0.1:${toString config.services.coronafs.port}"
  else null;
# Picks the first non-null entry of a candidate list, or null when all are null.
firstConfigured = lib.findFirst (candidate: candidate != null) null;
# Controller endpoint precedence: explicit controller option, then the
# deprecated combined option, then the local CoronaFS instance.
effectiveCoronafsControllerEndpoint = firstConfigured [
  cfg.coronafsControllerEndpoint
  cfg.coronafsEndpoint
  coronafsLocalEndpoint
];
# Node endpoint precedence: explicit node option, then the local CoronaFS
# instance, then the deprecated combined option.
effectiveCoronafsNodeEndpoint = firstConfigured [
  cfg.coronafsNodeEndpoint
  coronafsLocalEndpoint
  cfg.coronafsEndpoint
];
# TOML serializer used to render the daemon's configuration file.
tomlFormat = pkgs.formats.toml { };
# Configuration file handed to plasmavmc-server via --config.
#
# The attribute set below holds the module-managed defaults. Anything set in
# services.plasmavmc.settings is merged on top of it (recursively, so nested
# tables such as `auth` can be extended or overridden key by key). Previously
# the settings option was declared but never consumed; with its default of {}
# the generated file is unchanged.
plasmavmcConfigFile = tomlFormat.generate "plasmavmc.toml" (
  lib.recursiveUpdate
    {
      # Both listeners bind on all interfaces at the configured ports.
      addr = "0.0.0.0:${toString cfg.port}";
      http_addr = "0.0.0.0:${toString cfg.httpPort}";
      log_level = "info";
      auth = {
        # Falls back to a loopback IAM address when none is configured.
        iam_server_addr =
          if cfg.iamAddr != null
          then cfg.iamAddr
          else "127.0.0.1:50080";
      };
    }
    cfg.settings
);
in
{
# Option declarations for the PlasmaVMC service.
#
# Defaults that are computed from `config` or `pkgs` carry a `defaultText` so
# that option documentation can be rendered without evaluating them.
options.services.plasmavmc = {
  enable = lib.mkEnableOption "plasmavmc service";

  mode = lib.mkOption {
    type = lib.types.enum [ "server" "agent" "all-in-one" ];
    default = "all-in-one";
    description = "PlasmaVMC operating mode: server (control-plane), agent (compute), or all-in-one";
  };

  # --- Listener ports -------------------------------------------------------
  port = lib.mkOption {
    type = lib.types.port;
    default = 50082;
    description = "Port for plasmavmc gRPC API";
  };
  httpPort = lib.mkOption {
    type = lib.types.port;
    default = 8084;
    description = "Port for plasmavmc HTTP REST API";
  };

  # --- Peer service endpoints -----------------------------------------------
  prismnetAddr = lib.mkOption {
    type = lib.types.nullOr lib.types.str;
    default = null;
    description = "PrismNET service endpoint address (host:port)";
    example = "10.0.0.1:50081";
  };
  iamAddr = lib.mkOption {
    type = lib.types.nullOr lib.types.str;
    default = null;
    description = "IAM service endpoint address (host:port)";
    example = "10.0.0.1:50080";
  };
  chainfireAddr = lib.mkOption {
    type = lib.types.nullOr lib.types.str;
    default = null;
    description = "ChainFire endpoint address (host:port) for cluster coordination only";
    example = "10.0.0.1:2379";
  };
  flaredbAddr = lib.mkOption {
    type = lib.types.nullOr lib.types.str;
    default = null;
    description = "FlareDB endpoint address (host:port) for metadata/user data";
    example = "10.0.0.1:2479";
  };
  controlPlaneAddr = lib.mkOption {
    type = lib.types.nullOr lib.types.str;
    default = null;
    description = "PlasmaVMC control-plane gRPC endpoint (host:port) for agent heartbeats.";
    example = "10.0.0.11:50082";
  };
  advertiseAddr = lib.mkOption {
    type = lib.types.nullOr lib.types.str;
    default = null;
    description = "Advertised PlasmaVMC gRPC endpoint for scheduler forwarding (host:port).";
    example = "10.0.0.21:50082";
  };
  lightningstorAddr = lib.mkOption {
    type = lib.types.nullOr lib.types.str;
    default = null;
    description = "LightningStor gRPC endpoint (host:port) for VM image artifacts.";
    example = "10.0.0.11:50086";
  };

  # --- CoronaFS integration -------------------------------------------------
  coronafsEndpoint = lib.mkOption {
    type = lib.types.nullOr lib.types.str;
    default = null;
    description = "Deprecated combined CoronaFS HTTP endpoint used to provision and export managed VM volumes.";
    example = "http://10.0.0.11:50088";
  };
  coronafsControllerEndpoint = lib.mkOption {
    type = lib.types.nullOr lib.types.str;
    default = null;
    description = "CoronaFS controller HTTP endpoint used to provision and resize managed VM volumes. Comma-separated endpoints are allowed for client-side failover.";
    example = "http://10.0.0.11:50088";
  };
  coronafsNodeEndpoint = lib.mkOption {
    type = lib.types.nullOr lib.types.str;
    default = null;
    description = "CoronaFS node-local HTTP endpoint used to resolve local paths and exports for attached VM volumes. Comma-separated endpoints are allowed for client-side failover.";
    example = "http://127.0.0.1:50088";
  };
  coronafsNodeLocalAttach = lib.mkOption {
    type = lib.types.bool;
    default = false;
    description = ''
      Enable writable VM attachment through node-local CoronaFS materialization.
      This requires services.plasmavmc.sharedLiveMigration = false because migrations use cold relocate plus flush-back.
    '';
  };
  experimentalCoronafsNodeLocalAttach = lib.mkOption {
    type = lib.types.bool;
    default = false;
    description = ''
      Deprecated alias for services.plasmavmc.coronafsNodeLocalAttach.
    '';
  };

  # --- Volumes and migration ------------------------------------------------
  managedVolumeRoot = lib.mkOption {
    type = lib.types.path;
    default = "/var/lib/plasmavmc/managed-volumes";
    description = "Local root directory used for PlasmaVMC managed raw volumes.";
  };
  sharedLiveMigration = lib.mkOption {
    type = lib.types.bool;
    default = true;
    description = "Whether this node advertises shared-storage live migration capability.";
  };

  # --- Ceph RBD -------------------------------------------------------------
  cephMonitors = lib.mkOption {
    type = lib.types.listOf lib.types.str;
    default = [ ];
    description = "Optional Ceph monitor endpoints used to enable Ceph RBD volumes.";
    example = [ "10.0.0.31:6789" "10.0.0.32:6789" "10.0.0.33:6789" ];
  };
  cephClusterId = lib.mkOption {
    type = lib.types.str;
    default = "default";
    description = "Ceph cluster identifier expected by registered Ceph RBD volumes.";
  };
  cephUser = lib.mkOption {
    type = lib.types.str;
    default = "admin";
    description = "Ceph user passed to QEMU RBD attachments.";
  };
  # NOTE(review): this value is exported as PLASMAVMC_CEPH_SECRET through the
  # unit's environment, so it is written into the generated unit file.
  # Consider a file-based secret if that exposure is not acceptable.
  cephSecret = lib.mkOption {
    type = lib.types.nullOr lib.types.str;
    default = null;
    description = "Optional Ceph secret used for QEMU RBD attachments.";
  };

  # --- Node identity and heartbeats -----------------------------------------
  nodeId = lib.mkOption {
    type = lib.types.str;
    default = config.networking.hostName;
    # The default depends on `config`; give the docs a literal to print.
    defaultText = lib.literalExpression "config.networking.hostName";
    description = "Node identifier used when running in agent/all-in-one mode.";
  };
  nodeName = lib.mkOption {
    type = lib.types.str;
    default = config.networking.hostName;
    defaultText = lib.literalExpression "config.networking.hostName";
    description = "Node display name used in PlasmaVMC heartbeats.";
  };
  heartbeatIntervalSeconds = lib.mkOption {
    type = lib.types.int;
    default = 5;
    description = "Heartbeat interval for PlasmaVMC agents.";
  };

  # --- Storage, free-form settings, package ---------------------------------
  dataDir = lib.mkOption {
    type = lib.types.path;
    default = "/var/lib/plasmavmc";
    description = "Data directory for plasmavmc";
  };
  settings = lib.mkOption {
    # Use the TOML format's own type so that definitions from several modules
    # merge key by key and only TOML-representable values are accepted.
    type = tomlFormat.type;
    default = {};
    description = "Additional configuration settings";
  };
  package = lib.mkOption {
    type = lib.types.package;
    default = pkgs.plasmavmc-server or (throw "plasmavmc-server package not found");
    # Without defaultText, rendering the docs would evaluate the default and
    # hit the throw whenever the package is not in pkgs.
    defaultText = lib.literalExpression "pkgs.plasmavmc-server";
    description = "Package to use for plasmavmc";
  };
};
config = lib.mkIf cfg.enable {
# Node-local attach (under either option name) must not be combined with
# advertising shared-storage live migration.
assertions = [
  {
    assertion =
      (cfg.coronafsNodeLocalAttach || cfg.experimentalCoronafsNodeLocalAttach)
      -> !cfg.sharedLiveMigration;
    message = ''
      services.plasmavmc.coronafsNodeLocalAttach requires services.plasmavmc.sharedLiveMigration = false
      because writable node-local CoronaFS attachment uses cold relocate plus flush-back instead of shared-storage live migration.
    '';
  }
];
# Deprecation notices, emitted only for the deprecated options actually in use.
warnings =
  let
    # One-element list carrying the notice when the deprecated option is used.
    noticeWhen = inUse: text: if inUse then [ text ] else [ ];
  in
  noticeWhen (cfg.coronafsEndpoint != null) ''
    services.plasmavmc.coronafsEndpoint is deprecated; use services.plasmavmc.coronafsControllerEndpoint and services.plasmavmc.coronafsNodeEndpoint.
  ''
  ++ noticeWhen cfg.experimentalCoronafsNodeLocalAttach ''
    services.plasmavmc.experimentalCoronafsNodeLocalAttach is deprecated; use services.plasmavmc.coronafsNodeLocalAttach.
  '';
# Dedicated system group and account that the service runs as.
users.groups.plasmavmc = { };
users.users.plasmavmc = {
  description = "PlasmaVMC service user";
  isSystemUser = true;
  group = "plasmavmc";
  home = cfg.dataDir;
  # Always in kvm; additionally in coronafs when CoronaFS is enabled on this host.
  extraGroups =
    if coronafsEnabled
    then [ "kvm" "coronafs" ]
    else [ "kvm" ];
};
# systemd unit for the PlasmaVMC daemon.
systemd.services.plasmavmc =
  let
    # Units this service is ordered after and also pulls in (same list for both).
    startupUnits =
      [ "network-online.target" "prismnet.service" "flaredb.service" "chainfire.service" ]
      ++ localIamDeps;
    # kvm always; coronafs only when CoronaFS is enabled on this host.
    extraServiceGroups = [ "kvm" ] ++ lib.optional coronafsEnabled "coronafs";
    # Node-local attach can be requested through the current or the deprecated option.
    nodeLocalAttachRequested =
      cfg.coronafsNodeLocalAttach || cfg.experimentalCoronafsNodeLocalAttach;
    flaredbEndpoint =
      if cfg.flaredbAddr != null then cfg.flaredbAddr else "127.0.0.1:2479";

    # The script bodies below are kept flush (no inner indentation) so the
    # generated shell text stays exactly as before.

    # Polls the local IAM health URL once per second, up to 90 attempts.
    waitForLocalIam = ''
      for _ in $(seq 1 90); do
      if curl -fsS ${lib.escapeShellArg localIamHealthUrl} >/dev/null 2>&1; then
      exit 0
      fi
      sleep 1
      done
      echo "plasmavmc: timed out waiting for local IAM health at ${localIamHealthUrl}" >&2
      exit 1
    '';
    # Strips an http(s):// prefix, splits host and port at the last colon, and
    # probes the TCP port once per second, up to 90 attempts.
    waitForRemoteIam = ''
      endpoint=${lib.escapeShellArg remoteIamEndpoint}
      endpoint="''${endpoint#http://}"
      endpoint="''${endpoint#https://}"
      host="''${endpoint%:*}"
      port="''${endpoint##*:}"
      for _ in $(${pkgs.coreutils}/bin/seq 1 90); do
      if ${pkgs.coreutils}/bin/timeout 1 ${pkgs.bash}/bin/bash -lc "</dev/tcp/''${host}/''${port}" >/dev/null 2>&1; then
      exit 0
      fi
      sleep 1
      done
      echo "plasmavmc: timed out waiting for IAM gRPC at ''${host}:''${port}" >&2
      exit 1
    '';
  in
  {
    description = "PlasmaVMC Virtual Machine Compute Service";
    wantedBy = [ "multi-user.target" ];
    after = startupUnits;
    wants = startupUnits;
    path = [ pkgs.qemu pkgs.coreutils pkgs.curl ];

    # Wait for IAM before starting. The two waits are mutually exclusive:
    # remoteIamEndpoint is only non-null when IAM is not enabled locally.
    preStart =
      lib.optionalString (localIamHealthUrl != null) waitForLocalIam
      + lib.optionalString (remoteIamEndpoint != null) waitForRemoteIam;

    environment = lib.mkMerge [
      # Always-present settings.
      {
        PLASMAVMC_MODE = cfg.mode;
        PLASMAVMC_STORAGE_BACKEND = "flaredb";
        PLASMAVMC_FLAREDB_ENDPOINT = flaredbEndpoint;
        PLASMAVMC_QEMU_PATH = "${pkgs.qemu}/bin/qemu-system-x86_64";
        PLASMAVMC_RUNTIME_DIR = "/run/libvirt/plasmavmc";
        PLASMAVMC_IMAGE_CACHE_DIR = "${toString cfg.dataDir}/images";
        PLASMAVMC_MANAGED_VOLUME_ROOT = toString cfg.managedVolumeRoot;
        PLASMAVMC_SHARED_LIVE_MIGRATION = lib.boolToString cfg.sharedLiveMigration;
      }

      # Optional peer endpoints, exported only when configured.
      (lib.mkIf (cfg.prismnetAddr != null) {
        PRISMNET_ENDPOINT = "http://${cfg.prismnetAddr}";
      })
      (lib.mkIf (cfg.chainfireAddr != null) {
        PLASMAVMC_CHAINFIRE_ENDPOINT = "http://${cfg.chainfireAddr}";
        PLASMAVMC_STATE_WATCHER = "1";
      })
      (lib.mkIf (cfg.lightningstorAddr != null) {
        PLASMAVMC_LIGHTNINGSTOR_ENDPOINT = cfg.lightningstorAddr;
      })

      # CoronaFS endpoints and attach mode.
      (lib.mkIf (effectiveCoronafsControllerEndpoint != null) {
        PLASMAVMC_CORONAFS_CONTROLLER_ENDPOINT = effectiveCoronafsControllerEndpoint;
      })
      (lib.mkIf (effectiveCoronafsNodeEndpoint != null) {
        PLASMAVMC_CORONAFS_NODE_ENDPOINT = effectiveCoronafsNodeEndpoint;
      })
      (lib.mkIf nodeLocalAttachRequested {
        PLASMAVMC_CORONAFS_NODE_LOCAL_ATTACH = "1";
        PLASMAVMC_CORONAFS_ENABLE_EXPERIMENTAL_NODE_LOCAL_ATTACH = "1";
      })
      (lib.mkIf (cfg.coronafsEndpoint != null) {
        PLASMAVMC_CORONAFS_ENDPOINT = cfg.coronafsEndpoint;
      })

      # Ceph RBD: monitors, cluster id and user go together; the secret is separate.
      (lib.mkIf (cfg.cephMonitors != [ ]) {
        PLASMAVMC_CEPH_MONITORS = lib.concatStringsSep "," cfg.cephMonitors;
        PLASMAVMC_CEPH_CLUSTER_ID = cfg.cephClusterId;
        PLASMAVMC_CEPH_USER = cfg.cephUser;
      })
      (lib.mkIf (cfg.cephSecret != null) {
        PLASMAVMC_CEPH_SECRET = cfg.cephSecret;
      })

      # Node identity and heartbeat settings for every mode except "server".
      (lib.mkIf (cfg.mode != "server") {
        PLASMAVMC_NODE_ID = cfg.nodeId;
        PLASMAVMC_NODE_NAME = cfg.nodeName;
        PLASMAVMC_NODE_HEARTBEAT_INTERVAL_SECS = toString cfg.heartbeatIntervalSeconds;
      })
      (lib.mkIf (cfg.controlPlaneAddr != null) {
        PLASMAVMC_CONTROL_PLANE_ADDR = cfg.controlPlaneAddr;
      })
      (lib.mkIf (cfg.advertiseAddr != null) {
        PLASMAVMC_ENDPOINT_ADVERTISE = cfg.advertiseAddr;
      })

      # Fixed node-health monitoring values, set only in "server" mode.
      (lib.mkIf (cfg.mode == "server") {
        PLASMAVMC_NODE_HEALTH_MONITOR_INTERVAL_SECS = "5";
        PLASMAVMC_NODE_HEARTBEAT_TIMEOUT_SECS = "30";
      })
    ];

    serviceConfig = {
      Type = "simple";
      User = "plasmavmc";
      Group = "plasmavmc";
      SupplementaryGroups = extraServiceGroups;
      Restart = "on-failure";
      RestartSec = "10s";

      # Directories managed by systemd.
      StateDirectory = "plasmavmc";
      StateDirectoryMode = "0750";
      # NOTE(review): this makes the unit manage "libvirt" under the runtime
      # directory root, while PLASMAVMC_RUNTIME_DIR points at a subdirectory
      # of it — confirm this cannot clash with another owner of /run/libvirt.
      RuntimeDirectory = "libvirt";
      RuntimeDirectoryMode = "0755";

      # Hardening, relaxed where KVM access requires it.
      NoNewPrivileges = false; # Needed for KVM
      PrivateTmp = true;
      ProtectSystem = "strict";
      ProtectHome = true;
      ReadWritePaths =
        [ cfg.dataDir "/run/libvirt" cfg.managedVolumeRoot ]
        ++ lib.optional (coronafsDataDir != null) coronafsDataDir;
      DeviceAllow = [ "/dev/kvm rw" ];

      # Daemon entry point with the generated TOML configuration.
      ExecStart = "${cfg.package}/bin/plasmavmc-server --config ${plasmavmcConfigFile}";
    };
  };
# Directories created via systemd-tmpfiles.
#
# The parent of managedVolumeRoot is only given to the plasmavmc user when it
# lies inside the service's own dataDir (the default layout). Previously the
# parent was chowned unconditionally, so a custom root such as /srv/volumes or
# /volumes would have handed /srv or / to the service account. For parents
# outside dataDir no rule is emitted; tmpfiles creates missing leading
# directories itself when it processes the rule for managedVolumeRoot.
systemd.tmpfiles.rules =
  let
    volumeRoot = toString cfg.managedVolumeRoot;
    volumeParent = builtins.dirOf volumeRoot;
    # Trailing slash removed for the comparison only; rule text uses the
    # option values as given.
    serviceDataDir = lib.removeSuffix "/" (toString cfg.dataDir);
    parentInsideDataDir =
      volumeParent == serviceDataDir
      || lib.hasPrefix "${serviceDataDir}/" volumeParent;
  in
  lib.optional parentInsideDataDir "d ${volumeParent} 0755 plasmavmc plasmavmc -"
  ++ [
    "d ${volumeRoot} 0750 plasmavmc plasmavmc -"
  ]
  # Image cache directory is setgid and group-owned by coronafs when CoronaFS is enabled.
  ++ lib.optionals coronafsEnabled [
    "d ${toString cfg.dataDir}/images 2770 plasmavmc coronafs -"
  ];
};
}