photoncloud-monorepo/nix/modules/node-agent.nix

117 lines
4 KiB
Nix

# NixOS module declaring the `services.node-agent` options and the systemd
# unit, tmpfiles rules and packages derived from them.
{ config, lib, pkgs, ... }:
let
# Shorthand for the evaluated option values of this module.
cfg = config.services.node-agent;
# Subdirectory of the state directory that is created via tmpfiles and handed
# to the node-agent binary through `--pid-dir`.
pidDir = "${cfg.stateDir}/pids";
in
{
# Option declarations for the node-agent service.
#
# Descriptions and default texts must not depend on evaluated configuration:
# option documentation is rendered without a real `config`/`pkgs`, so any
# config-dependent default carries an explicit `defaultText`.
options.services.node-agent = {
  enable = lib.mkEnableOption "UltraCloud node-agent service (native runtime reconcile only, consuming fleet-scheduler instance state)";

  chainfireEndpoint = lib.mkOption {
    type = lib.types.str;
    default = "http://127.0.0.1:2379";
    description = "ChainFire endpoint consumed by node-agent for scheduled runtime state; node-agent does not switch the base OS.";
  };

  clusterNamespace = lib.mkOption {
    type = lib.types.str;
    default = "ultracloud";
    description = "Cluster namespace prefix";
  };

  # No default: the cluster ID has to be set explicitly wherever the service
  # is enabled.
  clusterId = lib.mkOption {
    type = lib.types.str;
    description = "Cluster ID reconciled by node-agent";
  };

  nodeId = lib.mkOption {
    type = lib.types.str;
    default = config.networking.hostName;
    # The default reads `config`, so give the docs a literal to render
    # instead of forcing evaluation of the host name.
    defaultText = lib.literalExpression "config.networking.hostName";
    description = "Node ID represented by this agent";
  };

  intervalSecs = lib.mkOption {
    type = lib.types.int;
    default = 15;
    description = "Polling interval in seconds";
  };

  apply = lib.mkOption {
    type = lib.types.bool;
    default = true;
    description = "Apply desired runtime state on the node; base-system rollout remains the nix-agent path.";
  };

  allowLocalInstanceUpsert = lib.mkOption {
    type = lib.types.bool;
    default = false;
    description = "Allow /etc/ultracloud/instances.json upserts into ChainFire";
  };

  enableContainers = lib.mkOption {
    type = lib.types.bool;
    default = true;
    description = "Install and enable Podman for container-based host workloads managed by node-agent. This is separate from k8shost tenant workload semantics.";
  };

  extraPackages = lib.mkOption {
    type = lib.types.listOf lib.types.package;
    default = [ ];
    description = "Additional packages made available to managed workloads. Secrets and volume contents still have to exist on the host already; node-agent does not provision them.";
  };

  package = lib.mkOption {
    type = lib.types.package;
    # Throws lazily, i.e. only when the package is actually needed and the
    # package set does not provide `node-agent`.
    default = pkgs.node-agent or (throw "node-agent package not found");
    defaultText = lib.literalExpression "pkgs.node-agent";
    description = "Package to use for node-agent";
  };

  stateDir = lib.mkOption {
    type = lib.types.str;
    default = "/var/lib/node-agent";
    # Written without interpolating the option's own value so the
    # description stays independent of the evaluated configuration.
    description = "State directory for node-agent process metadata. Per-instance pid files, argv and boot-id metadata, and combined stdout/stderr logs live under the pids subdirectory of this directory.";
  };
};
# System configuration produced when the service is enabled: optional Podman
# support, the state directories, and the node-agent systemd unit.
config = lib.mkIf cfg.enable {
  # Only ever switch Podman on. Defining the option as `false` when
  # enableContainers is off would clash with any other module that enables
  # Podman for its own purposes (conflicting bool definitions).
  virtualisation.podman.enable = lib.mkIf cfg.enableContainers true;

  environment.systemPackages =
    lib.mkAfter (lib.optionals cfg.enableContainers [ pkgs.podman ] ++ cfg.extraPackages);

  # State directory and its pid subdirectory, both restricted to root.
  systemd.tmpfiles.rules = [
    "d ${cfg.stateDir} 0750 root root -"
    "d ${pidDir} 0750 root root -"
  ];

  systemd.services.node-agent = {
    description = "UltraCloud Node Agent (native runtime reconcile only)";
    wantedBy = [ "multi-user.target" ];
    after = [ "network-online.target" ];
    wants = [ "network-online.target" ];

    # Executables visible to the agent: the system profile, Podman when
    # container support is on, and any user-supplied extras.
    path =
      [ config.system.path ]
      ++ lib.optionals cfg.enableContainers [ pkgs.podman ]
      ++ cfg.extraPackages;

    serviceConfig = {
      Type = "simple";
      Restart = "on-failure";
      RestartSec = "5s";
      WorkingDirectory = cfg.stateDir;

      ExecStart =
        let
          # systemd parses ExecStart itself (no shell) and expands "%"
          # specifiers and "$VAR" references even inside quotes. Quote each
          # argument as a C-style double-quoted string and double those two
          # characters so option values reach node-agent verbatim.
          quoteArg = arg:
            lib.replaceStrings [ "%" "$" ] [ "%%" "$$" ] (builtins.toJSON arg);

          # Flags are appended only when enabled, so the rendered command
          # line never contains dangling continuation lines.
          args =
            [
              "--chainfire-endpoint" cfg.chainfireEndpoint
              "--cluster-namespace" cfg.clusterNamespace
              "--cluster-id" cfg.clusterId
              "--node-id" cfg.nodeId
              "--interval-secs" (toString cfg.intervalSecs)
              "--pid-dir" pidDir
            ]
            ++ lib.optional cfg.apply "--apply"
            ++ lib.optional cfg.allowLocalInstanceUpsert "--allow-local-instance-upsert";
        in
        lib.concatStringsSep " " ([ "${cfg.package}/bin/node-agent" ] ++ map quoteArg args);
    };
  };
};
}