photoncloud-monorepo/nix/modules/deployer.nix

310 lines
9.7 KiB
Nix

{ config, lib, pkgs, ... }:
let
  # Shorthand for the option values declared under services.deployer.
  cfg = config.services.deployer;

  # Serializer used to render the server settings as a TOML file.
  tomlFormat = pkgs.formats.toml { };

  # Keys that are always written to deployer.toml.
  # bootstrapToken / adminToken are intentionally absent here; they are not
  # part of the generated file.
  alwaysPresent = {
    bind_addr = cfg.bindAddr;
    chainfire = {
      endpoints = cfg.chainfireEndpoints;
      namespace = cfg.chainfireNamespace;
    };
    cluster_namespace = cfg.clusterNamespace;
    heartbeat_timeout_secs = cfg.heartbeatTimeoutSecs;
    local_state_path = cfg.localStatePath;
    allow_admin_fallback = cfg.allowAdminFallback;
    allow_unauthenticated = cfg.allowUnauthenticated;
    require_chainfire = cfg.requireChainfire;
    allow_unknown_nodes = cfg.allowUnknownNodes;
    allow_test_mappings = cfg.allowTestMappings;
    tls_self_signed = cfg.tlsSelfSigned;
  };

  # Keys backed by nullable options: a null option leaves the key out of the
  # file entirely instead of writing an empty value.
  whenSet = lib.filterAttrs (_: value: value != null) {
    cluster_id = cfg.clusterId;
    # toString null would yield "", so keep null as null for the filter above.
    bootstrap_flake_bundle_path =
      if cfg.bootstrapFlakeBundle == null then null else toString cfg.bootstrapFlakeBundle;
    tls_ca_cert_path = cfg.tlsCaCertPath;
    tls_ca_key_path = cfg.tlsCaKeyPath;
  };

  # Complete settings tree handed to the TOML generator.
  generatedConfig = alwaysPresent // whenSet;

  # Store path of the rendered deployer.toml, passed to the server via --config.
  configFile = tomlFormat.generate "deployer.toml" generatedConfig;
in
{
# Option declarations for the deployer service. Everything below is consumed by
# the `config` section of this module, either through the generated
# deployer.toml or through the systemd units.
options.services.deployer = {
  enable = lib.mkEnableOption "deployer bootstrap orchestration service";

  # --- Server settings rendered into deployer.toml -------------------------

  bindAddr = lib.mkOption {
    type = lib.types.str;
    default = "0.0.0.0:8080";
    description = "Bind address for deployer HTTP API";
  };

  chainfireEndpoints = lib.mkOption {
    type = lib.types.listOf lib.types.str;
    default = [ ];
    description = "ChainFire endpoints for persistent deployer state";
    example = [ "http://127.0.0.1:2379" ];
  };

  chainfireNamespace = lib.mkOption {
    type = lib.types.str;
    default = "deployer";
    description = "Namespace prefix used in ChainFire";
  };

  # null leaves cluster_id out of the config file and omits --cluster-id from
  # the seed command.
  clusterId = lib.mkOption {
    type = lib.types.nullOr lib.types.str;
    default = null;
    description = "Cluster ID used when writing desired state";
  };

  clusterNamespace = lib.mkOption {
    type = lib.types.str;
    default = "photoncloud";
    description = "Cluster namespace prefix";
  };

  heartbeatTimeoutSecs = lib.mkOption {
    type = lib.types.int;
    default = 300;
    description = "Node heartbeat timeout in seconds";
  };

  # Also added to the unit's ReadWritePaths and created via tmpfiles.
  localStatePath = lib.mkOption {
    type = lib.types.str;
    default = "/var/lib/deployer/state";
    description = "Local storage path for deployer bootstrap state";
  };

  bootstrapFlakeBundle = lib.mkOption {
    type = lib.types.nullOr lib.types.path;
    default = null;
    description = "Optional tar.gz bundle served to bootstrap installers as the canonical PhotonCloud flake source";
  };

  requireChainfire = lib.mkOption {
    type = lib.types.bool;
    default = false;
    description = "Fail startup when ChainFire is unavailable";
  };

  # --- Authentication / registration policy --------------------------------

  allowUnauthenticated = lib.mkOption {
    type = lib.types.bool;
    default = true;
    description = "Allow unauthenticated API requests";
  };

  allowUnknownNodes = lib.mkOption {
    type = lib.types.bool;
    default = true;
    description = "Allow unknown machine-id auto registration";
  };

  allowTestMappings = lib.mkOption {
    type = lib.types.bool;
    default = false;
    description = "Enable built-in test machine-id mappings";
  };

  # NOTE(review): when set, this value is exported to the service through the
  # unit's `environment` (DEPLOYER_BOOTSTRAP_TOKEN), so it becomes part of the
  # generated unit file. Prefer a secrets mechanism for production use.
  bootstrapToken = lib.mkOption {
    type = lib.types.nullOr lib.types.str;
    default = null;
    description = "Shared bootstrap token for phone-home API";
  };

  # NOTE(review): same handling as bootstrapToken (DEPLOYER_ADMIN_TOKEN).
  adminToken = lib.mkOption {
    type = lib.types.nullOr lib.types.str;
    default = null;
    description = "Shared admin token for admin APIs";
  };

  # --- Declarative cluster-state seeding -----------------------------------

  clusterStateFile = lib.mkOption {
    type = lib.types.nullOr lib.types.path;
    default = null;
    description = "Optional declarative cluster state JSON/YAML file applied with deployer-ctl";
  };

  # Requires clusterStateFile and chainfireEndpoints (enforced by assertions).
  seedClusterState = lib.mkOption {
    type = lib.types.bool;
    default = false;
    description = "Apply the declarative cluster state file to ChainFire during boot";
  };

  seedClusterStatePrune = lib.mkOption {
    type = lib.types.bool;
    default = true;
    description = "Prune stale cluster-state objects when applying declarative state";
  };

  seedClusterStateRetryAttempts = lib.mkOption {
    type = lib.types.int;
    default = 30;
    description = "Number of retries when seeding declarative cluster state";
  };

  seedClusterStateRetrySecs = lib.mkOption {
    type = lib.types.int;
    default = 5;
    description = "Seconds to wait between deployer cluster-state seed retries";
  };

  allowAdminFallback = lib.mkOption {
    type = lib.types.bool;
    default = false;
    description = "Allow admin auth fallback to bootstrap token";
  };

  # --- TLS issuance --------------------------------------------------------

  # Kept as strings (not paths) so the files are referenced in place rather
  # than copied into the Nix store.
  tlsCaCertPath = lib.mkOption {
    type = lib.types.nullOr lib.types.str;
    default = null;
    description = "Optional CA certificate path for issuing node TLS certificates";
  };

  tlsCaKeyPath = lib.mkOption {
    type = lib.types.nullOr lib.types.str;
    default = null;
    description = "Optional CA private key path for issuing node TLS certificates";
  };

  tlsSelfSigned = lib.mkOption {
    type = lib.types.bool;
    default = false;
    description = "Issue self-signed node certificates when CA is not configured";
  };

  # --- Packages ------------------------------------------------------------

  # defaultText keeps documentation generation from having to evaluate the
  # default, which references pkgs and throws when the package is missing.
  package = lib.mkOption {
    type = lib.types.package;
    default = pkgs.deployer-server or (throw "deployer-server package not found");
    defaultText = lib.literalExpression "pkgs.deployer-server";
    description = "Package to use for deployer";
  };

  ctlPackage = lib.mkOption {
    type = lib.types.package;
    default = pkgs.deployer-ctl or (throw "deployer-ctl package not found");
    defaultText = lib.literalExpression "pkgs.deployer-ctl";
    description = "Package to use for deployer-ctl";
  };
};
# System configuration applied only when services.deployer.enable is set:
# a dedicated user/group, state directories, the deployer server unit and an
# optional oneshot unit that seeds declarative cluster state.
config = lib.mkIf cfg.enable {
  # Seeding needs both something to apply and somewhere to apply it to.
  assertions = [
    {
      assertion = (!cfg.seedClusterState) || cfg.clusterStateFile != null;
      message = "services.deployer.seedClusterState requires services.deployer.clusterStateFile";
    }
    {
      assertion = (!cfg.seedClusterState) || cfg.chainfireEndpoints != [ ];
      message = "services.deployer.seedClusterState requires services.deployer.chainfireEndpoints";
    }
  ];

  # Low-priority default: use the cluster-state artifact published under
  # system.build when one exists; an explicit user value still wins.
  services.deployer.clusterStateFile =
    lib.mkDefault (
      if config.system.build ? plasmacloudDeployerClusterState then
        config.system.build.plasmacloudDeployerClusterState
      else
        null
    );

  users.users.deployer = {
    isSystemUser = true;
    group = "deployer";
    description = "Deployer service user";
    home = "/var/lib/deployer";
  };
  users.groups.deployer = { };

  # Create the home directory and the (possibly custom) state directory with
  # ownership matching the service user.
  systemd.tmpfiles.rules = [
    "d /var/lib/deployer 0750 deployer deployer -"
    "d ${cfg.localStatePath} 0750 deployer deployer -"
  ];

  systemd.services.deployer = {
    description = "PlasmaCloud Deployer Server";
    wantedBy = [ "multi-user.target" ];
    after = [ "network.target" ];

    # Tokens are only exported when configured.
    # NOTE(review): values placed here are written into the generated unit
    # file; consider EnvironmentFile= or LoadCredential= for real secrets.
    environment =
      lib.optionalAttrs (cfg.bootstrapToken != null) {
        DEPLOYER_BOOTSTRAP_TOKEN = cfg.bootstrapToken;
      }
      // lib.optionalAttrs (cfg.adminToken != null) {
        DEPLOYER_ADMIN_TOKEN = cfg.adminToken;
      };

    serviceConfig = {
      Type = "simple";
      User = "deployer";
      Group = "deployer";
      Restart = "on-failure";
      RestartSec = "5s";
      StateDirectory = "deployer";
      StateDirectoryMode = "0750";
      NoNewPrivileges = true;
      PrivateTmp = true;
      ProtectSystem = "strict";
      ProtectHome = true;
      # ProtectSystem=strict makes the file system read-only, so the state
      # locations must be re-opened for writing explicitly.
      ReadWritePaths = [ "/var/lib/deployer" cfg.localStatePath ];
      ExecStart = "${cfg.package}/bin/deployer-server --config ${configFile}";
    };
  };

  # Oneshot unit that applies cfg.clusterStateFile with deployer-ctl, retrying
  # up to seedClusterStateRetryAttempts times.
  systemd.services.deployer-seed-cluster-state = lib.mkIf cfg.seedClusterState {
    description = "Seed PlasmaCloud cluster state from declarative Nix output";
    wantedBy = [ "multi-user.target" ];
    wants = [ "network-online.target" "deployer.service" ];
    after = [ "network-online.target" "deployer.service" ];
    path = [ pkgs.coreutils cfg.ctlPackage ];
    script =
      let
        chainfireEndpointsArg = lib.concatStringsSep "," cfg.chainfireEndpoints;
        pruneArg = lib.optionalString cfg.seedClusterStatePrune " --prune";
        maxAttempts = toString cfg.seedClusterStateRetryAttempts;
        retrySecs = toString cfg.seedClusterStateRetrySecs;
      in
      ''
        set -euo pipefail

        # --cluster-id is only passed when a cluster ID is configured.
        cluster_id_args=()
        ${lib.optionalString (cfg.clusterId != null) ''
          cluster_id_args+=(--cluster-id ${lib.escapeShellArg cfg.clusterId})
        ''}
        max_attempts=${maxAttempts}
        attempt=1
        while [ "$attempt" -le "$max_attempts" ]; do
          if ${cfg.ctlPackage}/bin/deployer-ctl \
            --chainfire-endpoint ${lib.escapeShellArg chainfireEndpointsArg} \
            "''${cluster_id_args[@]}" \
            --cluster-namespace ${lib.escapeShellArg cfg.clusterNamespace} \
            --deployer-namespace ${lib.escapeShellArg cfg.chainfireNamespace} \
            apply --config ${lib.escapeShellArg (toString cfg.clusterStateFile)}${pruneArg}; then
            exit 0
          fi

          # Only announce a retry and wait when another attempt will follow;
          # after the last attempt fail immediately.
          if [ "$attempt" -lt "$max_attempts" ]; then
            echo "deployer cluster-state seed attempt $attempt/$max_attempts failed; retrying in ${retrySecs}s" >&2
            sleep ${retrySecs}
          else
            echo "deployer cluster-state seed attempt $attempt/$max_attempts failed" >&2
          fi
          attempt=$((attempt + 1))
        done

        echo "failed to seed deployer cluster state after ${maxAttempts} attempts" >&2
        exit 1
      '';
    serviceConfig = {
      Type = "oneshot";
      User = "deployer";
      Group = "deployer";
      NoNewPrivileges = true;
      PrivateTmp = true;
      ProtectSystem = "strict";
      ProtectHome = true;
    };
  };
};
}