Move native runtime seed state into declarative Nix
This commit is contained in:
parent
9d21e2da95
commit
d6d96b8c37
6 changed files with 868 additions and 410 deletions
|
|
@ -13,6 +13,7 @@ This flow:
|
||||||
- builds all six VM images on the host
|
- builds all six VM images on the host
|
||||||
- boots the cluster in dependency order
|
- boots the cluster in dependency order
|
||||||
- validates control-plane, worker, gateway, storage, and fault-injection behavior
|
- validates control-plane, worker, gateway, storage, and fault-injection behavior
|
||||||
|
- proves that `deployer` seeds scheduler-managed native services directly from declarative Nix cluster state
|
||||||
|
|
||||||
## Publishable Checks
|
## Publishable Checks
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -184,6 +184,481 @@ let
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
# Submodule type describing the logical ports a service may expose.
# Both entries are nullable ports that default to null (i.e. "not exposed").
mkServicePortsType = types:
  let
    optionalPort = description: mkOption {
      inherit description;
      type = types.nullOr types.port;
      default = null;
    };
  in
  types.submodule {
    options = {
      http = optionalPort "Optional HTTP port exposed by the service";
      grpc = optionalPort "Optional gRPC port exposed by the service";
    };
  };
|
||||||
|
|
||||||
|
# Submodule type for a process-based runtime: an executable plus its
# arguments, optional working directory and environment.
mkProcessType = types:
  let
    inherit (types) attrsOf listOf nullOr str;
  in
  types.submodule {
    options = {
      # The only mandatory field: there is no default command.
      command = mkOption {
        description = "Executable invoked by node-agent";
        type = str;
      };

      args = mkOption {
        description = "Command-line arguments passed to the process";
        type = listOf str;
        default = [ ];
      };

      workingDir = mkOption {
        description = "Optional working directory used when spawning the process";
        type = nullOr str;
        default = null;
      };

      env = mkOption {
        description = "Environment variables injected into the process";
        type = attrsOf str;
        default = { };
      };
    };
  };
|
||||||
|
|
||||||
|
# Submodule type for one published container port mapping.
mkContainerPortType = types:
  let
    inherit (types) nullOr port str;
  in
  types.submodule {
    options = {
      # Required: the port inside the container.
      containerPort = mkOption {
        description = "Port exposed inside the container";
        type = port;
      };

      hostPort = mkOption {
        description = "Optional fixed host port published for this container port";
        type = nullOr port;
        default = null;
      };

      protocol = mkOption {
        description = "Optional transport protocol for the published port";
        type = nullOr str;
        default = null;
      };
    };
  };
|
||||||
|
|
||||||
|
# Submodule type for one host volume mounted into a container.
mkContainerVolumeType = types:
  let
    # source and target are both mandatory string paths without defaults.
    requiredPath = description: mkOption {
      inherit description;
      type = types.str;
    };
  in
  types.submodule {
    options = {
      source = requiredPath "Host-side volume source path";
      target = requiredPath "Container mount target path";

      readOnly = mkOption {
        description = "Whether the volume should be mounted read-only";
        type = types.bool;
        default = false;
      };
    };
  };
|
||||||
|
|
||||||
|
# Submodule type for a container-based runtime specification.
# Only `image` is mandatory; every other option has a default
# (null for the optional strings, an empty list/attrset for the collections).
mkContainerType = types:
  let
    # Nullable string option defaulting to null.
    optionalStr = description: mkOption {
      inherit description;
      type = types.nullOr types.str;
      default = null;
    };

    # List option of the given element type, defaulting to the empty list.
    listWithDefault = elemType: description: mkOption {
      inherit description;
      type = types.listOf elemType;
      default = [ ];
    };
  in
  types.submodule {
    options = {
      image = mkOption {
        description = "Container image reference";
        type = types.str;
      };

      runtime = optionalStr "Container runtime invoked by node-agent";

      command = listWithDefault types.str "Optional entrypoint override";

      args = listWithDefault types.str "Container arguments appended after the image";

      env = mkOption {
        description = "Environment variables passed to the container runtime";
        type = types.attrsOf types.str;
        default = { };
      };

      ports = listWithDefault (mkContainerPortType types) "Published container ports";

      volumes = listWithDefault (mkContainerVolumeType types) "Host volume mounts passed to the container runtime";

      networkMode = optionalStr "Optional container network mode";

      pullPolicy = optionalStr "Container image pull policy";

      workingDir = optionalStr "Optional container working directory";
    };
  };
|
||||||
|
|
||||||
|
# Submodule type for a node-agent health check.
# `type` is mandatory; the path and all timing knobs are nullable and default to null.
mkHealthCheckType = types:
  let
    # Nullable, strictly positive number of seconds.
    optionalSeconds = description: mkOption {
      inherit description;
      type = types.nullOr types.ints.positive;
      default = null;
    };
  in
  types.submodule {
    options = {
      type = mkOption {
        description = "Health check type executed by node-agent";
        type = types.str;
      };

      path = mkOption {
        description = "Optional path used by HTTP health checks";
        type = types.nullOr types.str;
        default = null;
      };

      intervalSecs = optionalSeconds "Health check interval in seconds";

      timeoutSecs = optionalSeconds "Health check timeout in seconds";

      startupGraceSecs = optionalSeconds "Startup grace period before a service is considered unhealthy";
    };
  };
|
||||||
|
|
||||||
|
# Submodule type for scheduler placement rules.
# Every option has a default, so an empty attrset is a valid placement policy.
mkPlacementPolicyType = types:
  let
    # List-of-strings selector defaulting to the empty list.
    selectorList = description: mkOption {
      inherit description;
      type = types.listOf types.str;
      default = [ ];
    };
  in
  types.submodule {
    options = {
      roles = selectorList "Roles matched by the scheduler placement filter";

      pools = selectorList "Pools matched by the scheduler placement filter";

      nodeClasses = selectorList "Node classes matched by the scheduler placement filter";

      matchLabels = mkOption {
        description = "Additional label selectors matched by the scheduler";
        type = types.attrsOf types.str;
        default = { };
      };

      spreadByLabel = mkOption {
        description = "Optional spread key used when balancing replicas";
        type = types.nullOr types.str;
        default = null;
      };

      maxInstancesPerNode = mkOption {
        description = "Maximum number of replicas the scheduler may place on one node";
        type = types.ints.positive;
        default = 1;
      };
    };
  };
|
||||||
|
|
||||||
|
# Submodule type for the rollout budget; both limits are unsigned integers defaulting to 1.
mkRolloutStrategyType = types:
  let
    budget = description: mkOption {
      inherit description;
      type = types.ints.unsigned;
      default = 1;
    };
  in
  types.submodule {
    options = {
      maxUnavailable = budget "Maximum unavailable instances allowed during a rollout";
      maxSurge = budget "Maximum extra instances allowed during a rollout";
    };
  };
|
||||||
|
|
||||||
|
# Submodule type for scheduler-managed runtime intent: replica count,
# placement, rollout budget, ports, the runtime (process and/or container)
# and an optional health check.
mkServiceScheduleType = types:
  let
    # Nullable option of the given inner type, defaulting to null.
    optionalOf = innerType: description: mkOption {
      inherit description;
      type = types.nullOr innerType;
      default = null;
    };

    # Submodule-typed option whose default `{ }` resolves to the submodule's own defaults.
    withSubmoduleDefaults = submoduleType: description: mkOption {
      inherit description;
      type = submoduleType;
      default = { };
    };
  in
  types.submodule {
    options = {
      replicas = mkOption {
        description = "Desired number of scheduler-managed replicas";
        type = types.ints.positive;
        default = 1;
      };

      placement = withSubmoduleDefaults (mkPlacementPolicyType types) "Scheduler placement rules for the service";

      rollout = withSubmoduleDefaults (mkRolloutStrategyType types) "Rollout budget used by the scheduler";

      instancePort = optionalOf types.port "Host port used when creating service instances";

      meshPort = optionalOf types.port "Optional service mesh port for the managed instances";

      process = optionalOf (mkProcessType types) "Process-based runtime specification";

      container = optionalOf (mkContainerType types) "Container-based runtime specification";

      healthCheck = optionalOf (mkHealthCheckType types) "Health check performed by node-agent";
    };
  };
|
||||||
|
|
||||||
|
# Submodule type for publishing a service into a FlashDNS zone.
mkDnsPublicationType = types:
  let
    inherit (types) enum nullOr str;
    inherit (types.ints) positive;
  in
  types.submodule {
    options = {
      # Required: the zone has no default.
      zone = mkOption {
        description = "FlashDNS zone used for service publication";
        type = str;
      };

      name = mkOption {
        description = "Optional record name inside the published zone";
        type = nullOr str;
        default = null;
      };

      ttl = mkOption {
        description = "DNS TTL for the published record";
        type = positive;
        default = 30;
      };

      mode = mkOption {
        description = "Whether DNS publishes the load balancer VIP or a direct instance address";
        type = enum [ "load_balancer" "direct" ];
        default = "load_balancer";
      };
    };
  };
|
||||||
|
|
||||||
|
# Submodule type for publishing a service through FiberLB.
# Every option is nullable and defaults to null.
mkLoadBalancerPublicationType = types:
  let
    optionalOf = innerType: description: mkOption {
      inherit description;
      type = types.nullOr innerType;
      default = null;
    };
    optionalStr = optionalOf types.str;
  in
  types.submodule {
    options = {
      orgId = optionalStr "Optional organization used when provisioning FiberLB resources";

      projectId = optionalStr "Optional project used when provisioning FiberLB resources";

      name = optionalStr "Optional explicit load balancer name";

      listenerPort = optionalOf types.port "Listener port exposed by the load balancer";

      protocol = optionalStr "Listener protocol for the published load balancer";

      poolProtocol = optionalStr "Backend pool protocol for the published load balancer";
    };
  };
|
||||||
|
|
||||||
|
# Submodule type grouping the publication targets of a service
# (default org/project plus optional DNS and load balancer targets).
# Every option is nullable and defaults to null.
mkServicePublicationType = types:
  let
    optionalOf = innerType: description: mkOption {
      inherit description;
      type = types.nullOr innerType;
      default = null;
    };
  in
  types.submodule {
    options = {
      orgId = optionalOf types.str "Default organization used for service publication";

      projectId = optionalOf types.str "Default project used for service publication";

      dns = optionalOf (mkDnsPublicationType types) "Optional FlashDNS publication target";

      loadBalancer = optionalOf (mkLoadBalancerPublicationType types) "Optional FiberLB publication target";
    };
  };
|
||||||
|
|
||||||
|
# Top-level submodule type for one declarative service definition.
# Every option is nullable and defaults to null.
mkServiceType = types:
  let
    optionalOf = innerType: description: mkOption {
      inherit description;
      type = types.nullOr innerType;
      default = null;
    };
  in
  types.submodule {
    options = {
      ports = optionalOf (mkServicePortsType types) "Optional logical service ports";

      protocol = optionalOf types.str "Optional service protocol";

      mtlsRequired = optionalOf types.bool "Whether service-to-service traffic requires mTLS";

      meshMode = optionalOf types.str "Optional mesh publication mode";

      schedule = optionalOf (mkServiceScheduleType types) "Scheduler-managed runtime intent";

      publish = optionalOf (mkServicePublicationType types) "Optional publication targets for the service";
    };
  };
|
||||||
|
|
||||||
|
# Submodule type for one mTLS policy between a source and a target service.
# The two service names are mandatory; everything else is nullable (default null).
mkMtlsPolicyType = types:
  let
    optionalOf = innerType: description: mkOption {
      inherit description;
      type = types.nullOr innerType;
      default = null;
    };

    requiredStr = description: mkOption {
      inherit description;
      type = types.str;
    };
  in
  types.submodule {
    options = {
      environment = optionalOf types.str "Optional environment scope for the policy";

      sourceService = requiredStr "Source service matched by the policy";

      targetService = requiredStr "Target service matched by the policy";

      mtlsRequired = optionalOf types.bool "Whether the policy enforces mTLS";

      mode = optionalOf types.str "Optional policy mode";
    };
  };
|
||||||
|
|
||||||
mkNodeType = types:
|
mkNodeType = types:
|
||||||
let
|
let
|
||||||
installPlanType = mkInstallPlanType types;
|
installPlanType = mkInstallPlanType types;
|
||||||
|
|
@ -664,6 +1139,205 @@ let
|
||||||
node_id_prefix = rule.nodeIdPrefix;
|
node_id_prefix = rule.nodeIdPrefix;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
# Render the logical service ports, keeping only the entries that are set.
# A null `ports` value yields the empty attrset.
mkServicePorts = ports:
  if ports == null then
    { }
  else
    optionalAttrs (ports.http != null) { inherit (ports) http; }
    // optionalAttrs (ports.grpc != null) { inherit (ports) grpc; };
|
||||||
|
|
||||||
|
# Render a process runtime; `working_dir` is emitted only when workingDir is set.
mkProcessSpec = process:
  let
    always = {
      inherit (process) command args env;
    };
    workingDir = optionalAttrs (process.workingDir != null) {
      working_dir = process.workingDir;
    };
  in
  always // workingDir;
|
||||||
|
|
||||||
|
# Render one container port mapping; null host port / protocol are omitted.
mkContainerPortSpec = port:
  let
    # Emit `{ key = value; }` only for non-null values.
    setIfPresent = key: value: optionalAttrs (value != null) { ${key} = value; };
  in
  { container_port = port.containerPort; }
  // setIfPresent "host_port" port.hostPort
  // setIfPresent "protocol" port.protocol;
|
||||||
|
|
||||||
|
# Render one container volume mount; `read_only` appears only when it is true.
mkContainerVolumeSpec = volume:
  let
    mount = {
      inherit (volume) source target;
    };
  in
  if volume.readOnly then
    mount // { read_only = true; }
  else
    mount;
|
||||||
|
|
||||||
|
# Render a container runtime. image/command/args/env/ports/volumes are always
# present; the nullable string settings are emitted only when set.
mkContainerSpec = container:
  let
    # Emit `{ key = value; }` only for non-null values.
    setIfPresent = key: value: optionalAttrs (value != null) { ${key} = value; };
  in
  {
    inherit (container) image command args env;
    ports = map mkContainerPortSpec container.ports;
    volumes = map mkContainerVolumeSpec container.volumes;
  }
  // setIfPresent "runtime" container.runtime
  // setIfPresent "network_mode" container.networkMode
  // setIfPresent "pull_policy" container.pullPolicy
  // setIfPresent "working_dir" container.workingDir;
|
||||||
|
|
||||||
|
# Render a health check; `type` is always present, the remaining fields only when set.
mkHealthCheckSpec = healthCheck:
  let
    # Emit `{ key = value; }` only for non-null values.
    setIfPresent = key: value: optionalAttrs (value != null) { ${key} = value; };
  in
  { inherit (healthCheck) type; }
  // setIfPresent "path" healthCheck.path
  // setIfPresent "interval_secs" healthCheck.intervalSecs
  // setIfPresent "timeout_secs" healthCheck.timeoutSecs
  // setIfPresent "startup_grace_secs" healthCheck.startupGraceSecs;
|
||||||
|
|
||||||
|
# Render placement rules using snake_case keys; `spread_by_label` is omitted when unset.
mkPlacementPolicySpec = placement:
  let
    selectors = {
      inherit (placement) roles pools;
      node_classes = placement.nodeClasses;
      match_labels = placement.matchLabels;
      max_instances_per_node = placement.maxInstancesPerNode;
    };
  in
  if placement.spreadByLabel == null then
    selectors
  else
    selectors // { spread_by_label = placement.spreadByLabel; };
|
||||||
|
|
||||||
|
# Render the rollout budget using snake_case keys.
mkRolloutStrategySpec = rollout:
  let
    inherit (rollout) maxUnavailable maxSurge;
  in
  {
    max_unavailable = maxUnavailable;
    max_surge = maxSurge;
  };
|
||||||
|
|
||||||
|
# Render the scheduler intent of a service. replicas/placement/rollout are
# always present; ports, runtimes and the health check only when set.
mkServiceScheduleSpec = schedule:
  let
    # Emit `{ key = convert value; }` only for non-null values; `convert`
    # is never applied to null.
    mapIfPresent = key: convert: value:
      optionalAttrs (value != null) { ${key} = convert value; };
    setIfPresent = key: mapIfPresent key (value: value);
  in
  {
    inherit (schedule) replicas;
    placement = mkPlacementPolicySpec schedule.placement;
    rollout = mkRolloutStrategySpec schedule.rollout;
  }
  // setIfPresent "instance_port" schedule.instancePort
  // setIfPresent "mesh_port" schedule.meshPort
  // mapIfPresent "process" mkProcessSpec schedule.process
  // mapIfPresent "container" mkContainerSpec schedule.container
  // mapIfPresent "health_check" mkHealthCheckSpec schedule.healthCheck;
|
||||||
|
|
||||||
|
# Render a DNS publication target; the record name is emitted only when set.
mkDnsPublicationSpec = dns:
  let
    record = {
      inherit (dns) zone ttl mode;
    };
  in
  record // optionalAttrs (dns.name != null) { inherit (dns) name; };
|
||||||
|
|
||||||
|
# Render a load balancer publication target. Every field is optional, so the
# result contains only the settings that are non-null (possibly none).
mkLoadBalancerPublicationSpec = loadBalancer:
  let
    # Emit `{ key = value; }` only for non-null values.
    setIfPresent = key: value: optionalAttrs (value != null) { ${key} = value; };
  in
  setIfPresent "org_id" loadBalancer.orgId
  // setIfPresent "project_id" loadBalancer.projectId
  // setIfPresent "name" loadBalancer.name
  // setIfPresent "listener_port" loadBalancer.listenerPort
  // setIfPresent "protocol" loadBalancer.protocol
  // setIfPresent "pool_protocol" loadBalancer.poolProtocol;
|
||||||
|
|
||||||
|
# Render the publication settings of a service; unset fields are omitted and
# nested DNS / load balancer targets are rendered by their own helpers.
mkServicePublicationSpec = publish:
  let
    # Emit `{ key = convert value; }` only for non-null values; `convert`
    # is never applied to null.
    mapIfPresent = key: convert: value:
      optionalAttrs (value != null) { ${key} = convert value; };
    setIfPresent = key: mapIfPresent key (value: value);
  in
  setIfPresent "org_id" publish.orgId
  // setIfPresent "project_id" publish.projectId
  // mapIfPresent "dns" mkDnsPublicationSpec publish.dns
  // mapIfPresent "load_balancer" mkLoadBalancerPublicationSpec publish.loadBalancer;
|
||||||
|
|
||||||
|
# Render one service entry: the attribute name becomes `name`, and every
# other field is included only when it is set. `ports` is additionally
# dropped when it renders to an empty attrset.
mkDeployerServiceSpec = name: service:
  let
    # Emit `{ key = convert value; }` only for non-null values; `convert`
    # is never applied to null.
    mapIfPresent = key: convert: value:
      optionalAttrs (value != null) { ${key} = convert value; };
    setIfPresent = key: mapIfPresent key (value: value);

    renderedPorts = mkServicePorts service.ports;
    hasPorts = service.ports != null && renderedPorts != { };
  in
  { inherit name; }
  // optionalAttrs hasPorts { ports = renderedPorts; }
  // setIfPresent "protocol" service.protocol
  // setIfPresent "mtls_required" service.mtlsRequired
  // setIfPresent "mesh_mode" service.meshMode
  // mapIfPresent "schedule" mkServiceScheduleSpec service.schedule
  // mapIfPresent "publish" mkServicePublicationSpec service.publish;
|
||||||
|
|
||||||
|
# Render one mTLS policy: the attribute name becomes `policy_id`, source and
# target are always present, and the remaining fields only when set.
mkDeployerMtlsPolicySpec = name: policy:
  let
    # Emit `{ key = value; }` only for non-null values.
    setIfPresent = key: value: optionalAttrs (value != null) { ${key} = value; };
  in
  {
    policy_id = name;
    source_service = policy.sourceService;
    target_service = policy.targetService;
  }
  // setIfPresent "environment" policy.environment
  // setIfPresent "mtls_required" policy.mtlsRequired
  // setIfPresent "mode" policy.mode;
|
||||||
|
|
||||||
mkClusterConfig = {
|
mkClusterConfig = {
|
||||||
cluster,
|
cluster,
|
||||||
hostname,
|
hostname,
|
||||||
|
|
@ -729,6 +1403,8 @@ let
|
||||||
pools = deployer.pools or { };
|
pools = deployer.pools or { };
|
||||||
enrollmentRules = deployer.enrollmentRules or { };
|
enrollmentRules = deployer.enrollmentRules or { };
|
||||||
hostDeployments = deployer.hostDeployments or { };
|
hostDeployments = deployer.hostDeployments or { };
|
||||||
|
services = deployer.services or { };
|
||||||
|
mtlsPolicies = deployer.mtlsPolicies or { };
|
||||||
in {
|
in {
|
||||||
cluster = {
|
cluster = {
|
||||||
cluster_id = clusterId;
|
cluster_id = clusterId;
|
||||||
|
|
@ -740,9 +1416,9 @@ let
|
||||||
pools = map (name: mkDeployerPoolSpec name pools.${name}) (attrNames pools);
|
pools = map (name: mkDeployerPoolSpec name pools.${name}) (attrNames pools);
|
||||||
enrollment_rules = map (name: mkDeployerEnrollmentRuleSpec name enrollmentRules.${name}) (attrNames enrollmentRules);
|
enrollment_rules = map (name: mkDeployerEnrollmentRuleSpec name enrollmentRules.${name}) (attrNames enrollmentRules);
|
||||||
host_deployments = map (name: mkDeployerHostDeploymentSpec name hostDeployments.${name}) (attrNames hostDeployments);
|
host_deployments = map (name: mkDeployerHostDeploymentSpec name hostDeployments.${name}) (attrNames hostDeployments);
|
||||||
services = [ ];
|
services = map (name: mkDeployerServiceSpec name services.${name}) (attrNames services);
|
||||||
instances = [ ];
|
instances = [ ];
|
||||||
mtls_policies = [ ];
|
mtls_policies = map (name: mkDeployerMtlsPolicySpec name mtlsPolicies.${name}) (attrNames mtlsPolicies);
|
||||||
};
|
};
|
||||||
in
|
in
|
||||||
{
|
{
|
||||||
|
|
@ -751,6 +1427,20 @@ in
|
||||||
mkDesiredSystemType
|
mkDesiredSystemType
|
||||||
mkHostDeploymentSelectorType
|
mkHostDeploymentSelectorType
|
||||||
mkHostDeploymentType
|
mkHostDeploymentType
|
||||||
|
mkServicePortsType
|
||||||
|
mkProcessType
|
||||||
|
mkContainerPortType
|
||||||
|
mkContainerVolumeType
|
||||||
|
mkContainerType
|
||||||
|
mkHealthCheckType
|
||||||
|
mkPlacementPolicyType
|
||||||
|
mkRolloutStrategyType
|
||||||
|
mkServiceScheduleType
|
||||||
|
mkDnsPublicationType
|
||||||
|
mkLoadBalancerPublicationType
|
||||||
|
mkServicePublicationType
|
||||||
|
mkServiceType
|
||||||
|
mkMtlsPolicyType
|
||||||
mkNodeType
|
mkNodeType
|
||||||
mkNodeClassType
|
mkNodeClassType
|
||||||
mkNodePoolType
|
mkNodePoolType
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,8 @@ let
|
||||||
nodePoolType = clusterConfigLib.mkNodePoolType types;
|
nodePoolType = clusterConfigLib.mkNodePoolType types;
|
||||||
enrollmentRuleType = clusterConfigLib.mkEnrollmentRuleType types;
|
enrollmentRuleType = clusterConfigLib.mkEnrollmentRuleType types;
|
||||||
hostDeploymentType = clusterConfigLib.mkHostDeploymentType types;
|
hostDeploymentType = clusterConfigLib.mkHostDeploymentType types;
|
||||||
|
serviceType = clusterConfigLib.mkServiceType types;
|
||||||
|
mtlsPolicyType = clusterConfigLib.mkMtlsPolicyType types;
|
||||||
jsonFormat = pkgs.formats.json { };
|
jsonFormat = pkgs.formats.json { };
|
||||||
|
|
||||||
# Generate cluster-config.json for the current node
|
# Generate cluster-config.json for the current node
|
||||||
|
|
@ -105,6 +107,18 @@ in {
|
||||||
default = { };
|
default = { };
|
||||||
description = "Declarative host rollout objects derived from Nix";
|
description = "Declarative host rollout objects derived from Nix";
|
||||||
};
|
};
|
||||||
|
|
||||||
|
# Attribute set of service definitions keyed by service name; empty by default.
services = mkOption {
  description = "Scheduler-managed service definitions derived from Nix";
  default = { };
  type = types.attrsOf serviceType;
};
|
||||||
|
|
||||||
|
# Attribute set of mTLS policies keyed by policy name; empty by default.
mtlsPolicies = mkOption {
  description = "Declarative mTLS policies derived from Nix";
  default = { };
  type = types.attrsOf mtlsPolicyType;
};
|
||||||
};
|
};
|
||||||
|
|
||||||
generated = {
|
generated = {
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,7 @@ All VM images are built on the host in a single Nix invocation and then booted a
|
||||||
- gateway-node `apigateway`, `nightlight`, and minimal `creditservice` startup
|
- gateway-node `apigateway`, `nightlight`, and minimal `creditservice` startup
|
||||||
- host-forwarded access to the API gateway and NightLight HTTP surfaces
|
- host-forwarded access to the API gateway and NightLight HTTP surfaces
|
||||||
- cross-node data replication smoke tests for `chainfire` and `flaredb`
|
- cross-node data replication smoke tests for `chainfire` and `flaredb`
|
||||||
|
- deployer-seeded native runtime scheduling from declarative Nix service definitions, including drain/failover recovery
|
||||||
|
|
||||||
## Validation layers
|
## Validation layers
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -201,6 +201,7 @@ in
|
||||||
pool = "general";
|
pool = "general";
|
||||||
nodeClass = "worker-linux";
|
nodeClass = "worker-linux";
|
||||||
failureDomain = "zone-b";
|
failureDomain = "zone-b";
|
||||||
|
state = "provisioning";
|
||||||
raftPort = 2380;
|
raftPort = 2380;
|
||||||
apiPort = 2379;
|
apiPort = 2379;
|
||||||
};
|
};
|
||||||
|
|
@ -214,6 +215,7 @@ in
|
||||||
pool = "general";
|
pool = "general";
|
||||||
nodeClass = "worker-linux";
|
nodeClass = "worker-linux";
|
||||||
failureDomain = "zone-c";
|
failureDomain = "zone-c";
|
||||||
|
state = "provisioning";
|
||||||
raftPort = 2380;
|
raftPort = 2380;
|
||||||
apiPort = 2379;
|
apiPort = 2379;
|
||||||
};
|
};
|
||||||
|
|
@ -273,6 +275,95 @@ in
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
services = {
  # Process-based HTTP service with two replicas, published through DNS
  # and a load balancer.
  native-web =
    let
      # The logical HTTP port and the per-instance host port are the same value.
      servicePort = 18190;
    in
    {
      protocol = "http";
      ports.http = servicePort;

      schedule = {
        replicas = 2;
        instancePort = servicePort;

        placement = {
          roles = [ "worker" ];
          pools = [ "general" ];
          nodeClasses = [ "worker-linux" ];
          matchLabels.runtime = "native";
          spreadByLabel = "failure_domain";
          maxInstancesPerNode = 1;
        };

        process = {
          command = "python3";
          # `\${...}` is escaped so the literal placeholder text reaches the
          # generated config instead of being interpolated by Nix.
          args = [ "-m" "http.server" "\${INSTANCE_PORT}" "--bind" "\${INSTANCE_IP}" ];
        };

        healthCheck = {
          type = "http";
          path = "/";
          intervalSecs = 5;
          timeoutSecs = 3;
        };
      };

      publish = {
        dns = {
          zone = "native.cluster.test";
          name = "web";
          ttl = 30;
          mode = "load_balancer";
        };

        loadBalancer = {
          orgId = "native-services";
          projectId = "test-cluster";
          listenerPort = 18191;
          protocol = "http";
          poolProtocol = "http";
        };
      };
    };

  # Single-replica container-based HTTP service; not published.
  native-container =
    let
      # Used as logical HTTP port, instance port and published host port.
      servicePort = 18192;
    in
    {
      protocol = "http";
      ports.http = servicePort;

      schedule = {
        replicas = 1;
        instancePort = servicePort;

        placement = {
          roles = [ "worker" ];
          pools = [ "general" ];
          nodeClasses = [ "worker-linux" ];
          matchLabels.runtime = "native";
          maxInstancesPerNode = 1;
        };

        container = {
          image = "docker.io/library/nginx:1.27-alpine";
          runtime = "podman";
          pullPolicy = "if-not-present";
          ports = [
            {
              containerPort = 80;
              hostPort = servicePort;
              protocol = "tcp";
            }
          ];
        };

        healthCheck = {
          type = "http";
          path = "/";
          intervalSecs = 5;
          timeoutSecs = 5;
          startupGraceSecs = 120;
        };
      };
    };
};
|
||||||
};
|
};
|
||||||
|
|
||||||
bootstrap.initialPeers = [ "node01" "node02" "node03" ];
|
bootstrap.initialPeers = [ "node01" "node02" "node03" ];
|
||||||
|
|
|
||||||
|
|
@ -4805,28 +4805,23 @@ validate_deployer_flow() {
|
||||||
}
|
}
|
||||||
|
|
||||||
validate_native_runtime_flow() {
|
validate_native_runtime_flow() {
|
||||||
log "Validating native deployer + scheduler runtime orchestration"
|
log "Validating native deployer + scheduler runtime orchestration from declarative Nix seed"
|
||||||
|
|
||||||
wait_for_unit node04 node-agent
|
wait_for_unit node04 node-agent
|
||||||
wait_for_unit node05 node-agent
|
wait_for_unit node05 node-agent
|
||||||
wait_for_unit node06 fleet-scheduler
|
wait_for_unit node06 fleet-scheduler
|
||||||
wait_for_http node06 "http://127.0.0.1:8088/health"
|
wait_for_http node06 "http://127.0.0.1:8088/health"
|
||||||
|
|
||||||
local tmp_dir native_config drained_config restored_config
|
|
||||||
local chainfire_tunnel_node01="" chainfire_tunnel_node02="" chainfire_tunnel_node03=""
|
local chainfire_tunnel_node01="" chainfire_tunnel_node02="" chainfire_tunnel_node03=""
|
||||||
local chainfire_endpoint="http://127.0.0.1:12379,http://127.0.0.1:12380,http://127.0.0.1:12381"
|
local chainfire_endpoint="http://127.0.0.1:12379,http://127.0.0.1:12380,http://127.0.0.1:12381"
|
||||||
local iam_tunnel="" lb_tunnel="" token lb_name
|
local iam_tunnel="" lb_tunnel="" token lb_name
|
||||||
local native_fresh_healthy_map_expr native_fresh_healthy_count_expr
|
local native_fresh_healthy_map_expr native_fresh_healthy_count_expr
|
||||||
tmp_dir="$(mktemp -d -p "${TMPDIR:-/tmp}" photon-native-runtime-XXXXXX)"
|
|
||||||
native_config="${tmp_dir}/native-runtime.yaml"
|
|
||||||
drained_config="${tmp_dir}/native-runtime-drained.yaml"
|
|
||||||
restored_config="${tmp_dir}/native-runtime-restored.yaml"
|
|
||||||
native_fresh_healthy_map_expr='map(select(.state == "healthy" and (((((.last_heartbeat // .observed_at) // "") | sub("\\.[0-9]+"; "") | sub("\\+00:00$"; "Z") | fromdateiso8601?) // 0) >= (now - 300))))'
|
native_fresh_healthy_map_expr='map(select(.state == "healthy" and (((((.last_heartbeat // .observed_at) // "") | sub("\\.[0-9]+"; "") | sub("\\+00:00$"; "Z") | fromdateiso8601?) // 0) >= (now - 300))))'
|
||||||
native_fresh_healthy_count_expr="${native_fresh_healthy_map_expr} | length"
|
native_fresh_healthy_count_expr="${native_fresh_healthy_map_expr} | length"
|
||||||
chainfire_tunnel_node01="$(start_ssh_tunnel node01 12379 2379 "${NODE_IPS[node01]}")"
|
chainfire_tunnel_node01="$(start_ssh_tunnel node01 12379 2379 "${NODE_IPS[node01]}")"
|
||||||
chainfire_tunnel_node02="$(start_ssh_tunnel node02 12380 2379 "${NODE_IPS[node02]}")"
|
chainfire_tunnel_node02="$(start_ssh_tunnel node02 12380 2379 "${NODE_IPS[node02]}")"
|
||||||
chainfire_tunnel_node03="$(start_ssh_tunnel node03 12381 2379 "${NODE_IPS[node03]}")"
|
chainfire_tunnel_node03="$(start_ssh_tunnel node03 12381 2379 "${NODE_IPS[node03]}")"
|
||||||
trap 'stop_ssh_tunnel node01 "${lb_tunnel}"; stop_ssh_tunnel node01 "${iam_tunnel}"; stop_ssh_tunnel node01 "${chainfire_tunnel_node01}"; stop_ssh_tunnel node02 "${chainfire_tunnel_node02}"; stop_ssh_tunnel node03 "${chainfire_tunnel_node03}"; rm -rf "${tmp_dir}"' RETURN
|
trap 'stop_ssh_tunnel node01 "${lb_tunnel}"; stop_ssh_tunnel node01 "${iam_tunnel}"; stop_ssh_tunnel node01 "${chainfire_tunnel_node01}"; stop_ssh_tunnel node02 "${chainfire_tunnel_node02}"; stop_ssh_tunnel node03 "${chainfire_tunnel_node03}"' RETURN
|
||||||
|
|
||||||
native_dump_values() {
|
native_dump_values() {
|
||||||
local prefix="$1"
|
local prefix="$1"
|
||||||
|
|
@ -4879,7 +4874,13 @@ validate_native_runtime_flow() {
|
||||||
local instance_value="" node_id=""
|
local instance_value="" node_id=""
|
||||||
|
|
||||||
while true; do
|
while true; do
|
||||||
instance_value="$(native_first_healthy_instance "${service}")"
|
instance_value="$(
|
||||||
|
native_dump_values "photoncloud/clusters/test-cluster/instances/${service}/" \
|
||||||
|
| sed '/^$/d' \
|
||||||
|
| jq -sr \
|
||||||
|
--arg node "${expected_node}" \
|
||||||
|
"${native_fresh_healthy_map_expr} | map(select(.node_id == \$node)) | sort_by(.instance_id) | first"
|
||||||
|
)"
|
||||||
node_id="$(printf '%s' "${instance_value}" | jq -r '.node_id // empty')"
|
node_id="$(printf '%s' "${instance_value}" | jq -r '.node_id // empty')"
|
||||||
if [[ "${node_id}" == "${expected_node}" ]]; then
|
if [[ "${node_id}" == "${expected_node}" ]]; then
|
||||||
printf '%s' "${instance_value}"
|
printf '%s' "${instance_value}"
|
||||||
|
|
@ -4955,373 +4956,22 @@ validate_native_runtime_flow() {
|
||||||
done
|
done
|
||||||
}
|
}
|
||||||
|
|
||||||
cat >"${native_config}" <<'EOF'
|
set_native_node_state() {
|
||||||
cluster:
|
local node_id="$1"
|
||||||
cluster_id: test-cluster
|
local state="$2"
|
||||||
environment: test
|
|
||||||
node_classes:
|
|
||||||
- name: worker-linux
|
|
||||||
description: Native runtime worker
|
|
||||||
roles:
|
|
||||||
- worker
|
|
||||||
labels:
|
|
||||||
tier: general
|
|
||||||
runtime: native
|
|
||||||
pools:
|
|
||||||
- name: general
|
|
||||||
description: General-purpose native worker pool
|
|
||||||
node_class: worker-linux
|
|
||||||
labels:
|
|
||||||
pool.photoncloud.io/name: general
|
|
||||||
nodes:
|
|
||||||
- node_id: node04
|
|
||||||
hostname: node04
|
|
||||||
ip: 10.100.0.21
|
|
||||||
roles:
|
|
||||||
- worker
|
|
||||||
labels:
|
|
||||||
runtime: native
|
|
||||||
pool: general
|
|
||||||
node_class: worker-linux
|
|
||||||
failure_domain: zone-b
|
|
||||||
state: provisioning
|
|
||||||
- node_id: node05
|
|
||||||
hostname: node05
|
|
||||||
ip: 10.100.0.22
|
|
||||||
roles:
|
|
||||||
- worker
|
|
||||||
labels:
|
|
||||||
runtime: native
|
|
||||||
pool: general
|
|
||||||
node_class: worker-linux
|
|
||||||
failure_domain: zone-c
|
|
||||||
state: provisioning
|
|
||||||
services:
|
|
||||||
- name: native-web
|
|
||||||
protocol: http
|
|
||||||
ports:
|
|
||||||
http: 18190
|
|
||||||
schedule:
|
|
||||||
replicas: 2
|
|
||||||
placement:
|
|
||||||
roles:
|
|
||||||
- worker
|
|
||||||
pools:
|
|
||||||
- general
|
|
||||||
node_classes:
|
|
||||||
- worker-linux
|
|
||||||
match_labels:
|
|
||||||
runtime: native
|
|
||||||
spread_by_label: failure_domain
|
|
||||||
max_instances_per_node: 1
|
|
||||||
instance_port: 18190
|
|
||||||
process:
|
|
||||||
command: python3
|
|
||||||
args:
|
|
||||||
- -m
|
|
||||||
- http.server
|
|
||||||
- ${INSTANCE_PORT}
|
|
||||||
- --bind
|
|
||||||
- ${INSTANCE_IP}
|
|
||||||
health_check:
|
|
||||||
type: http
|
|
||||||
path: /
|
|
||||||
interval_secs: 5
|
|
||||||
timeout_secs: 3
|
|
||||||
publish:
|
|
||||||
dns:
|
|
||||||
zone: native.cluster.test
|
|
||||||
name: web
|
|
||||||
ttl: 30
|
|
||||||
mode: load_balancer
|
|
||||||
load_balancer:
|
|
||||||
org_id: native-services
|
|
||||||
project_id: test-cluster
|
|
||||||
listener_port: 18191
|
|
||||||
protocol: http
|
|
||||||
pool_protocol: http
|
|
||||||
- name: native-container
|
|
||||||
protocol: http
|
|
||||||
ports:
|
|
||||||
http: 18192
|
|
||||||
schedule:
|
|
||||||
replicas: 1
|
|
||||||
placement:
|
|
||||||
roles:
|
|
||||||
- worker
|
|
||||||
pools:
|
|
||||||
- general
|
|
||||||
node_classes:
|
|
||||||
- worker-linux
|
|
||||||
match_labels:
|
|
||||||
runtime: native
|
|
||||||
max_instances_per_node: 1
|
|
||||||
instance_port: 18192
|
|
||||||
container:
|
|
||||||
image: docker.io/library/nginx:1.27-alpine
|
|
||||||
runtime: podman
|
|
||||||
pull_policy: if-not-present
|
|
||||||
ports:
|
|
||||||
- container_port: 80
|
|
||||||
host_port: 18192
|
|
||||||
protocol: tcp
|
|
||||||
health_check:
|
|
||||||
type: http
|
|
||||||
path: /
|
|
||||||
interval_secs: 5
|
|
||||||
timeout_secs: 5
|
|
||||||
startup_grace_secs: 120
|
|
||||||
instances: []
|
|
||||||
mtls_policies: []
|
|
||||||
EOF
|
|
||||||
|
|
||||||
cat >"${drained_config}" <<'EOF'
|
|
||||||
cluster:
|
|
||||||
cluster_id: test-cluster
|
|
||||||
environment: test
|
|
||||||
node_classes:
|
|
||||||
- name: worker-linux
|
|
||||||
description: Native runtime worker
|
|
||||||
roles:
|
|
||||||
- worker
|
|
||||||
labels:
|
|
||||||
tier: general
|
|
||||||
runtime: native
|
|
||||||
pools:
|
|
||||||
- name: general
|
|
||||||
description: General-purpose native worker pool
|
|
||||||
node_class: worker-linux
|
|
||||||
labels:
|
|
||||||
pool.photoncloud.io/name: general
|
|
||||||
nodes:
|
|
||||||
- node_id: node04
|
|
||||||
hostname: node04
|
|
||||||
ip: 10.100.0.21
|
|
||||||
roles:
|
|
||||||
- worker
|
|
||||||
labels:
|
|
||||||
runtime: native
|
|
||||||
pool: general
|
|
||||||
node_class: worker-linux
|
|
||||||
failure_domain: zone-b
|
|
||||||
state: draining
|
|
||||||
- node_id: node05
|
|
||||||
hostname: node05
|
|
||||||
ip: 10.100.0.22
|
|
||||||
roles:
|
|
||||||
- worker
|
|
||||||
labels:
|
|
||||||
runtime: native
|
|
||||||
pool: general
|
|
||||||
node_class: worker-linux
|
|
||||||
failure_domain: zone-c
|
|
||||||
state: active
|
|
||||||
services:
|
|
||||||
- name: native-web
|
|
||||||
protocol: http
|
|
||||||
ports:
|
|
||||||
http: 18190
|
|
||||||
schedule:
|
|
||||||
replicas: 1
|
|
||||||
placement:
|
|
||||||
roles:
|
|
||||||
- worker
|
|
||||||
pools:
|
|
||||||
- general
|
|
||||||
node_classes:
|
|
||||||
- worker-linux
|
|
||||||
match_labels:
|
|
||||||
runtime: native
|
|
||||||
spread_by_label: failure_domain
|
|
||||||
max_instances_per_node: 1
|
|
||||||
instance_port: 18190
|
|
||||||
process:
|
|
||||||
command: python3
|
|
||||||
args:
|
|
||||||
- -m
|
|
||||||
- http.server
|
|
||||||
- ${INSTANCE_PORT}
|
|
||||||
- --bind
|
|
||||||
- ${INSTANCE_IP}
|
|
||||||
health_check:
|
|
||||||
type: http
|
|
||||||
path: /
|
|
||||||
interval_secs: 5
|
|
||||||
timeout_secs: 3
|
|
||||||
publish:
|
|
||||||
dns:
|
|
||||||
zone: native.cluster.test
|
|
||||||
name: web
|
|
||||||
ttl: 30
|
|
||||||
mode: load_balancer
|
|
||||||
load_balancer:
|
|
||||||
org_id: native-services
|
|
||||||
project_id: test-cluster
|
|
||||||
listener_port: 18191
|
|
||||||
protocol: http
|
|
||||||
pool_protocol: http
|
|
||||||
- name: native-container
|
|
||||||
protocol: http
|
|
||||||
ports:
|
|
||||||
http: 18192
|
|
||||||
schedule:
|
|
||||||
replicas: 1
|
|
||||||
placement:
|
|
||||||
roles:
|
|
||||||
- worker
|
|
||||||
pools:
|
|
||||||
- general
|
|
||||||
node_classes:
|
|
||||||
- worker-linux
|
|
||||||
match_labels:
|
|
||||||
runtime: native
|
|
||||||
max_instances_per_node: 1
|
|
||||||
instance_port: 18192
|
|
||||||
container:
|
|
||||||
image: docker.io/library/nginx:1.27-alpine
|
|
||||||
runtime: podman
|
|
||||||
pull_policy: if-not-present
|
|
||||||
ports:
|
|
||||||
- container_port: 80
|
|
||||||
host_port: 18192
|
|
||||||
protocol: tcp
|
|
||||||
health_check:
|
|
||||||
type: http
|
|
||||||
path: /
|
|
||||||
interval_secs: 5
|
|
||||||
timeout_secs: 5
|
|
||||||
startup_grace_secs: 120
|
|
||||||
instances: []
|
|
||||||
mtls_policies: []
|
|
||||||
EOF
|
|
||||||
|
|
||||||
cat >"${restored_config}" <<'EOF'
|
|
||||||
cluster:
|
|
||||||
cluster_id: test-cluster
|
|
||||||
environment: test
|
|
||||||
node_classes:
|
|
||||||
- name: worker-linux
|
|
||||||
description: Native runtime worker
|
|
||||||
roles:
|
|
||||||
- worker
|
|
||||||
labels:
|
|
||||||
tier: general
|
|
||||||
runtime: native
|
|
||||||
pools:
|
|
||||||
- name: general
|
|
||||||
description: General-purpose native worker pool
|
|
||||||
node_class: worker-linux
|
|
||||||
labels:
|
|
||||||
pool.photoncloud.io/name: general
|
|
||||||
nodes:
|
|
||||||
- node_id: node04
|
|
||||||
hostname: node04
|
|
||||||
ip: 10.100.0.21
|
|
||||||
roles:
|
|
||||||
- worker
|
|
||||||
labels:
|
|
||||||
runtime: native
|
|
||||||
pool: general
|
|
||||||
node_class: worker-linux
|
|
||||||
failure_domain: zone-b
|
|
||||||
state: active
|
|
||||||
- node_id: node05
|
|
||||||
hostname: node05
|
|
||||||
ip: 10.100.0.22
|
|
||||||
roles:
|
|
||||||
- worker
|
|
||||||
labels:
|
|
||||||
runtime: native
|
|
||||||
pool: general
|
|
||||||
node_class: worker-linux
|
|
||||||
failure_domain: zone-c
|
|
||||||
state: active
|
|
||||||
services:
|
|
||||||
- name: native-web
|
|
||||||
protocol: http
|
|
||||||
ports:
|
|
||||||
http: 18190
|
|
||||||
schedule:
|
|
||||||
replicas: 1
|
|
||||||
placement:
|
|
||||||
roles:
|
|
||||||
- worker
|
|
||||||
pools:
|
|
||||||
- general
|
|
||||||
node_classes:
|
|
||||||
- worker-linux
|
|
||||||
match_labels:
|
|
||||||
runtime: native
|
|
||||||
spread_by_label: failure_domain
|
|
||||||
max_instances_per_node: 1
|
|
||||||
instance_port: 18190
|
|
||||||
process:
|
|
||||||
command: python3
|
|
||||||
args:
|
|
||||||
- -m
|
|
||||||
- http.server
|
|
||||||
- ${INSTANCE_PORT}
|
|
||||||
- --bind
|
|
||||||
- ${INSTANCE_IP}
|
|
||||||
health_check:
|
|
||||||
type: http
|
|
||||||
path: /
|
|
||||||
interval_secs: 5
|
|
||||||
timeout_secs: 3
|
|
||||||
publish:
|
|
||||||
dns:
|
|
||||||
zone: native.cluster.test
|
|
||||||
name: web
|
|
||||||
ttl: 30
|
|
||||||
mode: load_balancer
|
|
||||||
load_balancer:
|
|
||||||
org_id: native-services
|
|
||||||
project_id: test-cluster
|
|
||||||
listener_port: 18191
|
|
||||||
protocol: http
|
|
||||||
pool_protocol: http
|
|
||||||
- name: native-container
|
|
||||||
protocol: http
|
|
||||||
ports:
|
|
||||||
http: 18192
|
|
||||||
schedule:
|
|
||||||
replicas: 1
|
|
||||||
placement:
|
|
||||||
roles:
|
|
||||||
- worker
|
|
||||||
pools:
|
|
||||||
- general
|
|
||||||
node_classes:
|
|
||||||
- worker-linux
|
|
||||||
match_labels:
|
|
||||||
runtime: native
|
|
||||||
max_instances_per_node: 1
|
|
||||||
instance_port: 18192
|
|
||||||
container:
|
|
||||||
image: docker.io/library/nginx:1.27-alpine
|
|
||||||
runtime: podman
|
|
||||||
pull_policy: if-not-present
|
|
||||||
ports:
|
|
||||||
- container_port: 80
|
|
||||||
host_port: 18192
|
|
||||||
protocol: tcp
|
|
||||||
health_check:
|
|
||||||
type: http
|
|
||||||
path: /
|
|
||||||
interval_secs: 5
|
|
||||||
timeout_secs: 5
|
|
||||||
startup_grace_secs: 120
|
|
||||||
instances: []
|
|
||||||
mtls_policies: []
|
|
||||||
EOF
|
|
||||||
|
|
||||||
run_deployer_ctl \
|
run_deployer_ctl \
|
||||||
--chainfire-endpoint "${chainfire_endpoint}" \
|
--chainfire-endpoint "${chainfire_endpoint}" \
|
||||||
--cluster-id "test-cluster" \
|
--cluster-id "test-cluster" \
|
||||||
--cluster-namespace "photoncloud" \
|
--cluster-namespace "photoncloud" \
|
||||||
--deployer-namespace "deployer" \
|
--deployer-namespace "deployer" \
|
||||||
apply --config "${native_config}"
|
node set-state --node-id "${node_id}" --state "${state}"
|
||||||
|
}
|
||||||
|
|
||||||
|
wait_for_native_dump_count \
|
||||||
|
"photoncloud/clusters/test-cluster/services/" \
|
||||||
|
'map(select(.name == "native-web" or .name == "native-container")) | length' \
|
||||||
|
"2" \
|
||||||
|
180
|
||||||
wait_for_native_dump_count \
|
wait_for_native_dump_count \
|
||||||
"photoncloud/clusters/test-cluster/nodes/" \
|
"photoncloud/clusters/test-cluster/nodes/" \
|
||||||
'map(select(.labels.runtime == "native" and .state == "active")) | length' \
|
'map(select(.labels.runtime == "native" and .state == "active")) | length' \
|
||||||
|
|
@ -5390,13 +5040,13 @@ EOF
|
||||||
wait_for_native_dns_record "${publication_fqdn}" "${publication_ip}" 180
|
wait_for_native_dns_record "${publication_fqdn}" "${publication_ip}" 180
|
||||||
wait_for_native_lb_backends "${publication_pool_id}" "2" 180 10.100.0.21 10.100.0.22
|
wait_for_native_lb_backends "${publication_pool_id}" "2" 180 10.100.0.21 10.100.0.22
|
||||||
|
|
||||||
run_deployer_ctl \
|
log "Draining node04 through deployer lifecycle state"
|
||||||
--chainfire-endpoint "${chainfire_endpoint}" \
|
set_native_node_state "node04" "draining"
|
||||||
--cluster-id "test-cluster" \
|
wait_for_native_dump_count \
|
||||||
--cluster-namespace "photoncloud" \
|
"photoncloud/clusters/test-cluster/nodes/" \
|
||||||
--deployer-namespace "deployer" \
|
'map(select(.node_id == "node04" and .state == "draining")) | length' \
|
||||||
apply --config "${drained_config}"
|
"1" \
|
||||||
|
120
|
||||||
wait_for_native_dump_count \
|
wait_for_native_dump_count \
|
||||||
"photoncloud/clusters/test-cluster/instances/native-web/" \
|
"photoncloud/clusters/test-cluster/instances/native-web/" \
|
||||||
'length' \
|
'length' \
|
||||||
|
|
@ -5433,44 +5083,38 @@ EOF
|
||||||
wait_for_native_lb_backends "${publication_pool_id}" "1" 180 10.100.0.22
|
wait_for_native_lb_backends "${publication_pool_id}" "1" 180 10.100.0.22
|
||||||
wait_for_native_dns_record "${publication_fqdn}" "${publication_ip}" 180
|
wait_for_native_dns_record "${publication_fqdn}" "${publication_ip}" 180
|
||||||
|
|
||||||
run_deployer_ctl \
|
log "Restoring node04 and ensuring capacity returns without moving healthy singleton work"
|
||||||
--chainfire-endpoint "${chainfire_endpoint}" \
|
set_native_node_state "node04" "active"
|
||||||
--cluster-id "test-cluster" \
|
wait_for_native_dump_count \
|
||||||
--cluster-namespace "photoncloud" \
|
"photoncloud/clusters/test-cluster/nodes/" \
|
||||||
--deployer-namespace "deployer" \
|
'map(select(.node_id == "node04" and .state == "active")) | length' \
|
||||||
apply --config "${restored_config}"
|
"1" \
|
||||||
|
120
|
||||||
wait_for_native_dump_count \
|
wait_for_native_dump_count \
|
||||||
"photoncloud/clusters/test-cluster/instances/native-web/" \
|
"photoncloud/clusters/test-cluster/instances/native-web/" \
|
||||||
'length' \
|
'length' \
|
||||||
"1" \
|
"2" \
|
||||||
240
|
240
|
||||||
wait_for_native_dump_count \
|
wait_for_native_dump_count \
|
||||||
"photoncloud/clusters/test-cluster/instances/native-web/" \
|
"photoncloud/clusters/test-cluster/instances/native-web/" \
|
||||||
"${native_fresh_healthy_count_expr}" \
|
"${native_fresh_healthy_count_expr}" \
|
||||||
"1" \
|
"2" \
|
||||||
240
|
|
||||||
wait_for_native_dump_count \
|
|
||||||
"photoncloud/clusters/test-cluster/instances/native-container/" \
|
|
||||||
'length' \
|
|
||||||
"1" \
|
|
||||||
240
|
240
|
||||||
wait_for_native_dump_count \
|
wait_for_native_dump_count \
|
||||||
"photoncloud/clusters/test-cluster/instances/native-container/" \
|
"photoncloud/clusters/test-cluster/instances/native-container/" \
|
||||||
"${native_fresh_healthy_count_expr}" \
|
"${native_fresh_healthy_count_expr}" \
|
||||||
"1" \
|
"1" \
|
||||||
240
|
240
|
||||||
local restored_web_value restored_web_node restored_container_value restored_container_node
|
wait_for_native_instance_node "native-web" "node04" 240 >/dev/null
|
||||||
restored_web_value="$(wait_for_native_instance_node "native-web" "node05" 240)"
|
wait_for_native_instance_node "native-web" "node05" 240 >/dev/null
|
||||||
restored_web_node="$(printf '%s' "${restored_web_value}" | jq -r '.node_id')"
|
local restored_container_value restored_container_node
|
||||||
[[ "${restored_web_node}" == "node05" ]] || die "native-web unexpectedly moved after node04 returned to service"
|
|
||||||
restored_container_value="$(wait_for_native_instance_node "native-container" "node05" 240)"
|
restored_container_value="$(wait_for_native_instance_node "native-container" "node05" 240)"
|
||||||
restored_container_node="$(printf '%s' "${restored_container_value}" | jq -r '.node_id')"
|
restored_container_node="$(printf '%s' "${restored_container_value}" | jq -r '.node_id')"
|
||||||
[[ "${restored_container_node}" == "node05" ]] || die "native-container unexpectedly moved after node04 returned to service"
|
[[ "${restored_container_node}" == "node05" ]] || die "native-container unexpectedly moved after node04 returned to service"
|
||||||
publication_value="$(native_publication_state)"
|
publication_value="$(native_publication_state)"
|
||||||
publication_pool_id="$(printf '%s' "${publication_value}" | jq -r '.load_balancer.pool_id')"
|
publication_pool_id="$(printf '%s' "${publication_value}" | jq -r '.load_balancer.pool_id')"
|
||||||
publication_ip="$(printf '%s' "${publication_value}" | jq -r '.dns.value')"
|
publication_ip="$(printf '%s' "${publication_value}" | jq -r '.dns.value')"
|
||||||
wait_for_native_lb_backends "${publication_pool_id}" "1" 180 10.100.0.22
|
wait_for_native_lb_backends "${publication_pool_id}" "2" 180 10.100.0.21 10.100.0.22
|
||||||
wait_for_native_dns_record "${publication_fqdn}" "${publication_ip}" 180
|
wait_for_native_dns_record "${publication_fqdn}" "${publication_ip}" 180
|
||||||
wait_for_http node01 "http://127.0.0.1:18191/" 240
|
wait_for_http node01 "http://127.0.0.1:18191/" 240
|
||||||
|
|
||||||
|
|
@ -5505,24 +5149,42 @@ EOF
|
||||||
wait_for_http node04 "http://10.100.0.21:18192/" 240
|
wait_for_http node04 "http://10.100.0.21:18192/" 240
|
||||||
wait_for_http node01 "http://127.0.0.1:18191/" 240
|
wait_for_http node01 "http://127.0.0.1:18191/" 240
|
||||||
|
|
||||||
log "Restarting native worker and ensuring placement stays stable"
|
log "Restarting native worker and ensuring declarative replica count is restored"
|
||||||
start_vm node05
|
start_vm node05
|
||||||
wait_for_ssh node05
|
wait_for_ssh node05
|
||||||
wait_for_unit node05 plasmavmc
|
wait_for_unit node05 plasmavmc
|
||||||
wait_for_unit node05 lightningstor
|
wait_for_unit node05 lightningstor
|
||||||
wait_for_unit node05 node-agent
|
wait_for_unit node05 node-agent
|
||||||
|
wait_for_native_dump_count \
|
||||||
local recovered_web_value recovered_web_node recovered_container_value recovered_container_node
|
"photoncloud/clusters/test-cluster/nodes/" \
|
||||||
recovered_web_value="$(wait_for_native_instance_node "native-web" "node04" 240)"
|
'map(select(.labels.runtime == "native" and .state == "active")) | length' \
|
||||||
recovered_web_node="$(printf '%s' "${recovered_web_value}" | jq -r '.node_id')"
|
"2" \
|
||||||
[[ "${recovered_web_node}" == "node04" ]] || die "native-web unexpectedly churned after node05 recovered"
|
240
|
||||||
|
wait_for_native_dump_count \
|
||||||
|
"photoncloud/clusters/test-cluster/instances/native-web/" \
|
||||||
|
'length' \
|
||||||
|
"2" \
|
||||||
|
240
|
||||||
|
wait_for_native_dump_count \
|
||||||
|
"photoncloud/clusters/test-cluster/instances/native-web/" \
|
||||||
|
"${native_fresh_healthy_count_expr}" \
|
||||||
|
"2" \
|
||||||
|
240
|
||||||
|
wait_for_native_dump_count \
|
||||||
|
"photoncloud/clusters/test-cluster/instances/native-container/" \
|
||||||
|
"${native_fresh_healthy_count_expr}" \
|
||||||
|
"1" \
|
||||||
|
240
|
||||||
|
wait_for_native_instance_node "native-web" "node04" 240 >/dev/null
|
||||||
|
wait_for_native_instance_node "native-web" "node05" 240 >/dev/null
|
||||||
|
local recovered_container_value recovered_container_node
|
||||||
recovered_container_value="$(wait_for_native_instance_node "native-container" "node04" 240)"
|
recovered_container_value="$(wait_for_native_instance_node "native-container" "node04" 240)"
|
||||||
recovered_container_node="$(printf '%s' "${recovered_container_value}" | jq -r '.node_id')"
|
recovered_container_node="$(printf '%s' "${recovered_container_value}" | jq -r '.node_id')"
|
||||||
[[ "${recovered_container_node}" == "node04" ]] || die "native-container unexpectedly churned after node05 recovered"
|
[[ "${recovered_container_node}" == "node04" ]] || die "native-container unexpectedly churned after node05 recovered"
|
||||||
publication_value="$(native_publication_state)"
|
publication_value="$(native_publication_state)"
|
||||||
publication_pool_id="$(printf '%s' "${publication_value}" | jq -r '.load_balancer.pool_id')"
|
publication_pool_id="$(printf '%s' "${publication_value}" | jq -r '.load_balancer.pool_id')"
|
||||||
publication_ip="$(printf '%s' "${publication_value}" | jq -r '.dns.value')"
|
publication_ip="$(printf '%s' "${publication_value}" | jq -r '.dns.value')"
|
||||||
wait_for_native_lb_backends "${publication_pool_id}" "1" 180 10.100.0.21
|
wait_for_native_lb_backends "${publication_pool_id}" "2" 180 10.100.0.21 10.100.0.22
|
||||||
wait_for_native_dns_record "${publication_fqdn}" "${publication_ip}" 180
|
wait_for_native_dns_record "${publication_fqdn}" "${publication_ip}" 180
|
||||||
wait_for_http node01 "http://127.0.0.1:18191/" 240
|
wait_for_http node01 "http://127.0.0.1:18191/" 240
|
||||||
|
|
||||||
|
|
@ -5532,7 +5194,6 @@ EOF
|
||||||
stop_ssh_tunnel node01 "${chainfire_tunnel_node01}"
|
stop_ssh_tunnel node01 "${chainfire_tunnel_node01}"
|
||||||
stop_ssh_tunnel node02 "${chainfire_tunnel_node02}"
|
stop_ssh_tunnel node02 "${chainfire_tunnel_node02}"
|
||||||
stop_ssh_tunnel node03 "${chainfire_tunnel_node03}"
|
stop_ssh_tunnel node03 "${chainfire_tunnel_node03}"
|
||||||
rm -rf "${tmp_dir}"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
validate_network_provider_matrix() {
|
validate_network_provider_matrix() {
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue