Move native runtime seed state into declarative Nix
This commit is contained in:
parent 9d21e2da95
commit d6d96b8c37

6 changed files with 868 additions and 410 deletions
@@ -13,6 +13,7 @@ This flow:
- builds all six VM images on the host
- boots the cluster in dependency order
- validates control-plane, worker, gateway, storage, and fault-injection behavior
- proves that `deployer` seeds scheduler-managed native services directly from declarative Nix cluster state (see the sketch below)
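
A minimal sketch of the declarative service shape the flow seeds from, abbreviated from the `native-web` definition in the test cluster configuration (only a few illustrative fields are shown):

```nix
services.native-web = {
  protocol = "http";
  ports.http = 18190;
  schedule = {
    replicas = 2;
    placement.roles = [ "worker" ];
    process = {
      command = "python3";
      args = [ "-m" "http.server" "\${INSTANCE_PORT}" "--bind" "\${INSTANCE_IP}" ];
    };
  };
};
```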
## Publishable Checks
@@ -184,6 +184,481 @@ let
    };
  };

  mkServicePortsType = types: types.submodule {
    options = {
      http = mkOption {
        type = types.nullOr types.port;
        default = null;
        description = "Optional HTTP port exposed by the service";
      };

      grpc = mkOption {
        type = types.nullOr types.port;
        default = null;
        description = "Optional gRPC port exposed by the service";
      };
    };
  };

  mkProcessType = types: types.submodule {
    options = {
      command = mkOption {
        type = types.str;
        description = "Executable invoked by node-agent";
      };

      args = mkOption {
        type = types.listOf types.str;
        default = [ ];
        description = "Command-line arguments passed to the process";
      };

      workingDir = mkOption {
        type = types.nullOr types.str;
        default = null;
        description = "Optional working directory used when spawning the process";
      };

      env = mkOption {
        type = types.attrsOf types.str;
        default = { };
        description = "Environment variables injected into the process";
      };
    };
  };

  mkContainerPortType = types: types.submodule {
    options = {
      containerPort = mkOption {
        type = types.port;
        description = "Port exposed inside the container";
      };

      hostPort = mkOption {
        type = types.nullOr types.port;
        default = null;
        description = "Optional fixed host port published for this container port";
      };

      protocol = mkOption {
        type = types.nullOr types.str;
        default = null;
        description = "Optional transport protocol for the published port";
      };
    };
  };

  mkContainerVolumeType = types: types.submodule {
    options = {
      source = mkOption {
        type = types.str;
        description = "Host-side volume source path";
      };

      target = mkOption {
        type = types.str;
        description = "Container mount target path";
      };

      readOnly = mkOption {
        type = types.bool;
        default = false;
        description = "Whether the volume should be mounted read-only";
      };
    };
  };

  mkContainerType = types:
    let
      containerPortType = mkContainerPortType types;
      containerVolumeType = mkContainerVolumeType types;
    in types.submodule {
      options = {
        image = mkOption {
          type = types.str;
          description = "Container image reference";
        };

        runtime = mkOption {
          type = types.nullOr types.str;
          default = null;
          description = "Container runtime invoked by node-agent";
        };

        command = mkOption {
          type = types.listOf types.str;
          default = [ ];
          description = "Optional entrypoint override";
        };

        args = mkOption {
          type = types.listOf types.str;
          default = [ ];
          description = "Container arguments appended after the image";
        };

        env = mkOption {
          type = types.attrsOf types.str;
          default = { };
          description = "Environment variables passed to the container runtime";
        };

        ports = mkOption {
          type = types.listOf containerPortType;
          default = [ ];
          description = "Published container ports";
        };

        volumes = mkOption {
          type = types.listOf containerVolumeType;
          default = [ ];
          description = "Host volume mounts passed to the container runtime";
        };

        networkMode = mkOption {
          type = types.nullOr types.str;
          default = null;
          description = "Optional container network mode";
        };

        pullPolicy = mkOption {
          type = types.nullOr types.str;
          default = null;
          description = "Container image pull policy";
        };

        workingDir = mkOption {
          type = types.nullOr types.str;
          default = null;
          description = "Optional container working directory";
        };
      };
    };

  mkHealthCheckType = types: types.submodule {
    options = {
      type = mkOption {
        type = types.str;
        description = "Health check type executed by node-agent";
      };

      path = mkOption {
        type = types.nullOr types.str;
        default = null;
        description = "Optional path used by HTTP health checks";
      };

      intervalSecs = mkOption {
        type = types.nullOr types.ints.positive;
        default = null;
        description = "Health check interval in seconds";
      };

      timeoutSecs = mkOption {
        type = types.nullOr types.ints.positive;
        default = null;
        description = "Health check timeout in seconds";
      };

      startupGraceSecs = mkOption {
        type = types.nullOr types.ints.positive;
        default = null;
        description = "Startup grace period before a service is considered unhealthy";
      };
    };
  };

  mkPlacementPolicyType = types: types.submodule {
    options = {
      roles = mkOption {
        type = types.listOf types.str;
        default = [ ];
        description = "Roles matched by the scheduler placement filter";
      };

      pools = mkOption {
        type = types.listOf types.str;
        default = [ ];
        description = "Pools matched by the scheduler placement filter";
      };

      nodeClasses = mkOption {
        type = types.listOf types.str;
        default = [ ];
        description = "Node classes matched by the scheduler placement filter";
      };

      matchLabels = mkOption {
        type = types.attrsOf types.str;
        default = { };
        description = "Additional label selectors matched by the scheduler";
      };

      spreadByLabel = mkOption {
        type = types.nullOr types.str;
        default = null;
        description = "Optional spread key used when balancing replicas";
      };

      maxInstancesPerNode = mkOption {
        type = types.ints.positive;
        default = 1;
        description = "Maximum number of replicas the scheduler may place on one node";
      };
    };
  };

  mkRolloutStrategyType = types: types.submodule {
    options = {
      maxUnavailable = mkOption {
        type = types.ints.unsigned;
        default = 1;
        description = "Maximum unavailable instances allowed during a rollout";
      };

      maxSurge = mkOption {
        type = types.ints.unsigned;
        default = 1;
        description = "Maximum extra instances allowed during a rollout";
      };
    };
  };

  mkServiceScheduleType = types:
    let
      placementPolicyType = mkPlacementPolicyType types;
      rolloutStrategyType = mkRolloutStrategyType types;
      processType = mkProcessType types;
      containerType = mkContainerType types;
      healthCheckType = mkHealthCheckType types;
    in types.submodule {
      options = {
        replicas = mkOption {
          type = types.ints.positive;
          default = 1;
          description = "Desired number of scheduler-managed replicas";
        };

        placement = mkOption {
          type = placementPolicyType;
          default = { };
          description = "Scheduler placement rules for the service";
        };

        rollout = mkOption {
          type = rolloutStrategyType;
          default = { };
          description = "Rollout budget used by the scheduler";
        };

        instancePort = mkOption {
          type = types.nullOr types.port;
          default = null;
          description = "Host port used when creating service instances";
        };

        meshPort = mkOption {
          type = types.nullOr types.port;
          default = null;
          description = "Optional service mesh port for the managed instances";
        };

        process = mkOption {
          type = types.nullOr processType;
          default = null;
          description = "Process-based runtime specification";
        };

        container = mkOption {
          type = types.nullOr containerType;
          default = null;
          description = "Container-based runtime specification";
        };

        healthCheck = mkOption {
          type = types.nullOr healthCheckType;
          default = null;
          description = "Health check performed by node-agent";
        };
      };
    };

  mkDnsPublicationType = types: types.submodule {
    options = {
      zone = mkOption {
        type = types.str;
        description = "FlashDNS zone used for service publication";
      };

      name = mkOption {
        type = types.nullOr types.str;
        default = null;
        description = "Optional record name inside the published zone";
      };

      ttl = mkOption {
        type = types.ints.positive;
        default = 30;
        description = "DNS TTL for the published record";
      };

      mode = mkOption {
        type = types.enum [ "load_balancer" "direct" ];
        default = "load_balancer";
        description = "Whether DNS publishes the load balancer VIP or a direct instance address";
      };
    };
  };

  mkLoadBalancerPublicationType = types: types.submodule {
    options = {
      orgId = mkOption {
        type = types.nullOr types.str;
        default = null;
        description = "Optional organization used when provisioning FiberLB resources";
      };

      projectId = mkOption {
        type = types.nullOr types.str;
        default = null;
        description = "Optional project used when provisioning FiberLB resources";
      };

      name = mkOption {
        type = types.nullOr types.str;
        default = null;
        description = "Optional explicit load balancer name";
      };

      listenerPort = mkOption {
        type = types.nullOr types.port;
        default = null;
        description = "Listener port exposed by the load balancer";
      };

      protocol = mkOption {
        type = types.nullOr types.str;
        default = null;
        description = "Listener protocol for the published load balancer";
      };

      poolProtocol = mkOption {
        type = types.nullOr types.str;
        default = null;
        description = "Backend pool protocol for the published load balancer";
      };
    };
  };

  mkServicePublicationType = types:
    let
      dnsPublicationType = mkDnsPublicationType types;
      loadBalancerPublicationType = mkLoadBalancerPublicationType types;
    in types.submodule {
      options = {
        orgId = mkOption {
          type = types.nullOr types.str;
          default = null;
          description = "Default organization used for service publication";
        };

        projectId = mkOption {
          type = types.nullOr types.str;
          default = null;
          description = "Default project used for service publication";
        };

        dns = mkOption {
          type = types.nullOr dnsPublicationType;
          default = null;
          description = "Optional FlashDNS publication target";
        };

        loadBalancer = mkOption {
          type = types.nullOr loadBalancerPublicationType;
          default = null;
          description = "Optional FiberLB publication target";
        };
      };
    };

  mkServiceType = types:
    let
      servicePortsType = mkServicePortsType types;
      serviceScheduleType = mkServiceScheduleType types;
      servicePublicationType = mkServicePublicationType types;
    in types.submodule {
      options = {
        ports = mkOption {
          type = types.nullOr servicePortsType;
          default = null;
          description = "Optional logical service ports";
        };

        protocol = mkOption {
          type = types.nullOr types.str;
          default = null;
          description = "Optional service protocol";
        };

        mtlsRequired = mkOption {
          type = types.nullOr types.bool;
          default = null;
          description = "Whether service-to-service traffic requires mTLS";
        };

        meshMode = mkOption {
          type = types.nullOr types.str;
          default = null;
          description = "Optional mesh publication mode";
        };

        schedule = mkOption {
          type = types.nullOr serviceScheduleType;
          default = null;
          description = "Scheduler-managed runtime intent";
        };

        publish = mkOption {
          type = types.nullOr servicePublicationType;
          default = null;
          description = "Optional publication targets for the service";
        };
      };
    };

  mkMtlsPolicyType = types: types.submodule {
    options = {
      environment = mkOption {
        type = types.nullOr types.str;
        default = null;
        description = "Optional environment scope for the policy";
      };

      sourceService = mkOption {
        type = types.str;
        description = "Source service matched by the policy";
      };

      targetService = mkOption {
        type = types.str;
        description = "Target service matched by the policy";
      };

      mtlsRequired = mkOption {
        type = types.nullOr types.bool;
        default = null;
        description = "Whether the policy enforces mTLS";
      };

      mode = mkOption {
        type = types.nullOr types.str;
        default = null;
        description = "Optional policy mode";
      };
    };
  };

  mkNodeType = types:
    let
      installPlanType = mkInstallPlanType types;
@@ -664,6 +1139,205 @@ let
      node_id_prefix = rule.nodeIdPrefix;
    };

  mkServicePorts = ports:
    optionalAttrs (ports != null && ports.http != null) {
      http = ports.http;
    }
    // optionalAttrs (ports != null && ports.grpc != null) {
      grpc = ports.grpc;
    };

  mkProcessSpec = process:
    {
      command = process.command;
      args = process.args;
      env = process.env;
    }
    // optionalAttrs (process.workingDir != null) {
      working_dir = process.workingDir;
    };

  mkContainerPortSpec = port:
    {
      container_port = port.containerPort;
    }
    // optionalAttrs (port.hostPort != null) {
      host_port = port.hostPort;
    }
    // optionalAttrs (port.protocol != null) {
      protocol = port.protocol;
    };

  mkContainerVolumeSpec = volume:
    {
      source = volume.source;
      target = volume.target;
    }
    // optionalAttrs volume.readOnly {
      read_only = true;
    };

  mkContainerSpec = container:
    {
      image = container.image;
      command = container.command;
      args = container.args;
      env = container.env;
      ports = map mkContainerPortSpec container.ports;
      volumes = map mkContainerVolumeSpec container.volumes;
    }
    // optionalAttrs (container.runtime != null) {
      runtime = container.runtime;
    }
    // optionalAttrs (container.networkMode != null) {
      network_mode = container.networkMode;
    }
    // optionalAttrs (container.pullPolicy != null) {
      pull_policy = container.pullPolicy;
    }
    // optionalAttrs (container.workingDir != null) {
      working_dir = container.workingDir;
    };

  mkHealthCheckSpec = healthCheck:
    {
      type = healthCheck.type;
    }
    // optionalAttrs (healthCheck.path != null) {
      path = healthCheck.path;
    }
    // optionalAttrs (healthCheck.intervalSecs != null) {
      interval_secs = healthCheck.intervalSecs;
    }
    // optionalAttrs (healthCheck.timeoutSecs != null) {
      timeout_secs = healthCheck.timeoutSecs;
    }
    // optionalAttrs (healthCheck.startupGraceSecs != null) {
      startup_grace_secs = healthCheck.startupGraceSecs;
    };

  mkPlacementPolicySpec = placement:
    {
      roles = placement.roles;
      pools = placement.pools;
      node_classes = placement.nodeClasses;
      match_labels = placement.matchLabels;
      max_instances_per_node = placement.maxInstancesPerNode;
    }
    // optionalAttrs (placement.spreadByLabel != null) {
      spread_by_label = placement.spreadByLabel;
    };

  mkRolloutStrategySpec = rollout: {
    max_unavailable = rollout.maxUnavailable;
    max_surge = rollout.maxSurge;
  };

  mkServiceScheduleSpec = schedule:
    {
      replicas = schedule.replicas;
      placement = mkPlacementPolicySpec schedule.placement;
      rollout = mkRolloutStrategySpec schedule.rollout;
    }
    // optionalAttrs (schedule.instancePort != null) {
      instance_port = schedule.instancePort;
    }
    // optionalAttrs (schedule.meshPort != null) {
      mesh_port = schedule.meshPort;
    }
    // optionalAttrs (schedule.process != null) {
      process = mkProcessSpec schedule.process;
    }
    // optionalAttrs (schedule.container != null) {
      container = mkContainerSpec schedule.container;
    }
    // optionalAttrs (schedule.healthCheck != null) {
      health_check = mkHealthCheckSpec schedule.healthCheck;
    };

  mkDnsPublicationSpec = dns:
    {
      zone = dns.zone;
      ttl = dns.ttl;
      mode = dns.mode;
    }
    // optionalAttrs (dns.name != null) {
      name = dns.name;
    };

  mkLoadBalancerPublicationSpec = loadBalancer:
    optionalAttrs (loadBalancer.orgId != null) {
      org_id = loadBalancer.orgId;
    }
    // optionalAttrs (loadBalancer.projectId != null) {
      project_id = loadBalancer.projectId;
    }
    // optionalAttrs (loadBalancer.name != null) {
      name = loadBalancer.name;
    }
    // optionalAttrs (loadBalancer.listenerPort != null) {
      listener_port = loadBalancer.listenerPort;
    }
    // optionalAttrs (loadBalancer.protocol != null) {
      protocol = loadBalancer.protocol;
    }
    // optionalAttrs (loadBalancer.poolProtocol != null) {
      pool_protocol = loadBalancer.poolProtocol;
    };

  mkServicePublicationSpec = publish:
    optionalAttrs (publish.orgId != null) {
      org_id = publish.orgId;
    }
    // optionalAttrs (publish.projectId != null) {
      project_id = publish.projectId;
    }
    // optionalAttrs (publish.dns != null) {
      dns = mkDnsPublicationSpec publish.dns;
    }
    // optionalAttrs (publish.loadBalancer != null) {
      load_balancer = mkLoadBalancerPublicationSpec publish.loadBalancer;
    };

  mkDeployerServiceSpec = name: service:
    {
      inherit name;
    }
    // optionalAttrs (service.ports != null && mkServicePorts service.ports != { }) {
      ports = mkServicePorts service.ports;
    }
    // optionalAttrs (service.protocol != null) {
      protocol = service.protocol;
    }
    // optionalAttrs (service.mtlsRequired != null) {
      mtls_required = service.mtlsRequired;
    }
    // optionalAttrs (service.meshMode != null) {
      mesh_mode = service.meshMode;
    }
    // optionalAttrs (service.schedule != null) {
      schedule = mkServiceScheduleSpec service.schedule;
    }
    // optionalAttrs (service.publish != null) {
      publish = mkServicePublicationSpec service.publish;
    };

  mkDeployerMtlsPolicySpec = name: policy:
    {
      policy_id = name;
      source_service = policy.sourceService;
      target_service = policy.targetService;
    }
    // optionalAttrs (policy.environment != null) {
      environment = policy.environment;
    }
    // optionalAttrs (policy.mtlsRequired != null) {
      mtls_required = policy.mtlsRequired;
    }
    // optionalAttrs (policy.mode != null) {
      mode = policy.mode;
    };

  mkClusterConfig = {
    cluster,
    hostname,
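
For orientation, a sketch of the camelCase-to-snake_case rendering these helpers perform, written as a single Nix expression; the values mirror the `native-web` schedule defined in the test cluster further down, and the process/container/healthCheck branches are omitted only to keep the sketch short:

```nix
let
  # Nix-side schedule, written with the module option names above.
  schedule = {
    replicas = 2;
    placement = {
      roles = [ "worker" ];
      pools = [ "general" ];
      nodeClasses = [ "worker-linux" ];
      matchLabels = { runtime = "native"; };
      spreadByLabel = "failure_domain";
      maxInstancesPerNode = 1;
    };
    rollout = { maxUnavailable = 1; maxSurge = 1; };
    instancePort = 18190;
  };
  # The shape mkServiceScheduleSpec renders it to: snake_case keys as consumed by deployer.
  rendered = {
    replicas = 2;
    placement = {
      roles = [ "worker" ];
      pools = [ "general" ];
      node_classes = [ "worker-linux" ];
      match_labels = { runtime = "native"; };
      max_instances_per_node = 1;
      spread_by_label = "failure_domain";
    };
    rollout = { max_unavailable = 1; max_surge = 1; };
    instance_port = 18190;
  };
in rendered
```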
@@ -729,6 +1403,8 @@ let
      pools = deployer.pools or { };
      enrollmentRules = deployer.enrollmentRules or { };
      hostDeployments = deployer.hostDeployments or { };
      services = deployer.services or { };
      mtlsPolicies = deployer.mtlsPolicies or { };
    in {
      cluster = {
        cluster_id = clusterId;
@@ -740,9 +1416,9 @@ let
        pools = map (name: mkDeployerPoolSpec name pools.${name}) (attrNames pools);
        enrollment_rules = map (name: mkDeployerEnrollmentRuleSpec name enrollmentRules.${name}) (attrNames enrollmentRules);
        host_deployments = map (name: mkDeployerHostDeploymentSpec name hostDeployments.${name}) (attrNames hostDeployments);
        services = [ ];
        services = map (name: mkDeployerServiceSpec name services.${name}) (attrNames services);
        instances = [ ];
        mtls_policies = [ ];
        mtls_policies = map (name: mkDeployerMtlsPolicySpec name mtlsPolicies.${name}) (attrNames mtlsPolicies);
      };
  in
  {
@@ -751,6 +1427,20 @@ in
    mkDesiredSystemType
    mkHostDeploymentSelectorType
    mkHostDeploymentType
    mkServicePortsType
    mkProcessType
    mkContainerPortType
    mkContainerVolumeType
    mkContainerType
    mkHealthCheckType
    mkPlacementPolicyType
    mkRolloutStrategyType
    mkServiceScheduleType
    mkDnsPublicationType
    mkLoadBalancerPublicationType
    mkServicePublicationType
    mkServiceType
    mkMtlsPolicyType
    mkNodeType
    mkNodeClassType
    mkNodePoolType
@@ -10,6 +10,8 @@ let
  nodePoolType = clusterConfigLib.mkNodePoolType types;
  enrollmentRuleType = clusterConfigLib.mkEnrollmentRuleType types;
  hostDeploymentType = clusterConfigLib.mkHostDeploymentType types;
  serviceType = clusterConfigLib.mkServiceType types;
  mtlsPolicyType = clusterConfigLib.mkMtlsPolicyType types;
  jsonFormat = pkgs.formats.json { };

  # Generate cluster-config.json for the current node
@@ -105,6 +107,18 @@ in {
      default = { };
      description = "Declarative host rollout objects derived from Nix";
    };

    services = mkOption {
      type = types.attrsOf serviceType;
      default = { };
      description = "Scheduler-managed service definitions derived from Nix";
    };

    mtlsPolicies = mkOption {
      type = types.attrsOf mtlsPolicyType;
      default = { };
      description = "Declarative mTLS policies derived from Nix";
    };
  };

  generated = {
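
A minimal sketch of how the new options could be set from a node's cluster definition; the enclosing option path is elided because it is not visible in this hunk, and the policy name and values are illustrative only:

```nix
{
  services.native-web = {
    protocol = "http";
    ports.http = 18190;
  };
  # Hypothetical policy; only the attribute names come from mkMtlsPolicyType above.
  mtlsPolicies.native-web-ingress = {
    sourceService = "apigateway";
    targetService = "native-web";
    mtlsRequired = true;
  };
}
```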
@@ -14,6 +14,7 @@ All VM images are built on the host in a single Nix invocation and then booted a
- gateway-node `apigateway`, `nightlight`, and minimal `creditservice` startup
- host-forwarded access to the API gateway and NightLight HTTP surfaces
- cross-node data replication smoke tests for `chainfire` and `flaredb`
- deployer-seeded native runtime scheduling from declarative Nix service definitions, including drain/failover recovery

## Validation layers
@@ -201,6 +201,7 @@ in
        pool = "general";
        nodeClass = "worker-linux";
        failureDomain = "zone-b";
        state = "provisioning";
        raftPort = 2380;
        apiPort = 2379;
      };
@@ -214,6 +215,7 @@ in
        pool = "general";
        nodeClass = "worker-linux";
        failureDomain = "zone-c";
        state = "provisioning";
        raftPort = 2380;
        apiPort = 2379;
      };
@@ -273,6 +275,95 @@ in
        };
      };
    };

    services = {
      native-web = {
        protocol = "http";
        ports.http = 18190;
        schedule = {
          replicas = 2;
          placement = {
            roles = [ "worker" ];
            pools = [ "general" ];
            nodeClasses = [ "worker-linux" ];
            matchLabels = {
              runtime = "native";
            };
            spreadByLabel = "failure_domain";
            maxInstancesPerNode = 1;
          };
          instancePort = 18190;
          process = {
            command = "python3";
            args = [
              "-m"
              "http.server"
              "\${INSTANCE_PORT}"
              "--bind"
              "\${INSTANCE_IP}"
            ];
          };
          healthCheck = {
            type = "http";
            path = "/";
            intervalSecs = 5;
            timeoutSecs = 3;
          };
        };
        publish = {
          dns = {
            zone = "native.cluster.test";
            name = "web";
            ttl = 30;
            mode = "load_balancer";
          };
          loadBalancer = {
            orgId = "native-services";
            projectId = "test-cluster";
            listenerPort = 18191;
            protocol = "http";
            poolProtocol = "http";
          };
        };
      };

      native-container = {
        protocol = "http";
        ports.http = 18192;
        schedule = {
          replicas = 1;
          placement = {
            roles = [ "worker" ];
            pools = [ "general" ];
            nodeClasses = [ "worker-linux" ];
            matchLabels = {
              runtime = "native";
            };
            maxInstancesPerNode = 1;
          };
          instancePort = 18192;
          container = {
            image = "docker.io/library/nginx:1.27-alpine";
            runtime = "podman";
            pullPolicy = "if-not-present";
            ports = [
              {
                containerPort = 80;
                hostPort = 18192;
                protocol = "tcp";
              }
            ];
          };
          healthCheck = {
            type = "http";
            path = "/";
            intervalSecs = 5;
            timeoutSecs = 5;
            startupGraceSecs = 120;
          };
        };
      };
    };
  };

  bootstrap.initialPeers = [ "node01" "node02" "node03" ];
@@ -4805,28 +4805,23 @@ validate_deployer_flow() {
}

validate_native_runtime_flow() {
  log "Validating native deployer + scheduler runtime orchestration"
  log "Validating native deployer + scheduler runtime orchestration from declarative Nix seed"

  wait_for_unit node04 node-agent
  wait_for_unit node05 node-agent
  wait_for_unit node06 fleet-scheduler
  wait_for_http node06 "http://127.0.0.1:8088/health"

  local tmp_dir native_config drained_config restored_config
  local chainfire_tunnel_node01="" chainfire_tunnel_node02="" chainfire_tunnel_node03=""
  local chainfire_endpoint="http://127.0.0.1:12379,http://127.0.0.1:12380,http://127.0.0.1:12381"
  local iam_tunnel="" lb_tunnel="" token lb_name
  local native_fresh_healthy_map_expr native_fresh_healthy_count_expr
  tmp_dir="$(mktemp -d -p "${TMPDIR:-/tmp}" photon-native-runtime-XXXXXX)"
  native_config="${tmp_dir}/native-runtime.yaml"
  drained_config="${tmp_dir}/native-runtime-drained.yaml"
  restored_config="${tmp_dir}/native-runtime-restored.yaml"
  native_fresh_healthy_map_expr='map(select(.state == "healthy" and (((((.last_heartbeat // .observed_at) // "") | sub("\\.[0-9]+"; "") | sub("\\+00:00$"; "Z") | fromdateiso8601?) // 0) >= (now - 300))))'
  native_fresh_healthy_count_expr="${native_fresh_healthy_map_expr} | length"
  chainfire_tunnel_node01="$(start_ssh_tunnel node01 12379 2379 "${NODE_IPS[node01]}")"
  chainfire_tunnel_node02="$(start_ssh_tunnel node02 12380 2379 "${NODE_IPS[node02]}")"
  chainfire_tunnel_node03="$(start_ssh_tunnel node03 12381 2379 "${NODE_IPS[node03]}")"
  trap 'stop_ssh_tunnel node01 "${lb_tunnel}"; stop_ssh_tunnel node01 "${iam_tunnel}"; stop_ssh_tunnel node01 "${chainfire_tunnel_node01}"; stop_ssh_tunnel node02 "${chainfire_tunnel_node02}"; stop_ssh_tunnel node03 "${chainfire_tunnel_node03}"; rm -rf "${tmp_dir}"' RETURN
  trap 'stop_ssh_tunnel node01 "${lb_tunnel}"; stop_ssh_tunnel node01 "${iam_tunnel}"; stop_ssh_tunnel node01 "${chainfire_tunnel_node01}"; stop_ssh_tunnel node02 "${chainfire_tunnel_node02}"; stop_ssh_tunnel node03 "${chainfire_tunnel_node03}"' RETURN

  native_dump_values() {
    local prefix="$1"
@@ -4879,7 +4874,13 @@ validate_native_runtime_flow() {
  local instance_value="" node_id=""

  while true; do
    instance_value="$(native_first_healthy_instance "${service}")"
    instance_value="$(
      native_dump_values "photoncloud/clusters/test-cluster/instances/${service}/" \
        | sed '/^$/d' \
        | jq -sr \
          --arg node "${expected_node}" \
          "${native_fresh_healthy_map_expr} | map(select(.node_id == \$node)) | sort_by(.instance_id) | first"
    )"
    node_id="$(printf '%s' "${instance_value}" | jq -r '.node_id // empty')"
    if [[ "${node_id}" == "${expected_node}" ]]; then
      printf '%s' "${instance_value}"
@@ -4955,373 +4956,22 @@ validate_native_runtime_flow() {
    done
  }

  cat >"${native_config}" <<'EOF'
cluster:
  cluster_id: test-cluster
  environment: test
  node_classes:
    - name: worker-linux
      description: Native runtime worker
      roles:
        - worker
      labels:
        tier: general
        runtime: native
  pools:
    - name: general
      description: General-purpose native worker pool
      node_class: worker-linux
      labels:
        pool.photoncloud.io/name: general
  nodes:
    - node_id: node04
      hostname: node04
      ip: 10.100.0.21
      roles:
        - worker
      labels:
        runtime: native
      pool: general
      node_class: worker-linux
      failure_domain: zone-b
      state: provisioning
    - node_id: node05
      hostname: node05
      ip: 10.100.0.22
      roles:
        - worker
      labels:
        runtime: native
      pool: general
      node_class: worker-linux
      failure_domain: zone-c
      state: provisioning
  services:
    - name: native-web
      protocol: http
      ports:
        http: 18190
      schedule:
        replicas: 2
        placement:
          roles:
            - worker
          pools:
            - general
          node_classes:
            - worker-linux
          match_labels:
            runtime: native
          spread_by_label: failure_domain
          max_instances_per_node: 1
        instance_port: 18190
        process:
          command: python3
          args:
            - -m
            - http.server
            - ${INSTANCE_PORT}
            - --bind
            - ${INSTANCE_IP}
        health_check:
          type: http
          path: /
          interval_secs: 5
          timeout_secs: 3
      publish:
        dns:
          zone: native.cluster.test
          name: web
          ttl: 30
          mode: load_balancer
        load_balancer:
          org_id: native-services
          project_id: test-cluster
          listener_port: 18191
          protocol: http
          pool_protocol: http
    - name: native-container
      protocol: http
      ports:
        http: 18192
      schedule:
        replicas: 1
        placement:
          roles:
            - worker
          pools:
            - general
          node_classes:
            - worker-linux
          match_labels:
            runtime: native
          max_instances_per_node: 1
        instance_port: 18192
        container:
          image: docker.io/library/nginx:1.27-alpine
          runtime: podman
          pull_policy: if-not-present
          ports:
            - container_port: 80
              host_port: 18192
              protocol: tcp
        health_check:
          type: http
          path: /
          interval_secs: 5
          timeout_secs: 5
          startup_grace_secs: 120
  instances: []
  mtls_policies: []
EOF

  cat >"${drained_config}" <<'EOF'
cluster:
  cluster_id: test-cluster
  environment: test
  node_classes:
    - name: worker-linux
      description: Native runtime worker
      roles:
        - worker
      labels:
        tier: general
        runtime: native
  pools:
    - name: general
      description: General-purpose native worker pool
      node_class: worker-linux
      labels:
        pool.photoncloud.io/name: general
  nodes:
    - node_id: node04
      hostname: node04
      ip: 10.100.0.21
      roles:
        - worker
      labels:
        runtime: native
      pool: general
      node_class: worker-linux
      failure_domain: zone-b
      state: draining
    - node_id: node05
      hostname: node05
      ip: 10.100.0.22
      roles:
        - worker
      labels:
        runtime: native
      pool: general
      node_class: worker-linux
      failure_domain: zone-c
      state: active
  services:
    - name: native-web
      protocol: http
      ports:
        http: 18190
      schedule:
        replicas: 1
        placement:
          roles:
            - worker
          pools:
            - general
          node_classes:
            - worker-linux
          match_labels:
            runtime: native
          spread_by_label: failure_domain
          max_instances_per_node: 1
        instance_port: 18190
        process:
          command: python3
          args:
            - -m
            - http.server
            - ${INSTANCE_PORT}
            - --bind
            - ${INSTANCE_IP}
        health_check:
          type: http
          path: /
          interval_secs: 5
          timeout_secs: 3
      publish:
        dns:
          zone: native.cluster.test
          name: web
          ttl: 30
          mode: load_balancer
        load_balancer:
          org_id: native-services
          project_id: test-cluster
          listener_port: 18191
          protocol: http
          pool_protocol: http
    - name: native-container
      protocol: http
      ports:
        http: 18192
      schedule:
        replicas: 1
        placement:
          roles:
            - worker
          pools:
            - general
          node_classes:
            - worker-linux
          match_labels:
            runtime: native
          max_instances_per_node: 1
        instance_port: 18192
        container:
          image: docker.io/library/nginx:1.27-alpine
          runtime: podman
          pull_policy: if-not-present
          ports:
            - container_port: 80
              host_port: 18192
              protocol: tcp
        health_check:
          type: http
          path: /
          interval_secs: 5
          timeout_secs: 5
          startup_grace_secs: 120
  instances: []
  mtls_policies: []
EOF

  cat >"${restored_config}" <<'EOF'
cluster:
  cluster_id: test-cluster
  environment: test
  node_classes:
    - name: worker-linux
      description: Native runtime worker
      roles:
        - worker
      labels:
        tier: general
        runtime: native
  pools:
    - name: general
      description: General-purpose native worker pool
      node_class: worker-linux
      labels:
        pool.photoncloud.io/name: general
  nodes:
    - node_id: node04
      hostname: node04
      ip: 10.100.0.21
      roles:
        - worker
      labels:
        runtime: native
      pool: general
      node_class: worker-linux
      failure_domain: zone-b
      state: active
    - node_id: node05
      hostname: node05
      ip: 10.100.0.22
      roles:
        - worker
      labels:
        runtime: native
      pool: general
      node_class: worker-linux
      failure_domain: zone-c
      state: active
  services:
    - name: native-web
      protocol: http
      ports:
        http: 18190
      schedule:
        replicas: 1
        placement:
          roles:
            - worker
          pools:
            - general
          node_classes:
            - worker-linux
          match_labels:
            runtime: native
          spread_by_label: failure_domain
          max_instances_per_node: 1
        instance_port: 18190
        process:
          command: python3
          args:
            - -m
            - http.server
            - ${INSTANCE_PORT}
            - --bind
            - ${INSTANCE_IP}
        health_check:
          type: http
          path: /
          interval_secs: 5
          timeout_secs: 3
      publish:
        dns:
          zone: native.cluster.test
          name: web
          ttl: 30
          mode: load_balancer
        load_balancer:
          org_id: native-services
          project_id: test-cluster
          listener_port: 18191
          protocol: http
          pool_protocol: http
    - name: native-container
      protocol: http
      ports:
        http: 18192
      schedule:
        replicas: 1
        placement:
          roles:
            - worker
          pools:
            - general
          node_classes:
            - worker-linux
          match_labels:
            runtime: native
          max_instances_per_node: 1
        instance_port: 18192
        container:
          image: docker.io/library/nginx:1.27-alpine
          runtime: podman
          pull_policy: if-not-present
          ports:
            - container_port: 80
              host_port: 18192
              protocol: tcp
        health_check:
          type: http
          path: /
          interval_secs: 5
          timeout_secs: 5
          startup_grace_secs: 120
  instances: []
  mtls_policies: []
EOF

  set_native_node_state() {
    local node_id="$1"
    local state="$2"
    run_deployer_ctl \
      --chainfire-endpoint "${chainfire_endpoint}" \
      --cluster-id "test-cluster" \
      --cluster-namespace "photoncloud" \
      --deployer-namespace "deployer" \
      apply --config "${native_config}"
      node set-state --node-id "${node_id}" --state "${state}"
  }

  wait_for_native_dump_count \
    "photoncloud/clusters/test-cluster/services/" \
    'map(select(.name == "native-web" or .name == "native-container")) | length' \
    "2" \
    180
  wait_for_native_dump_count \
    "photoncloud/clusters/test-cluster/nodes/" \
    'map(select(.labels.runtime == "native" and .state == "active")) | length' \
@@ -5390,13 +5040,13 @@ EOF
  wait_for_native_dns_record "${publication_fqdn}" "${publication_ip}" 180
  wait_for_native_lb_backends "${publication_pool_id}" "2" 180 10.100.0.21 10.100.0.22

  run_deployer_ctl \
    --chainfire-endpoint "${chainfire_endpoint}" \
    --cluster-id "test-cluster" \
    --cluster-namespace "photoncloud" \
    --deployer-namespace "deployer" \
    apply --config "${drained_config}"

  log "Draining node04 through deployer lifecycle state"
  set_native_node_state "node04" "draining"
  wait_for_native_dump_count \
    "photoncloud/clusters/test-cluster/nodes/" \
    'map(select(.node_id == "node04" and .state == "draining")) | length' \
    "1" \
    120
  wait_for_native_dump_count \
    "photoncloud/clusters/test-cluster/instances/native-web/" \
    'length' \
|
@ -5433,44 +5083,38 @@ EOF
|
|||
wait_for_native_lb_backends "${publication_pool_id}" "1" 180 10.100.0.22
|
||||
wait_for_native_dns_record "${publication_fqdn}" "${publication_ip}" 180
|
||||
|
||||
run_deployer_ctl \
|
||||
--chainfire-endpoint "${chainfire_endpoint}" \
|
||||
--cluster-id "test-cluster" \
|
||||
--cluster-namespace "photoncloud" \
|
||||
--deployer-namespace "deployer" \
|
||||
apply --config "${restored_config}"
|
||||
|
||||
log "Restoring node04 and ensuring capacity returns without moving healthy singleton work"
|
||||
set_native_node_state "node04" "active"
|
||||
wait_for_native_dump_count \
|
||||
"photoncloud/clusters/test-cluster/nodes/" \
|
||||
'map(select(.node_id == "node04" and .state == "active")) | length' \
|
||||
"1" \
|
||||
120
|
||||
wait_for_native_dump_count \
|
||||
"photoncloud/clusters/test-cluster/instances/native-web/" \
|
||||
'length' \
|
||||
"1" \
|
||||
"2" \
|
||||
240
|
||||
wait_for_native_dump_count \
|
||||
"photoncloud/clusters/test-cluster/instances/native-web/" \
|
||||
"${native_fresh_healthy_count_expr}" \
|
||||
"1" \
|
||||
240
|
||||
wait_for_native_dump_count \
|
||||
"photoncloud/clusters/test-cluster/instances/native-container/" \
|
||||
'length' \
|
||||
"1" \
|
||||
"2" \
|
||||
240
|
||||
wait_for_native_dump_count \
|
||||
"photoncloud/clusters/test-cluster/instances/native-container/" \
|
||||
"${native_fresh_healthy_count_expr}" \
|
||||
"1" \
|
||||
240
|
||||
local restored_web_value restored_web_node restored_container_value restored_container_node
|
||||
restored_web_value="$(wait_for_native_instance_node "native-web" "node05" 240)"
|
||||
restored_web_node="$(printf '%s' "${restored_web_value}" | jq -r '.node_id')"
|
||||
[[ "${restored_web_node}" == "node05" ]] || die "native-web unexpectedly moved after node04 returned to service"
|
||||
wait_for_native_instance_node "native-web" "node04" 240 >/dev/null
|
||||
wait_for_native_instance_node "native-web" "node05" 240 >/dev/null
|
||||
local restored_container_value restored_container_node
|
||||
restored_container_value="$(wait_for_native_instance_node "native-container" "node05" 240)"
|
||||
restored_container_node="$(printf '%s' "${restored_container_value}" | jq -r '.node_id')"
|
||||
[[ "${restored_container_node}" == "node05" ]] || die "native-container unexpectedly moved after node04 returned to service"
|
||||
publication_value="$(native_publication_state)"
|
||||
publication_pool_id="$(printf '%s' "${publication_value}" | jq -r '.load_balancer.pool_id')"
|
||||
publication_ip="$(printf '%s' "${publication_value}" | jq -r '.dns.value')"
|
||||
wait_for_native_lb_backends "${publication_pool_id}" "1" 180 10.100.0.22
|
||||
wait_for_native_lb_backends "${publication_pool_id}" "2" 180 10.100.0.21 10.100.0.22
|
||||
wait_for_native_dns_record "${publication_fqdn}" "${publication_ip}" 180
|
||||
wait_for_http node01 "http://127.0.0.1:18191/" 240
|
||||
|
||||
|
|
@@ -5505,24 +5149,42 @@ EOF
  wait_for_http node04 "http://10.100.0.21:18192/" 240
  wait_for_http node01 "http://127.0.0.1:18191/" 240

  log "Restarting native worker and ensuring placement stays stable"
  log "Restarting native worker and ensuring declarative replica count is restored"
  start_vm node05
  wait_for_ssh node05
  wait_for_unit node05 plasmavmc
  wait_for_unit node05 lightningstor
  wait_for_unit node05 node-agent

  local recovered_web_value recovered_web_node recovered_container_value recovered_container_node
  recovered_web_value="$(wait_for_native_instance_node "native-web" "node04" 240)"
  recovered_web_node="$(printf '%s' "${recovered_web_value}" | jq -r '.node_id')"
  [[ "${recovered_web_node}" == "node04" ]] || die "native-web unexpectedly churned after node05 recovered"
  wait_for_native_dump_count \
    "photoncloud/clusters/test-cluster/nodes/" \
    'map(select(.labels.runtime == "native" and .state == "active")) | length' \
    "2" \
    240
  wait_for_native_dump_count \
    "photoncloud/clusters/test-cluster/instances/native-web/" \
    'length' \
    "2" \
    240
  wait_for_native_dump_count \
    "photoncloud/clusters/test-cluster/instances/native-web/" \
    "${native_fresh_healthy_count_expr}" \
    "2" \
    240
  wait_for_native_dump_count \
    "photoncloud/clusters/test-cluster/instances/native-container/" \
    "${native_fresh_healthy_count_expr}" \
    "1" \
    240
  wait_for_native_instance_node "native-web" "node04" 240 >/dev/null
  wait_for_native_instance_node "native-web" "node05" 240 >/dev/null
  local recovered_container_value recovered_container_node
  recovered_container_value="$(wait_for_native_instance_node "native-container" "node04" 240)"
  recovered_container_node="$(printf '%s' "${recovered_container_value}" | jq -r '.node_id')"
  [[ "${recovered_container_node}" == "node04" ]] || die "native-container unexpectedly churned after node05 recovered"
  publication_value="$(native_publication_state)"
  publication_pool_id="$(printf '%s' "${publication_value}" | jq -r '.load_balancer.pool_id')"
  publication_ip="$(printf '%s' "${publication_value}" | jq -r '.dns.value')"
  wait_for_native_lb_backends "${publication_pool_id}" "1" 180 10.100.0.21
  wait_for_native_lb_backends "${publication_pool_id}" "2" 180 10.100.0.21 10.100.0.22
  wait_for_native_dns_record "${publication_fqdn}" "${publication_ip}" 180
  wait_for_http node01 "http://127.0.0.1:18191/" 240

@@ -5532,7 +5194,6 @@ EOF
  stop_ssh_tunnel node01 "${chainfire_tunnel_node01}"
  stop_ssh_tunnel node02 "${chainfire_tunnel_node02}"
  stop_ssh_tunnel node03 "${chainfire_tunnel_node03}"
  rm -rf "${tmp_dir}"
}

validate_network_provider_matrix() {