# photoncloud-monorepo/nix/tests/deployer-vm-smoke.nix
#
# 403 lines
# 15 KiB
# Nix

# Definition of the "deployer-vm-smoke" VM test: evaluates to an attribute set
# with `name`, `nodes` (the `deployer` and `worker` machines) and `testScript`.
#
# Arguments:
#   pkgs                    - package set; supplies the generic tools (curl, gnutar,
#                             gzip, jq) installed on both nodes.
#   photoncloudPackages     - project package set; chainfire-server, deployer-server,
#                             deployer-ctl, nix-agent and plasmacloudFlakeBundle are
#                             read from it.
#   smokeTargetToplevel     - path added to the worker VM through
#                             virtualisation.additionalPaths.
#   desiredSystemOverrides  - attribute set merged into the worker's
#                             "desired_system" record by the test script
#                             (default: no overrides).
#   expectedStatus          - value the observed-system "status" field must have at
#                             the end of the test (default: "active").
#   expectCurrentSystemMatchesTarget
#                           - true: /run/current-system must equal the observed
#                             target_system; false: it must differ from it and
#                             equal the observed rollback_system.
#   expectMarkerPresent     - whether /run/current-system/etc/photon-vm-smoke-target
#                             must exist (true) or must be absent (false).
{
pkgs,
photoncloudPackages,
smokeTargetToplevel,
desiredSystemOverrides ? { },
expectedStatus ? "active",
expectCurrentSystemMatchesTarget ? true,
expectMarkerPresent ? true,
}:
let
# JSON rendering of the overrides; interpolated into the Python test script,
# which parses it back with json.loads.
desiredSystemOverridesJson = builtins.toJSON desiredSystemOverrides;
in
{
name = "deployer-vm-smoke";
# Machines taking part in the test; the test script refers to them by these
# attribute names (`deployer`, `worker`).
nodes = {
deployer =
  # Control-plane machine: runs chainfire together with the deployer API
  # server, both configured through the two project modules imported below.
  { ... }:
  {
    imports = [
      ../modules/chainfire.nix
      ../modules/deployer.nix
    ];

    system.stateVersion = "24.11";

    virtualisation = {
      memorySize = 1536;
      diskSize = 4096;
    };

    networking = {
      hostName = "deployer";
      firewall.enable = false;
      # NOTE(review): presumably the DNS address of the QEMU user-mode
      # network - confirm.
      nameservers = [ "10.0.2.3" ];
    };

    nix.settings.experimental-features = [
      "nix-command"
      "flakes"
    ];

    services = {
      chainfire = {
        enable = true;
        nodeId = "deployer01";
        package = photoncloudPackages.chainfire-server;
      };

      deployer = {
        enable = true;
        package = photoncloudPackages.deployer-server;
        ctlPackage = photoncloudPackages.deployer-ctl;
        # The test script waits for port 8088 and sends its HTTP requests there.
        bindAddr = "0.0.0.0:8088";
        # The test script waits for port 2379 before talking to chainfire.
        chainfireEndpoints = [ "http://127.0.0.1:2379" ];
        clusterId = "vm-smoke";
        # The test script sends this value in the x-deployer-token header.
        bootstrapToken = "vm-smoke-bootstrap-token";
        adminToken = "vm-smoke-admin-token";
        requireChainfire = true;
        allowUnknownNodes = false;
        allowUnauthenticated = false;
        bootstrapFlakeBundle = photoncloudPackages.plasmacloudFlakeBundle;
      };
    };

    # Command-line tools available inside the VM.
    environment.systemPackages = [
      pkgs.curl
      pkgs.gnutar
      pkgs.gzip
      pkgs.jq
      photoncloudPackages.deployer-ctl
    ];
  };
worker =
  # Managed machine: the test script downloads the flake bundle here, phones
  # home to the deployer and then runs nix-agent on this node.
  { ... }:
  {
    system.stateVersion = "24.11";

    virtualisation = {
      memorySize = 4096;
      diskSize = 20480;
      # Extra path made available to this VM.
      # NOTE(review): presumably so the target system does not have to be
      # fetched or built from scratch inside the VM - confirm.
      additionalPaths = [ smokeTargetToplevel ];
    };

    networking = {
      hostName = "worker";
      firewall.enable = false;
      # NOTE(review): presumably the DNS address of the QEMU user-mode
      # network - confirm.
      nameservers = [ "10.0.2.3" ];
    };

    nix.settings.experimental-features = [
      "nix-command"
      "flakes"
    ];
    # No substituters are configured on the worker.
    nix.settings.substituters = [ ];

    # Command-line tools available inside the VM.
    environment.systemPackages = [
      pkgs.curl
      pkgs.gnutar
      pkgs.gzip
      pkgs.jq
      photoncloudPackages.deployer-ctl
      photoncloudPackages.nix-agent
    ];
  };
};
# Python test script executed against the `deployer` and `worker` machines.
# Flow: apply cluster state -> download and unpack the flake bundle on the
# worker -> phone home -> verify the stored node record -> run nix-agent once
# -> poll the observed-system record -> check the final system state.
testScript = ''
  import json
  import tempfile
  import time

  # The overrides arrive from the Nix side as JSON text. The literal is a raw
  # string so that JSON backslash escapes (escaped quotes, backslashes,
  # newlines, unicode escapes) reach json.loads() untouched instead of being
  # interpreted by the Python string parser first.
  desired_system_overrides = json.loads(r"""${desiredSystemOverridesJson}""")


  def write_remote_json(machine, path, payload):
      # Serialise payload as pretty-printed, key-sorted JSON and write it to
      # path on the given machine through a quoted shell heredoc.
      machine.succeed(
          "cat >{path} <<'EOF'\n{payload}\nEOF".format(
              path=path,
              payload=json.dumps(payload, indent=2, sort_keys=True),
          )
      )


  start_all()
  serial_stdout_off()

  with tempfile.TemporaryDirectory(prefix="deployer-vm-smoke-"):
      # Wait until both control-plane services are up and listening.
      deployer.wait_for_unit("chainfire.service")
      deployer.wait_for_unit("deployer.service")
      deployer.wait_for_open_port(2379)
      deployer.wait_for_open_port(8088)

      # Discover addresses and the worker machine id at runtime.
      deployer_ip = worker.succeed("getent ahostsv4 deployer | awk '{print $1; exit}'").strip()
      assert deployer_ip, "deployer did not report an IP address"
      worker_machine_id = worker.succeed("cat /etc/machine-id").strip()
      worker_ip = worker.succeed("hostname -I | awk '{print $1}'").strip()
      assert worker_ip, "worker did not report an IP address"

      # Declarative cluster state containing a single pre-registered worker.
      cluster_state = {
          "cluster": {
              "cluster_id": "vm-smoke",
              "environment": "test",
          },
          "nodes": [
              {
                  "node_id": "worker",
                  "machine_id": worker_machine_id,
                  "hostname": "worker",
                  "ip": worker_ip,
                  "roles": ["worker"],
                  "labels": {
                      "tier": "general",
                  },
                  "pool": "general",
                  "node_class": "worker-linux",
                  "failure_domain": "lab-a",
                  "install_plan": {
                      "nixos_configuration": "vm-smoke-target",
                      "target_disk": "/dev/vda",
                  },
                  "desired_system": {
                      "nixos_configuration": "vm-smoke-target",
                      # Caller-supplied overrides win over the default above.
                      **desired_system_overrides,
                  },
                  "state": "pending",
              }
          ],
          "node_classes": [
              {
                  "name": "worker-linux",
                  "description": "General-purpose worker profile for VM smoke tests",
                  "roles": ["worker"],
                  "labels": {
                      "tier": "general",
                  },
              }
          ],
          "pools": [
              {
                  "name": "general",
                  "description": "General-purpose worker pool",
                  "node_class": "worker-linux",
                  "labels": {
                      "pool.photoncloud.io/name": "general",
                  },
              }
          ],
          "enrollment_rules": [],
          "services": [],
          "instances": [],
          "mtls_policies": [],
      }
      write_remote_json(deployer, "/tmp/cluster-state.json", cluster_state)
      deployer.succeed(
          "deployer-ctl "
          "--chainfire-endpoint http://127.0.0.1:2379 "
          "--cluster-id vm-smoke "
          "--cluster-namespace photoncloud "
          "--deployer-namespace deployer "
          "apply --config /tmp/cluster-state.json --prune",
          timeout=120,
      )
      print("cluster_state_applied")

      # Fetch the bootstrap flake bundle from the deployer and unpack it.
      worker.succeed(
          "curl -fsS "
          "-H 'x-deployer-token: vm-smoke-bootstrap-token' "
          "http://{deployer_ip}:8088/api/v1/bootstrap/flake-bundle "
          "-o /tmp/plasmacloud-flake-bundle.tar.gz".format(
              deployer_ip=deployer_ip,
          ),
          timeout=120,
      )
      print("bundle_downloaded")
      worker.succeed("mkdir -p /var/lib/photon-src", timeout=30)
      worker.succeed("tar xzf /tmp/plasmacloud-flake-bundle.tar.gz -C /var/lib/photon-src", timeout=180)
      print("bundle_extracted")

      # The bundle must contain the flake itself plus its vendored inputs.
      worker.succeed("test -f /var/lib/photon-src/flake.nix")
      worker.succeed("test -d /var/lib/photon-src/nix")
      worker.succeed("test -d /var/lib/photon-src/.bundle-inputs/nixpkgs")
      worker.succeed("test -d /var/lib/photon-src/.bundle-inputs/rust-overlay")
      worker.succeed("test -d /var/lib/photon-src/.bundle-inputs/flake-utils")
      worker.succeed("test -d /var/lib/photon-src/.bundle-inputs/disko")
      worker.succeed("test -d /var/lib/photon-src/.bundle-inputs/systems")

      # Phone home with fixed, synthetic hardware facts.
      phone_home_request = {
          "machine_id": worker_machine_id,
          "node_id": "worker",
          "ip": worker_ip,
          "metadata": {
              "rack": "lab-a",
              "sku": "vm-smoke",
          },
          "hardware_facts": {
              "architecture": "x86_64",
              "cpu_model": "NixOS Test CPU",
              "cpu_threads": 4,
              "cpu_cores": 2,
              "memory_bytes": 2147483648,
              "disks": [
                  {
                      "name": "vda",
                      "path": "/dev/vda",
                      "by_id": "/dev/disk/by-id/virtio-vm-smoke-root",
                      "size_bytes": 21474836480,
                      "model": "QEMU HARDDISK",
                      "serial": "vm-smoke-root",
                      "rotational": False,
                  }
              ],
              "nics": [
                  {
                      "name": "eth0",
                      "mac_address": "52:54:00:12:34:56",
                      "oper_state": "up",
                  }
              ],
          },
      }
      write_remote_json(worker, "/tmp/phone-home.json", phone_home_request)
      phone_home_response = worker.succeed(
          "curl -fsS "
          "-H 'content-type: application/json' "
          "-H 'x-deployer-token: vm-smoke-bootstrap-token' "
          "--data @/tmp/phone-home.json "
          "http://{deployer_ip}:8088/api/v1/phone-home".format(
              deployer_ip=deployer_ip,
          ),
          timeout=120,
      )
      phone_home_payload = json.loads(phone_home_response)
      assert phone_home_payload["node_id"] == "worker"
      assert phone_home_payload["node_config"]["install_plan"]["nixos_configuration"] == "vm-smoke-target"
      assert phone_home_payload["node_config"]["install_plan"]["target_disk"] == "/dev/vda"
      print("phone_home_complete")

      # The stored node record must now carry the reported hardware facts and
      # the labels derived from them.
      node_dump_output = deployer.succeed(
          "deployer-ctl "
          "--chainfire-endpoint http://127.0.0.1:2379 "
          "--cluster-id vm-smoke "
          "--cluster-namespace photoncloud "
          "--deployer-namespace deployer "
          "dump --prefix photoncloud/clusters/vm-smoke/nodes/worker --format json"
      )
      node_entries = [json.loads(line) for line in node_dump_output.splitlines() if line.strip()]
      node_record = next(entry["value"] for entry in node_entries if entry["key"].endswith("/nodes/worker"))
      print("node_record=", json.dumps(node_record, sort_keys=True))
      assert node_record["hardware_facts"]["architecture"] == "x86_64"
      assert node_record["hardware_facts"]["disks"][0]["by_id"] == "/dev/disk/by-id/virtio-vm-smoke-root"
      assert node_record["labels"]["hardware.architecture"] == "x86_64"
      assert node_record["labels"]["hardware.disk_count"] == "1"

      # Preflight: the worker must be able to reach chainfire on the deployer.
      worker.succeed(
          "${photoncloudPackages.deployer-ctl}/bin/deployer-ctl "
          "--chainfire-endpoint http://{deployer_ip}:2379 "
          "--cluster-id vm-smoke "
          "--cluster-namespace photoncloud "
          "--deployer-namespace deployer "
          "dump --prefix photoncloud/clusters/vm-smoke/nodes/worker --format json >/tmp/worker-chainfire-preflight.json".format(
              deployer_ip=deployer_ip,
          ),
          timeout=120,
      )
      print("chainfire_preflight_complete")

      # Run nix-agent once as a transient systemd unit that logs to a file.
      worker.succeed("rm -f /tmp/vm-smoke-nix-agent.log")
      worker.succeed(
          "systemd-run "
          "--no-block "
          "--unit vm-smoke-nix-agent "
          "--service-type=exec "
          "--property=StandardOutput=append:/tmp/vm-smoke-nix-agent.log "
          "--property=StandardError=append:/tmp/vm-smoke-nix-agent.log "
          "--setenv=PATH=/run/current-system/sw/bin "
          "--setenv=RUST_LOG=info "
          "-- "
          "${photoncloudPackages.nix-agent}/bin/nix-agent "
          "--apply "
          "--once "
          "--chainfire-endpoint http://{deployer_ip}:2379 "
          "--cluster-namespace photoncloud "
          "--cluster-id vm-smoke "
          "--node-id worker "
          "--flake-root /var/lib/photon-src".format(
              deployer_ip=deployer_ip,
          ),
          timeout=60,
      )
      worker.wait_until_succeeds(
          "systemctl show -P ActiveState vm-smoke-nix-agent.service | grep -Eq 'active|inactive|failed'",
          timeout=60,
      )

      def read_observed_system():
          # Return the value of the first observed-system entry for the
          # worker, or None while no such entry exists.
          observed_dump_output = deployer.succeed(
              "deployer-ctl "
              "--chainfire-endpoint http://127.0.0.1:2379 "
              "--cluster-id vm-smoke "
              "--cluster-namespace photoncloud "
              "--deployer-namespace deployer "
              "dump --prefix photoncloud/clusters/vm-smoke/nodes/worker/observed-system --format json"
          )
          observed_entries = [json.loads(line) for line in observed_dump_output.splitlines() if line.strip()]
          if not observed_entries:
              return None
          return observed_entries[0]["value"]

      # Poll for up to 900 seconds until the observed status is terminal,
      # printing the tail of the agent log roughly every 30 seconds.
      observed = None
      last_observed_snapshot = None
      next_nix_agent_log_dump = time.time() + 30
      deadline = time.time() + 900
      while time.time() < deadline:
          observed = read_observed_system()
          if observed is None:
              if time.time() >= next_nix_agent_log_dump:
                  print(
                      "nix_agent_log_tail=",
                      worker.succeed("tail -n 50 /tmp/vm-smoke-nix-agent.log || true"),
                  )
                  next_nix_agent_log_dump += 30
              time.sleep(2)
              continue
          # Only print the record when it changed since the previous poll.
          observed_snapshot = json.dumps(observed, sort_keys=True)
          if observed_snapshot != last_observed_snapshot:
              print("observed_system=", observed_snapshot)
              last_observed_snapshot = observed_snapshot
          status = observed.get("status")
          if status in ("active", "failed", "rolled-back"):
              break
          if time.time() >= next_nix_agent_log_dump:
              print(
                  "nix_agent_log_tail=",
                  worker.succeed("tail -n 50 /tmp/vm-smoke-nix-agent.log || true"),
              )
              next_nix_agent_log_dump += 30
          time.sleep(5)

      # Always collect the agent diagnostics before any assertion can abort
      # the test, so that a failing run still shows unit state and full log.
      nix_agent_state_output = worker.succeed(
          "systemctl show "
          "-P ActiveState "
          "-P SubState "
          "-P Result "
          "-P ExecMainStatus "
          "vm-smoke-nix-agent.service || true"
      )
      nix_agent_status_output = worker.succeed(
          "systemctl status vm-smoke-nix-agent.service --no-pager || true"
      )
      nix_agent_log_output = worker.succeed("cat /tmp/vm-smoke-nix-agent.log || true")
      print("nix_agent_state=", nix_agent_state_output)
      print("nix_agent_systemd_status=", nix_agent_status_output)
      print("nix_agent_log=", nix_agent_log_output)
      print("observed_system=", json.dumps(observed, sort_keys=True))

      # Checked only after the diagnostics were printed: when the agent never
      # wrote an observed-system record, its log is the only clue.
      assert observed is not None, "observed-system was never read"
      assert observed["status"] == "${expectedStatus}", observed
      assert observed["nixos_configuration"] == "vm-smoke-target"
      assert observed["flake_root"] == "/var/lib/photon-src"
      assert observed["target_system"].startswith("/nix/store/")

      current_system = worker.succeed("readlink -f /run/current-system").strip()
      print("worker_current_system=", current_system)

      # The two conditions below are rendered to True or False by Nix.
      if ${if expectCurrentSystemMatchesTarget then "True" else "False"}:
          assert current_system == observed["target_system"], (current_system, observed)
      else:
          assert current_system != observed["target_system"], (current_system, observed)
          assert current_system == observed["rollback_system"], (current_system, observed)

      if ${if expectMarkerPresent then "True" else "False"}:
          worker.succeed("test -f /run/current-system/etc/photon-vm-smoke-target")
      else:
          worker.succeed("test ! -f /run/current-system/etc/photon-vm-smoke-target")
'';
}