# NixOS VM test: the overlay between already-enrolled agents must keep
# working while the lightscale control plane is stopped, and a node enrolled
# after the control plane comes back must become reachable from an existing
# node.
#
# Arguments:
#   pkgs      - package set providing the generic networking tools.
#   serverPkg - package installed on node1 only; the test script runs
#               `lightscale-server` there.
#   clientPkg - package installed on every node; the test script runs
#               `lightscale-client` on node2, node3 and node4.
{ pkgs, serverPkg, clientPkg }:
let
  # Tools installed on every VM in addition to the lightscale packages.
  labTools = [
    pkgs.wireguard-tools
    pkgs.iproute2
    pkgs.iputils
    pkgs.netcat-openbsd
    pkgs.curl
  ];

  # Build one lab VM: a static address on eth1 (VLAN 1), firewall disabled,
  # the wireguard kernel module loaded. `extraPackages` is placed in front of
  # the client package so the resulting package order matches the order the
  # nodes were originally declared with.
  mkNode = { hostName, address, extraPackages ? [ ] }:
    { ... }:
    {
      networking.hostName = hostName;
      networking.usePredictableInterfaceNames = false;
      virtualisation.vlans = [ 1 ];
      networking.interfaces.eth1.useDHCP = false;
      networking.interfaces.eth1.ipv4.addresses = [
        { inherit address; prefixLength = 24; }
      ];
      networking.firewall.enable = false;
      boot.kernelModules = [ "wireguard" ];
      environment.systemPackages = extraPackages ++ [ clientPkg ] ++ labTools;
    };
in
{
  name = "lightscale-lab-controlplane-restart";

  nodes = {
    # node1 hosts the control plane, so it is the only VM with serverPkg.
    node1 = mkNode {
      hostName = "node1";
      address = "10.0.0.1";
      extraPackages = [ serverPkg ];
    };
    node2 = mkNode { hostName = "node2"; address = "10.0.0.2"; };
    node3 = mkNode { hostName = "node3"; address = "10.0.0.3"; };
    node4 = mkNode { hostName = "node4"; address = "10.0.0.4"; };
  };

  testScript = ''
    import json
    import time

    start_all()

    node1.wait_for_unit("multi-user.target")
    node2.wait_for_unit("multi-user.target")
    node3.wait_for_unit("multi-user.target")
    node4.wait_for_unit("multi-user.target")

    # Make sure every VM has its static lab address before any traffic is sent.
    node1.wait_until_succeeds("ip -4 addr show dev eth1 | grep -q '10.0.0.1/24'")
    node2.wait_until_succeeds("ip -4 addr show dev eth1 | grep -q '10.0.0.2/24'")
    node3.wait_until_succeeds("ip -4 addr show dev eth1 | grep -q '10.0.0.3/24'")
    node4.wait_until_succeeds("ip -4 addr show dev eth1 | grep -q '10.0.0.4/24'")


    def start_server():
        # Launch the control plane on node1 as a transient systemd unit and
        # wait until it accepts connections on 10.0.0.1:8080.
        #
        # This is called twice with the same unit name and the same --state
        # file. --collect lets systemd unload the unit even if it ended up in
        # the failed state; per systemd-run(1) a failed unit otherwise stays
        # loaded until reset-failed, and creating a unit with the same name
        # again would then be rejected.
        node1.succeed(
            "systemd-run --no-block --collect --unit=lightscale-server "
            "--service-type=simple "
            "--property=Restart=on-failure --property=RestartSec=1 "
            "--property=TimeoutStartSec=30 "
            "--property=StandardOutput=append:/tmp/lightscale-server.log "
            "--property=StandardError=append:/tmp/lightscale-server.log "
            "--setenv=RUST_LOG=info -- "
            "lightscale-server --listen 10.0.0.1:8080 --state /tmp/lightscale-state.json"
        )
        node1.wait_for_unit("lightscale-server.service")
        node1.wait_for_open_port(8080, addr="10.0.0.1", timeout=120)


    def enroll(node, name, ip, state_dir):
        # Point the client at the control plane, register the node with the
        # bootstrap token and send one heartbeat advertising ip:51820.
        node.succeed(
            "lightscale-client --profile test --config /tmp/ls-config.json "
            "init http://10.0.0.1:8080"
        )
        node.succeed(
            f"lightscale-client --profile test --config /tmp/ls-config.json "
            f"--state-dir {state_dir} register --node-name {name} -- {token}"
        )
        node.succeed(
            f"lightscale-client --profile test --config /tmp/ls-config.json "
            f"--state-dir {state_dir} heartbeat --endpoint {ip}:51820"
        )


    def start_agent(node, ip, state_dir):
        # Run the client agent as a transient systemd unit and wait for the
        # ls-test link to show up on the node.
        node.succeed("touch /tmp/lightscale-agent.log")
        cmd = (
            "lightscale-client --profile test --config /tmp/ls-config.json "
            f"--state-dir {state_dir} agent --listen-port 51820 "
            "--heartbeat-interval 5 --longpoll-timeout 5 "
            f"--endpoint {ip}:51820"
        )
        node.succeed(
            "systemd-run --no-block --unit=lightscale-agent --service-type=simple "
            "--property=Restart=on-failure --property=RestartSec=1 "
            "--property=TimeoutStartSec=30 "
            "--property=StandardOutput=append:/tmp/lightscale-agent.log "
            "--property=StandardError=append:/tmp/lightscale-agent.log -- "
            + cmd
        )
        node.wait_for_unit("lightscale-agent.service")
        node.wait_until_succeeds("ip link show ls-test", timeout=60)


    def restarts(node):
        # Number of automatic restarts systemd has recorded for the agent unit.
        return int(node.succeed("systemctl show -p NRestarts --value lightscale-agent.service"))


    # Mirror the server log onto the VM console so it ends up in the test output.
    node1.succeed("touch /tmp/lightscale-server.log")
    node1.execute("sh -c 'tail -n +1 -f /tmp/lightscale-server.log >/dev/console 2>&1 &'")
    start_server()

    # Create the lab network; the response carries the bootstrap token that
    # enroll() passes to the register command.
    net = json.loads(node1.succeed(
        "curl -sSf -X POST http://10.0.0.1:8080/v1/networks "
        "-H 'content-type: application/json' "
        "-d '{\"name\":\"lab\",\"bootstrap_token_ttl_seconds\":600,"
        "\"bootstrap_token_uses\":10,\"bootstrap_token_tags\":[\"lab\"]}'"
    ))
    token = net["bootstrap_token"]["token"]

    enroll(node2, "node2", "10.0.0.2", "/tmp/ls-state-2")
    enroll(node3, "node3", "10.0.0.3", "/tmp/ls-state-3")
    start_agent(node2, "10.0.0.2", "/tmp/ls-state-2")
    start_agent(node3, "10.0.0.3", "/tmp/ls-state-3")

    # The address to ping is read from the "ipv4" field of each client state file.
    data2 = json.loads(node2.succeed("cat /tmp/ls-state-2/state.json"))
    data3 = json.loads(node3.succeed("cat /tmp/ls-state-3/state.json"))
    ip2 = data2["ipv4"]
    ip3 = data3["ipv4"]

    node2.wait_until_succeeds(f"ping -c 3 {ip3}", timeout=120)
    node3.wait_until_succeeds(f"ping -c 3 {ip2}", timeout=120)

    # Control plane outage: existing peers must stay connected and the agent
    # on node2 must not have been restarted by systemd in the meantime.
    restarts_before = restarts(node2)
    node1.succeed("systemctl stop lightscale-server.service")
    node1.wait_until_fails("curl -sSf http://10.0.0.1:8080/healthz")
    # NOTE(review): presumably chosen to outlast the 5 second heartbeat
    # interval the agents are started with - confirm.
    time.sleep(6)
    node2.wait_until_succeeds(f"ping -c 3 {ip3}", timeout=120)
    restarts_after = restarts(node2)
    if restarts_after != restarts_before:
        raise Exception(f"agent restarted during control plane outage ({restarts_before} -> {restarts_after})")

    # Control plane recovery: bring the server back on the same state file,
    # enroll a fresh node and check that node2 can reach it.
    start_server()

    enroll(node4, "node4", "10.0.0.4", "/tmp/ls-state-4")
    start_agent(node4, "10.0.0.4", "/tmp/ls-state-4")
    data4 = json.loads(node4.succeed("cat /tmp/ls-state-4/state.json"))
    ip4 = data4["ipv4"]
    node2.wait_until_succeeds(f"ping -c 3 {ip4}", timeout=180)
  '';
}