From 559d6d2c6e6b27a3ec874bfc9c425231fd1f43c5 Mon Sep 17 00:00:00 2001 From: centra
Date: Tue, 24 Feb 2026 02:41:01 +0900 Subject: [PATCH] Switch submodule remotes to GitHub mirrors --- .github/workflows/linux-lab.yml | 60 +++++++++ .gitmodules | 4 +- lab/run-linux-suite.sh | 49 ++++++++ lab/test-auth-url.nix | 142 +++++++++++++++++++++ lab/test-nat-churn.nix | 215 ++++++++++++++++++++++++++++++++ lab/test-netem.nix | 202 ++++++++++++++++++++++++++++++ lab/test-relay-switch.nix | 156 +++++++++++++++++++++++ lab/test-simple-autoreg.nix | 197 +++++++++++++++++++++++++++++ lab/test-soak.nix | 157 +++++++++++++++++++++++ lab/test-standalone.nix | 138 ++++++++++++++++++++ 10 files changed, 1318 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/linux-lab.yml create mode 100755 lab/run-linux-suite.sh create mode 100644 lab/test-auth-url.nix create mode 100644 lab/test-nat-churn.nix create mode 100644 lab/test-netem.nix create mode 100644 lab/test-relay-switch.nix create mode 100644 lab/test-simple-autoreg.nix create mode 100644 lab/test-soak.nix create mode 100644 lab/test-standalone.nix diff --git a/.github/workflows/linux-lab.yml b/.github/workflows/linux-lab.yml new file mode 100644 index 0000000..414ed3e --- /dev/null +++ b/.github/workflows/linux-lab.yml @@ -0,0 +1,60 @@ +name: linux-lab + +on: + push: + branches: + - main + - master + pull_request: + workflow_dispatch: + +permissions: + contents: read + +jobs: + core: + name: core-${{ matrix.mode }} + runs-on: ubuntu-24.04 + timeout-minutes: 180 + strategy: + fail-fast: false + matrix: + mode: [fast, nat, netem] + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Install Nix + uses: cachix/install-nix-action@v31 + with: + extra_nix_config: | + experimental-features = nix-command flakes + + - name: Run core lab mode + run: ./lab/run.sh ${{ matrix.mode }} + + extended: + if: github.event_name == 'workflow_dispatch' + name: extended-${{ matrix.mode }} + runs-on: ubuntu-24.04 + timeout-minutes: 240 + strategy: + fail-fast: false + matrix: 
+ mode: [auth-url, nat-churn, relay-switch, standalone, soak] + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Install Nix + uses: cachix/install-nix-action@v31 + with: + extra_nix_config: | + experimental-features = nix-command flakes + + - name: Run extended lab mode + run: ./lab/run.sh ${{ matrix.mode }} diff --git a/.gitmodules b/.gitmodules index 957a7a2..d3f2efc 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,6 @@ [submodule "lightscale-client"] path = lightscale-client - url = https://git.centraworks.net/centra/lightscale-client.git + url = https://github.com/CentRa-Linux/lightscale-client.git [submodule "lightscale-server"] path = lightscale-server - url = https://git.centraworks.net/centra/lightscale-server.git + url = https://github.com/CentRa-Linux/lightscale-server.git diff --git a/lab/run-linux-suite.sh b/lab/run-linux-suite.sh new file mode 100755 index 0000000..70b2bd3 --- /dev/null +++ b/lab/run-linux-suite.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd) +cd "$ROOT_DIR" + +SUITE="core" +WITH_ADMIN=0 + +for arg in "$@"; do + case "$arg" in + core|extended|all) + SUITE="$arg" + ;; + --with-admin) + WITH_ADMIN=1 + ;; + *) + echo "unknown argument: $arg" >&2 + echo "usage: ./lab/run-linux-suite.sh [core|extended|all] [--with-admin]" >&2 + exit 1 + ;; + esac +done + +CORE_MODES=(fast nat netem) +EXTENDED_MODES=(auth-url nat-churn relay-switch standalone soak) + +case "$SUITE" in + core) + MODES=("${CORE_MODES[@]}") + ;; + extended) + MODES=("${EXTENDED_MODES[@]}") + ;; + all) + MODES=("${CORE_MODES[@]}" "${EXTENDED_MODES[@]}") + ;; +esac + +if [[ "$WITH_ADMIN" == "1" ]]; then + MODES+=(admin) +fi + +for mode in "${MODES[@]}"; do + echo "=== running lab mode: $mode ===" + ./lab/run.sh "$mode" + echo "=== finished lab mode: $mode ===" +done diff --git a/lab/test-auth-url.nix b/lab/test-auth-url.nix new file mode 100644 index 0000000..ac3bb16 --- /dev/null +++ b/lab/test-auth-url.nix @@ -0,0 +1,142 @@ +{ pkgs, serverPkg, clientPkg }: +{ + name = "lightscale-lab-auth-url"; + nodes = { + node1 = { ... }: { + networking.hostName = "node1"; + networking.usePredictableInterfaceNames = false; + virtualisation.vlans = [ 1 ]; + networking.interfaces.eth1.useDHCP = false; + networking.interfaces.eth1.ipv4.addresses = [ + { address = "10.0.0.1"; prefixLength = 24; } + ]; + networking.firewall.enable = false; + boot.kernelModules = [ "wireguard" ]; + environment.systemPackages = [ + serverPkg + clientPkg + pkgs.wireguard-tools + pkgs.iproute2 + pkgs.iputils + pkgs.netcat-openbsd + pkgs.curl + ]; + }; + node2 = { ... 
}: { + networking.hostName = "node2"; + networking.usePredictableInterfaceNames = false; + virtualisation.vlans = [ 1 ]; + networking.interfaces.eth1.useDHCP = false; + networking.interfaces.eth1.ipv4.addresses = [ + { address = "10.0.0.2"; prefixLength = 24; } + ]; + networking.firewall.enable = false; + boot.kernelModules = [ "wireguard" ]; + environment.systemPackages = [ + clientPkg + pkgs.wireguard-tools + pkgs.iproute2 + pkgs.iputils + pkgs.curl + ]; + }; + }; + + testScript = '' + start_all() + node1.wait_for_unit("multi-user.target") + node2.wait_for_unit("multi-user.target") + node1.wait_until_succeeds("ip -4 addr show dev eth1 | grep -q '10.0.0.1/24'") + node2.wait_until_succeeds("ip -4 addr show dev eth1 | grep -q '10.0.0.2/24'") + + node1.succeed("touch /tmp/lightscale-server.log") + node1.execute("sh -c 'tail -n +1 -f /tmp/lightscale-server.log >/dev/console 2>&1 &'") + node1.succeed( + "systemd-run --no-block --unit=lightscale-server --service-type=simple " + "--property=Restart=on-failure --property=RestartSec=1 " + "--property=TimeoutStartSec=30 " + "--property=StandardOutput=append:/tmp/lightscale-server.log " + "--property=StandardError=append:/tmp/lightscale-server.log " + "--setenv=RUST_LOG=info --setenv=LIGHTSCALE_ADMIN_TOKEN=test-admin -- " + "lightscale-server --listen 10.0.0.1:8080 --state /tmp/lightscale-state.json" + ) + node1.wait_for_unit("lightscale-server.service") + node1.wait_for_open_port(8080, addr="10.0.0.1", timeout=120) + + import json + + net = json.loads(node1.succeed( + "curl -sSf -X POST http://10.0.0.1:8080/v1/networks " + "-H 'authorization: Bearer test-admin' " + "-H 'content-type: application/json' " + "-d '{\"name\":\"auth-url\",\"bootstrap_token_ttl_seconds\":600," + "\"bootstrap_token_uses\":10,\"bootstrap_token_tags\":[\"auth-url\"]}'" + )) + token = net["bootstrap_token"]["token"] + network_id = net["network"]["id"] + + node1.succeed( + "lightscale-client --profile auth --config /tmp/ls-config.json " + "init 
http://10.0.0.1:8080" + ) + node1.succeed( + f"lightscale-client --profile auth --config /tmp/ls-config.json " + f"--state-dir /tmp/ls-state register --node-name node1 -- {token}" + ) + + node2.succeed( + "lightscale-client --profile auth --config /tmp/ls-config.json " + "init http://10.0.0.1:8080" + ) + node2.succeed( + "sh -c 'lightscale-client --profile auth --config /tmp/ls-config.json " + "--state-dir /tmp/ls-state register-url " + + network_id + + " --node-name node2 --approve | tee /tmp/register-url.out'" + ) + node2.succeed("grep -q 'approved=true' /tmp/register-url.out") + node2.succeed( + "lightscale-client --profile auth --config /tmp/ls-config.json " + "--state-dir /tmp/ls-state status | grep -q 'approved: true'" + ) + + node1.succeed( + "lightscale-client --profile auth --config /tmp/ls-config.json " + "--state-dir /tmp/ls-state heartbeat --endpoint 10.0.0.1:51820" + ) + node2.succeed( + "lightscale-client --profile auth --config /tmp/ls-config.json " + "--state-dir /tmp/ls-state heartbeat --endpoint 10.0.0.2:51820" + ) + + def start_agent(node, endpoint): + node.succeed("touch /tmp/lightscale-agent.log") + cmd = ( + "lightscale-client --profile auth --config /tmp/ls-config.json " + "--state-dir /tmp/ls-state agent --listen-port 51820 " + "--heartbeat-interval 5 --longpoll-timeout 5 " + f"--endpoint {endpoint}" + ) + node.succeed( + "systemd-run --no-block --unit=lightscale-agent --service-type=simple " + "--property=Restart=on-failure --property=RestartSec=1 " + "--property=TimeoutStartSec=30 " + "--property=StandardOutput=append:/tmp/lightscale-agent.log " + "--property=StandardError=append:/tmp/lightscale-agent.log -- " + + cmd + ) + node.wait_for_unit("lightscale-agent.service") + node.wait_until_succeeds("ip link show ls-auth", timeout=60) + + start_agent(node1, "10.0.0.1:51820") + start_agent(node2, "10.0.0.2:51820") + + data1 = json.loads(node1.succeed("cat /tmp/ls-state/state.json")) + data2 = json.loads(node2.succeed("cat 
/tmp/ls-state/state.json")) + ip1 = data1["ipv4"] + ip2 = data2["ipv4"] + + node1.wait_until_succeeds(f"ping -c 3 {ip2}", timeout=180) + node2.wait_until_succeeds(f"ping -c 3 {ip1}", timeout=180) + ''; +} diff --git a/lab/test-nat-churn.nix b/lab/test-nat-churn.nix new file mode 100644 index 0000000..ee5264e --- /dev/null +++ b/lab/test-nat-churn.nix @@ -0,0 +1,215 @@ +{ pkgs, serverPkg, clientPkg }: +{ + name = "lightscale-lab-nat-churn"; + nodes = { + node1 = { ... }: { + networking.hostName = "node1"; + networking.usePredictableInterfaceNames = false; + virtualisation.vlans = [ 1 ]; + networking.interfaces.eth1.useDHCP = false; + networking.interfaces.eth1.ipv4.addresses = [ + { address = "10.0.0.1"; prefixLength = 24; } + ]; + networking.firewall.enable = false; + boot.kernelModules = [ "wireguard" ]; + environment.systemPackages = [ + serverPkg + clientPkg + pkgs.wireguard-tools + pkgs.iproute2 + pkgs.iputils + pkgs.netcat-openbsd + pkgs.curl + ]; + }; + + natgw = { ... }: { + networking.hostName = "natgw"; + networking.usePredictableInterfaceNames = false; + virtualisation.vlans = [ 1 2 ]; + networking.interfaces.eth1.useDHCP = false; + networking.interfaces.eth1.ipv4.addresses = [ + { address = "10.0.0.2"; prefixLength = 24; } + ]; + networking.interfaces.eth2.useDHCP = false; + networking.interfaces.eth2.ipv4.addresses = [ + { address = "192.168.60.1"; prefixLength = 24; } + ]; + networking.firewall.enable = false; + boot.kernel.sysctl."net.ipv4.ip_forward" = 1; + environment.systemPackages = [ + pkgs.iproute2 + pkgs.iputils + pkgs.iptables + pkgs.conntrack-tools + ]; + }; + + node3 = { ... 
}: { + networking.hostName = "node3"; + networking.usePredictableInterfaceNames = false; + virtualisation.vlans = [ 2 ]; + networking.interfaces.eth1.useDHCP = false; + networking.interfaces.eth1.ipv4.addresses = [ + { address = "192.168.60.10"; prefixLength = 24; } + ]; + networking.defaultGateway = { + address = "192.168.60.1"; + interface = "eth1"; + }; + networking.firewall.enable = false; + boot.kernelModules = [ "wireguard" ]; + environment.systemPackages = [ + clientPkg + pkgs.wireguard-tools + pkgs.iproute2 + pkgs.iputils + pkgs.curl + ]; + }; + }; + + testScript = '' + start_all() + node1.wait_for_unit("multi-user.target") + natgw.wait_for_unit("multi-user.target") + node3.wait_for_unit("multi-user.target") + + node1.wait_until_succeeds("ip -4 addr show dev eth1 | grep -q '10.0.0.1/24'") + natgw.wait_until_succeeds("ip -4 addr show dev eth1 | grep -q '10.0.0.2/24'") + natgw.wait_until_succeeds("ip -4 addr show dev eth2 | grep -q '192.168.60.1/24'") + node3.wait_until_succeeds("ip -4 addr show dev eth1 | grep -q '192.168.60.10/24'") + + natgw.succeed("iptables -P FORWARD ACCEPT") + natgw.succeed("iptables -F") + natgw.succeed("iptables -t nat -F") + + def set_nat_port(port): + natgw.succeed("iptables -t nat -F") + natgw.succeed( + f"iptables -t nat -A PREROUTING -i eth1 -p udp --dport {port} " + "-j DNAT --to-destination 192.168.60.10:51820" + ) + natgw.succeed( + f"iptables -t nat -A POSTROUTING -o eth1 -p udp -s 192.168.60.10 " + f"--sport 51820 -j SNAT --to-source 10.0.0.2:{port}" + ) + natgw.succeed("iptables -t nat -A POSTROUTING -o eth1 -j MASQUERADE") + natgw.execute("conntrack -F || true") + + set_nat_port(40000) + + # Very short UDP conntrack timeout to emulate consumer NAT churn. 
+ natgw.succeed("sysctl -w net.netfilter.nf_conntrack_udp_timeout=5") + natgw.succeed("sysctl -w net.netfilter.nf_conntrack_udp_timeout_stream=5") + + node1.succeed("touch /tmp/lightscale-server.log") + node1.execute("sh -c 'tail -n +1 -f /tmp/lightscale-server.log >/dev/console 2>&1 &'") + node1.succeed( + "systemd-run --no-block --unit=lightscale-server --service-type=simple " + "--property=Restart=on-failure --property=RestartSec=1 " + "--property=TimeoutStartSec=30 " + "--property=StandardOutput=append:/tmp/lightscale-server.log " + "--property=StandardError=append:/tmp/lightscale-server.log " + "--setenv=RUST_LOG=info --setenv=LIGHTSCALE_ADMIN_TOKEN=test-admin -- " + "lightscale-server --listen 10.0.0.1:8080 --state /tmp/lightscale-state.json" + ) + node1.wait_for_unit("lightscale-server.service") + node1.wait_for_open_port(8080, addr="10.0.0.1", timeout=120) + + import json + + net = json.loads(node1.succeed( + "curl -sSf -X POST http://10.0.0.1:8080/v1/networks " + "-H 'authorization: Bearer test-admin' " + "-H 'content-type: application/json' " + "-d '{\"name\":\"nat-churn\",\"bootstrap_token_ttl_seconds\":600," + "\"bootstrap_token_uses\":10,\"bootstrap_token_tags\":[\"nat-churn\"]}'" + )) + token = net["bootstrap_token"]["token"] + + def enroll(node, name, endpoints): + node.succeed( + "lightscale-client --profile natchurn --config /tmp/ls-config.json " + "init http://10.0.0.1:8080" + ) + node.succeed( + f"lightscale-client --profile natchurn --config /tmp/ls-config.json " + f"--state-dir /tmp/ls-state register --node-name {name} -- {token}" + ) + cmd = ( + "lightscale-client --profile natchurn --config /tmp/ls-config.json " + "--state-dir /tmp/ls-state heartbeat" + ) + for endpoint in endpoints: + cmd += f" --endpoint {endpoint}" + node.succeed(cmd) + + enroll(node1, "node1", ["10.0.0.1:51820"]) + enroll(node3, "node3", ["10.0.0.2:40000", "10.0.0.2:41000", "10.0.0.2:42000", "10.0.0.2:43000"]) + + def start_agent(node, endpoints): + node.succeed("touch 
/tmp/lightscale-agent.log") + cmd = ( + "lightscale-client --profile natchurn --config /tmp/ls-config.json " + "--state-dir /tmp/ls-state agent --listen-port 51820 " + "--heartbeat-interval 5 --longpoll-timeout 5 " + "--endpoint-stale-after 5 --endpoint-max-rotations 2" + ) + for endpoint in endpoints: + cmd += f" --endpoint {endpoint}" + node.succeed( + "systemd-run --no-block --unit=lightscale-agent --service-type=simple " + "--property=Restart=on-failure --property=RestartSec=1 " + "--property=TimeoutStartSec=30 " + "--property=StandardOutput=append:/tmp/lightscale-agent.log " + "--property=StandardError=append:/tmp/lightscale-agent.log -- " + + cmd + ) + node.wait_for_unit("lightscale-agent.service") + node.wait_until_succeeds("ip link show ls-natchurn", timeout=60) + + start_agent(node1, ["10.0.0.1:51820"]) + start_agent(node3, ["10.0.0.2:40000", "10.0.0.2:41000", "10.0.0.2:42000", "10.0.0.2:43000"]) + + data1 = json.loads(node1.succeed("cat /tmp/ls-state/state.json")) + data3 = json.loads(node3.succeed("cat /tmp/ls-state/state.json")) + node1_ip = data1["ipv4"] + node3_ip = data3["ipv4"] + + def eventually_ping(src, dst, timeout=180): + src.wait_until_succeeds( + f"for i in $(seq 1 20); do ping -c 1 -W 1 {dst} && exit 0; sleep 1; done; exit 1", + timeout=timeout, + ) + + eventually_ping(node1, node3_ip) + eventually_ping(node3, node1_ip) + + # Wait beyond short conntrack timeout and verify it re-establishes. + natgw.succeed("sleep 7") + natgw.execute("conntrack -L -p udp || true") + eventually_ping(node1, node3_ip) + eventually_ping(node3, node1_ip) + + # Rebind multiple times. + for port in [41000, 42000]: + set_nat_port(port) + node1.wait_until_succeeds(f"wg show ls-natchurn endpoints | grep -q ':{port}'", timeout=180) + eventually_ping(node1, node3_ip) + eventually_ping(node3, node1_ip) + + # Short full outage, then recover on a fresh mapped port. 
+ natgw.succeed("iptables -I FORWARD -p udp --dport 51820 -j DROP") + natgw.succeed("iptables -I FORWARD -p udp --sport 51820 -j DROP") + node1.execute(f"ping -c 2 {node3_ip} || true") + natgw.succeed("sleep 10") + natgw.succeed("iptables -D FORWARD -p udp --dport 51820 -j DROP") + natgw.succeed("iptables -D FORWARD -p udp --sport 51820 -j DROP") + + set_nat_port(43000) + node1.wait_until_succeeds("wg show ls-natchurn endpoints | grep -q ':43000'", timeout=240) + eventually_ping(node1, node3_ip, timeout=240) + eventually_ping(node3, node1_ip, timeout=240) + ''; +} diff --git a/lab/test-netem.nix b/lab/test-netem.nix new file mode 100644 index 0000000..51a096b --- /dev/null +++ b/lab/test-netem.nix @@ -0,0 +1,202 @@ +{ pkgs, serverPkg, clientPkg }: +{ + name = "lightscale-lab-netem"; + nodes = { + node1 = { ... }: { + networking.hostName = "node1"; + networking.usePredictableInterfaceNames = false; + virtualisation.vlans = [ 1 ]; + networking.interfaces.eth1.useDHCP = false; + networking.interfaces.eth1.ipv4.addresses = [ + { address = "10.0.0.1"; prefixLength = 24; } + ]; + networking.firewall.enable = false; + boot.kernelModules = [ "wireguard" ]; + environment.systemPackages = [ + serverPkg + clientPkg + pkgs.wireguard-tools + pkgs.iproute2 + pkgs.iputils + pkgs.netcat-openbsd + pkgs.curl + ]; + }; + + natgw = { ... }: { + networking.hostName = "natgw"; + networking.usePredictableInterfaceNames = false; + virtualisation.vlans = [ 1 2 ]; + networking.interfaces.eth1.useDHCP = false; + networking.interfaces.eth1.ipv4.addresses = [ + { address = "10.0.0.2"; prefixLength = 24; } + ]; + networking.interfaces.eth2.useDHCP = false; + networking.interfaces.eth2.ipv4.addresses = [ + { address = "192.168.50.1"; prefixLength = 24; } + ]; + networking.firewall.enable = false; + boot.kernel.sysctl."net.ipv4.ip_forward" = 1; + environment.systemPackages = [ + pkgs.iproute2 + pkgs.iputils + pkgs.iptables + pkgs.conntrack-tools + ]; + }; + + node3 = { ... 
}: { + networking.hostName = "node3"; + networking.usePredictableInterfaceNames = false; + virtualisation.vlans = [ 2 ]; + networking.interfaces.eth1.useDHCP = false; + networking.interfaces.eth1.ipv4.addresses = [ + { address = "192.168.50.10"; prefixLength = 24; } + ]; + networking.defaultGateway = { + address = "192.168.50.1"; + interface = "eth1"; + }; + networking.firewall.enable = false; + boot.kernelModules = [ "wireguard" ]; + environment.systemPackages = [ + clientPkg + pkgs.wireguard-tools + pkgs.iproute2 + pkgs.iputils + pkgs.curl + ]; + }; + }; + + testScript = '' + start_all() + node1.wait_for_unit("multi-user.target") + natgw.wait_for_unit("multi-user.target") + node3.wait_for_unit("multi-user.target") + + node1.wait_until_succeeds("ip -4 addr show dev eth1 | grep -q '10.0.0.1/24'") + natgw.wait_until_succeeds("ip -4 addr show dev eth1 | grep -q '10.0.0.2/24'") + natgw.wait_until_succeeds("ip -4 addr show dev eth2 | grep -q '192.168.50.1/24'") + node3.wait_until_succeeds("ip -4 addr show dev eth1 | grep -q '192.168.50.10/24'") + node3.wait_until_succeeds("ip -4 route show default | grep -q 'via 192.168.50.1'") + + natgw.succeed("iptables -P FORWARD ACCEPT") + natgw.succeed("iptables -F") + natgw.succeed("iptables -t nat -F") + + def set_nat_port(port): + natgw.succeed("iptables -t nat -F") + natgw.succeed( + f"iptables -t nat -A PREROUTING -i eth1 -p udp --dport {port} " + "-j DNAT --to-destination 192.168.50.10:51820" + ) + natgw.succeed( + f"iptables -t nat -A POSTROUTING -o eth1 -p udp -s 192.168.50.10 " + f"--sport 51820 -j SNAT --to-source 10.0.0.2:{port}" + ) + natgw.succeed("iptables -t nat -A POSTROUTING -o eth1 -j MASQUERADE") + natgw.execute("conntrack -F || true") + + set_nat_port(40000) + + node1.succeed("touch /tmp/lightscale-server.log") + node1.execute("sh -c 'tail -n +1 -f /tmp/lightscale-server.log >/dev/console 2>&1 &'") + node1.succeed( + "systemd-run --no-block --unit=lightscale-server --service-type=simple " + 
"--property=Restart=on-failure --property=RestartSec=1 " + "--property=TimeoutStartSec=30 " + "--property=StandardOutput=append:/tmp/lightscale-server.log " + "--property=StandardError=append:/tmp/lightscale-server.log " + "--setenv=RUST_LOG=info --setenv=LIGHTSCALE_ADMIN_TOKEN=test-admin -- " + "lightscale-server --listen 10.0.0.1:8080 --state /tmp/lightscale-state.json" + ) + node1.wait_for_unit("lightscale-server.service") + node1.wait_for_open_port(8080, addr="10.0.0.1", timeout=120) + + import json + + net = json.loads(node1.succeed( + "curl -sSf -X POST http://10.0.0.1:8080/v1/networks " + "-H 'authorization: Bearer test-admin' " + "-H 'content-type: application/json' " + "-d '{\"name\":\"netem\",\"bootstrap_token_ttl_seconds\":600," + "\"bootstrap_token_uses\":10,\"bootstrap_token_tags\":[\"netem\"]}'" + )) + token = net["bootstrap_token"]["token"] + + def enroll(node, name, endpoints): + node.succeed( + "lightscale-client --profile netem --config /tmp/ls-config.json " + "init http://10.0.0.1:8080" + ) + node.succeed( + f"lightscale-client --profile netem --config /tmp/ls-config.json " + f"--state-dir /tmp/ls-state register --node-name {name} -- {token}" + ) + cmd = ( + "lightscale-client --profile netem --config /tmp/ls-config.json " + "--state-dir /tmp/ls-state heartbeat" + ) + for endpoint in endpoints: + cmd += f" --endpoint {endpoint}" + node.succeed(cmd) + + enroll(node1, "node1", ["10.0.0.1:51820"]) + enroll(node3, "node3", ["10.0.0.2:40000", "10.0.0.2:41000"]) + + def start_agent(node, endpoints): + node.succeed("touch /tmp/lightscale-agent.log") + cmd = ( + "lightscale-client --profile netem --config /tmp/ls-config.json " + "--state-dir /tmp/ls-state agent --listen-port 51820 " + "--heartbeat-interval 5 --longpoll-timeout 5 " + "--endpoint-stale-after 5 --endpoint-max-rotations 2" + ) + for endpoint in endpoints: + cmd += f" --endpoint {endpoint}" + node.succeed( + "systemd-run --no-block --unit=lightscale-agent --service-type=simple " + 
"--property=Restart=on-failure --property=RestartSec=1 " + "--property=TimeoutStartSec=30 " + "--property=StandardOutput=append:/tmp/lightscale-agent.log " + "--property=StandardError=append:/tmp/lightscale-agent.log -- " + + cmd + ) + node.wait_for_unit("lightscale-agent.service") + node.wait_until_succeeds("ip link show ls-netem", timeout=60) + + start_agent(node1, ["10.0.0.1:51820"]) + start_agent(node3, ["10.0.0.2:40000", "10.0.0.2:41000"]) + + data1 = json.loads(node1.succeed("cat /tmp/ls-state/state.json")) + data3 = json.loads(node3.succeed("cat /tmp/ls-state/state.json")) + node1_ip = data1["ipv4"] + node3_ip = data3["ipv4"] + + node1.wait_until_succeeds(f"ping -c 3 {node3_ip}", timeout=180) + node3.wait_until_succeeds(f"ping -c 3 {node1_ip}", timeout=180) + + # Impair WAN path between node1 and natgw to validate behavior under delay/loss. + natgw.succeed("tc qdisc replace dev eth1 root netem delay 120ms 20ms loss 8%") + natgw.succeed("tc -s qdisc show dev eth1 | grep -q netem") + + def eventually_ping(src, dst): + src.wait_until_succeeds( + f"for i in $(seq 1 20); do ping -c 1 -W 1 {dst} && exit 0; sleep 1; done; exit 1", + timeout=180, + ) + + eventually_ping(node1, node3_ip) + eventually_ping(node3, node1_ip) + + natgw.succeed("tc qdisc del dev eth1 root") + + # Simulate NAT rebinding by changing external UDP port for node3. + set_nat_port(41000) + + node1.wait_until_succeeds("wg show ls-netem endpoints | grep -q ':41000'", timeout=180) + node1.wait_until_succeeds(f"ping -c 3 {node3_ip}", timeout=180) + node3.wait_until_succeeds(f"ping -c 3 {node1_ip}", timeout=180) + ''; +} diff --git a/lab/test-relay-switch.nix b/lab/test-relay-switch.nix new file mode 100644 index 0000000..c869f51 --- /dev/null +++ b/lab/test-relay-switch.nix @@ -0,0 +1,156 @@ +{ pkgs, serverPkg, clientPkg }: +{ + name = "lightscale-lab-relay-switch"; + nodes = { + node1 = { ... 
}: { + networking.hostName = "node1"; + networking.usePredictableInterfaceNames = false; + virtualisation.vlans = [ 1 ]; + networking.interfaces.eth1.useDHCP = false; + networking.interfaces.eth1.ipv4.addresses = [ + { address = "10.0.0.1"; prefixLength = 24; } + ]; + networking.firewall.enable = false; + boot.kernelModules = [ "wireguard" ]; + environment.systemPackages = [ + serverPkg + clientPkg + pkgs.wireguard-tools + pkgs.iproute2 + pkgs.iputils + pkgs.netcat-openbsd + pkgs.curl + pkgs.iptables + ]; + }; + node2 = { ... }: { + networking.hostName = "node2"; + networking.usePredictableInterfaceNames = false; + virtualisation.vlans = [ 1 ]; + networking.interfaces.eth1.useDHCP = false; + networking.interfaces.eth1.ipv4.addresses = [ + { address = "10.0.0.2"; prefixLength = 24; } + ]; + networking.firewall.enable = false; + boot.kernelModules = [ "wireguard" ]; + environment.systemPackages = [ + clientPkg + pkgs.wireguard-tools + pkgs.iproute2 + pkgs.iputils + pkgs.curl + pkgs.iptables + ]; + }; + }; + + testScript = '' + start_all() + node1.wait_for_unit("multi-user.target") + node2.wait_for_unit("multi-user.target") + node1.wait_until_succeeds("ip -4 addr show dev eth1 | grep -q '10.0.0.1/24'") + node2.wait_until_succeeds("ip -4 addr show dev eth1 | grep -q '10.0.0.2/24'") + + node1.succeed("touch /tmp/lightscale-server.log") + node1.execute("sh -c 'tail -n +1 -f /tmp/lightscale-server.log >/dev/console 2>&1 &'") + node1.succeed( + "systemd-run --no-block --unit=lightscale-server --service-type=simple " + "--property=Restart=on-failure --property=RestartSec=1 " + "--property=TimeoutStartSec=30 " + "--property=StandardOutput=append:/tmp/lightscale-server.log " + "--property=StandardError=append:/tmp/lightscale-server.log " + "--setenv=RUST_LOG=info --setenv=LIGHTSCALE_ADMIN_TOKEN=test-admin -- " + "lightscale-server --listen 10.0.0.1:8080 --state /tmp/lightscale-state.json " + "--stream-relay 10.0.0.1:8443 --stream-relay-listen 10.0.0.1:8443" + ) + 
node1.wait_for_unit("lightscale-server.service") + node1.wait_for_open_port(8080, addr="10.0.0.1", timeout=120) + node1.wait_for_open_port(8443, addr="10.0.0.1", timeout=120) + + import json + + net = json.loads(node1.succeed( + "curl -sSf -X POST http://10.0.0.1:8080/v1/networks " + "-H 'authorization: Bearer test-admin' " + "-H 'content-type: application/json' " + "-d '{\"name\":\"relay-switch\",\"bootstrap_token_ttl_seconds\":600," + "\"bootstrap_token_uses\":10,\"bootstrap_token_tags\":[\"relay-switch\"]}'" + )) + token = net["bootstrap_token"]["token"] + + def enroll(node, name, endpoint): + node.succeed( + "lightscale-client --profile rly --config /tmp/ls-config.json " + "init http://10.0.0.1:8080" + ) + node.succeed( + f"lightscale-client --profile rly --config /tmp/ls-config.json " + f"--state-dir /tmp/ls-state register --node-name {name} -- {token}" + ) + node.succeed( + f"lightscale-client --profile rly --config /tmp/ls-config.json " + f"--state-dir /tmp/ls-state heartbeat --endpoint {endpoint}" + ) + + enroll(node1, "node1", "10.0.0.1:51820") + enroll(node2, "node2", "10.0.0.2:51820") + + def start_agent(node, endpoint): + node.succeed("touch /tmp/lightscale-agent.log") + node.execute("sh -c 'tail -n +1 -f /tmp/lightscale-agent.log >/dev/console 2>&1 &'") + cmd = ( + "lightscale-client --profile rly --config /tmp/ls-config.json " + "--state-dir /tmp/ls-state agent --listen-port 51820 " + "--heartbeat-interval 5 --longpoll-timeout 5 " + "--endpoint-stale-after 5 --endpoint-max-rotations 1 " + "--relay-reprobe-after 8 --stream-relay " + f"--endpoint {endpoint}" + ) + node.succeed( + "systemd-run --no-block --unit=lightscale-agent --service-type=simple " + "--property=Restart=on-failure --property=RestartSec=1 " + "--property=TimeoutStartSec=30 " + "--property=StandardOutput=append:/tmp/lightscale-agent.log " + "--property=StandardError=append:/tmp/lightscale-agent.log -- " + + cmd + ) + node.wait_for_unit("lightscale-agent.service") + 
node.wait_until_succeeds("ip link show ls-rly", timeout=60) + + start_agent(node1, "10.0.0.1:51820") + start_agent(node2, "10.0.0.2:51820") + + data1 = json.loads(node1.succeed("cat /tmp/ls-state/state.json")) + data2 = json.loads(node2.succeed("cat /tmp/ls-state/state.json")) + ip1 = data1["ipv4"] + ip2 = data2["ipv4"] + + # Direct path baseline. + node1.wait_until_succeeds("wg show ls-rly endpoints | grep -q '10.0.0.2:51820'", timeout=120) + node2.wait_until_succeeds("wg show ls-rly endpoints | grep -q '10.0.0.1:51820'", timeout=120) + node1.wait_until_succeeds(f"ping -c 3 {ip2}", timeout=180) + node2.wait_until_succeeds(f"ping -c 3 {ip1}", timeout=180) + + # Force direct UDP path down; stream relay should keep connectivity. + node1.succeed("iptables -I OUTPUT -p udp --dport 51820 -d 10.0.0.2 -j DROP") + node1.succeed("iptables -I INPUT -p udp --sport 51820 -s 10.0.0.2 -j DROP") + node2.succeed("iptables -I OUTPUT -p udp --dport 51820 -d 10.0.0.1 -j DROP") + node2.succeed("iptables -I INPUT -p udp --sport 51820 -s 10.0.0.1 -j DROP") + + node1.wait_until_succeeds("grep -q 'connected to 10.0.0.1:8443' /tmp/lightscale-agent.log", timeout=120) + node2.wait_until_succeeds("grep -q 'connected to 10.0.0.1:8443' /tmp/lightscale-agent.log", timeout=120) + node1.wait_until_succeeds(f"ping -c 3 {ip2}", timeout=240) + node2.wait_until_succeeds(f"ping -c 3 {ip1}", timeout=240) + + # Bring UDP back and verify direct path recovers. 
+ node1.succeed("iptables -D OUTPUT -p udp --dport 51820 -d 10.0.0.2 -j DROP") + node1.succeed("iptables -D INPUT -p udp --sport 51820 -s 10.0.0.2 -j DROP") + node2.succeed("iptables -D OUTPUT -p udp --dport 51820 -d 10.0.0.1 -j DROP") + node2.succeed("iptables -D INPUT -p udp --sport 51820 -s 10.0.0.1 -j DROP") + + node1.wait_until_succeeds("wg show ls-rly endpoints | grep -q '10.0.0.2:51820'", timeout=240) + node2.wait_until_succeeds("wg show ls-rly endpoints | grep -q '10.0.0.1:51820'", timeout=240) + node1.wait_until_succeeds(f"ping -c 3 {ip2}", timeout=240) + node2.wait_until_succeeds(f"ping -c 3 {ip1}", timeout=240) + ''; +}
diff --git a/lab/test-simple-autoreg.nix b/lab/test-simple-autoreg.nix
new file mode 100644
index 0000000..21f4c21
--- /dev/null
+++ b/lab/test-simple-autoreg.nix
@@ -0,0 +1,197 @@
+{ pkgs, serverPkg, clientPkg }: # Lab test: one server plus two auto-registering clients; testScript asserts they can ping each other.
+let
+  serverModule = import ../nixos/modules/lightscale-server.nix {
+    defaultPackage = serverPkg;
+  };
+  clientModule = import ../nixos/modules/lightscale-client.nix {
+    defaultPackage = clientPkg;
+  };
+in
+{
+  name = "lightscale-lab-simple-autoreg";
+  nodes = {
+    server = { ... }: {
+      imports = [ serverModule ];
+      networking.hostName = "server";
+      networking.usePredictableInterfaceNames = false;
+      virtualisation.vlans = [ 1 ];
+      networking.interfaces.eth1.useDHCP = false;
+      networking.interfaces.eth1.ipv4.addresses = [
+        {
+          address = "10.0.0.1";
+          prefixLength = 24;
+        }
+      ];
+      networking.firewall.enable = false;
+      boot.kernelModules = [ "wireguard" ];
+
+      services.lightscale-server = {
+        enable = true;
+        listen = "10.0.0.1:8080"; # clients use this address in controlUrls
+        stateFile = "/var/lib/lightscale-server/state.json";
+        adminToken = "lab-admin-token"; # same value as the Bearer token sent by curl in testScript
+      };
+
+      environment.systemPackages = [
+        clientPkg
+        pkgs.curl
+        pkgs.iputils
+      ];
+    };
+
+    client1 = { ... }: {
+      imports = [ clientModule ];
+      networking.hostName = "client1";
+      networking.usePredictableInterfaceNames = false;
+      virtualisation.vlans = [ 1 ];
+      networking.interfaces.eth1.useDHCP = false;
+      networking.interfaces.eth1.ipv4.addresses = [
+        {
+          address = "10.0.0.2";
+          prefixLength = 24;
+        }
+      ];
+      networking.firewall.enable = false;
+      boot.kernelModules = [ "wireguard" ];
+
+      services.lightscale-client = {
+        enable = true;
+        controlUrls = [ "http://10.0.0.1:8080" ];
+        autoRegister = true;
+        enrollmentTokenFile = "/run/lightscale-enroll.token"; # written by write_registration_files in testScript
+        registerNodeName = "simple-client1";
+        registerExtraArgs = [ # both key files are written by testScript before the register unit is started
+          "--machine-private-key-file"
+          "/run/lightscale-machine.key"
+          "--wg-private-key-file"
+          "/run/lightscale-wg.key"
+        ];
+        agentArgs = [
+          "--listen-port"
+          "51820"
+          "--apply-routes"
+          "--heartbeat-interval"
+          "5"
+          "--longpoll-timeout"
+          "5"
+        ];
+      };
+
+      environment.systemPackages = [
+        clientPkg
+        pkgs.iputils
+      ];
+    };
+
+    client2 = { ... }: { # same settings as client1 except hostName, address and registerNodeName
+      imports = [ clientModule ];
+      networking.hostName = "client2";
+      networking.usePredictableInterfaceNames = false;
+      virtualisation.vlans = [ 1 ];
+      networking.interfaces.eth1.useDHCP = false;
+      networking.interfaces.eth1.ipv4.addresses = [
+        {
+          address = "10.0.0.3";
+          prefixLength = 24;
+        }
+      ];
+      networking.firewall.enable = false;
+      boot.kernelModules = [ "wireguard" ];
+
+      services.lightscale-client = {
+        enable = true;
+        controlUrls = [ "http://10.0.0.1:8080" ];
+        autoRegister = true;
+        enrollmentTokenFile = "/run/lightscale-enroll.token";
+        registerNodeName = "simple-client2";
+        registerExtraArgs = [
+          "--machine-private-key-file"
+          "/run/lightscale-machine.key"
+          "--wg-private-key-file"
+          "/run/lightscale-wg.key"
+        ];
+        agentArgs = [
+          "--listen-port"
+          "51820"
+          "--apply-routes"
+          "--heartbeat-interval"
+          "5"
+          "--longpoll-timeout"
+          "5"
+        ];
+      };
+
+      environment.systemPackages = [
+        clientPkg
+        pkgs.iputils
+      ];
+    };
+  };
+
+  testScript = ''
+    import base64
+    import json
+
+    start_all()
+
+    server.wait_for_unit("lightscale-server.service")
+    server.wait_for_open_port(8080, addr="10.0.0.1", timeout=120)
+    client1.wait_for_unit("multi-user.target")
+    client2.wait_for_unit("multi-user.target")
+
+    net = json.loads(server.succeed(  # create the network through the admin API and parse the JSON reply
+        "curl -sSf -X POST http://10.0.0.1:8080/v1/networks "
+        "-H 'authorization: Bearer lab-admin-token' "
+        "-H 'content-type: application/json' "
+        "-d '{\"name\":\"simple-autoreg\",\"bootstrap_token_ttl_seconds\":1200,\"bootstrap_token_uses\":10}'"
+    ))
+    token = net["bootstrap_token"]["token"]
+    network_id = net["network"]["id"]
+
+    def write_registration_files(node, machine_byte, wg_byte):  # writes the enroll token and two keys under /run; returns (machine_key, wg_key)
+        machine_key = base64.b64encode(bytes([machine_byte]) * 32).decode("ascii")  # 32 copies of one byte, base64-encoded
+        wg_key = base64.b64encode(bytes([wg_byte]) * 32).decode("ascii")
+        node.succeed(f"printf '%s\\n' '{token}' > /run/lightscale-enroll.token")
+        node.succeed(f"printf '%s\\n' '{machine_key}' > /run/lightscale-machine.key")
+        node.succeed(f"printf '%s\\n' '{wg_key}' > /run/lightscale-wg.key")
+        return machine_key, wg_key
+
+    def run_autoreg(node):  # starts the register unit, then waits for the state file, the client service and the ls-default link
+        node.execute("systemctl reset-failed lightscale-client-register.service || true")  # best-effort: the command ends in || true
+        node.succeed("systemctl start lightscale-client-register.service")
+        node.wait_for_unit("lightscale-client-register.service")
+        node.wait_until_succeeds("test -s /var/lib/lightscale-client/default/state.json", timeout=120)
+        node.wait_until_succeeds("systemctl is-active lightscale-client.service", timeout=120)
+        node.wait_until_succeeds("ip link show ls-default", timeout=120)
+
+    expected1 = write_registration_files(client1, 11, 21)
+    expected2 = write_registration_files(client2, 12, 22)
+    run_autoreg(client1)
+    run_autoreg(client2)
+
+    state1 = json.loads(client1.succeed("cat /var/lib/lightscale-client/default/state.json"))
+    state2 = json.loads(client2.succeed("cat /var/lib/lightscale-client/default/state.json"))
+    assert state1["network_id"] == network_id
+    assert state2["network_id"] == network_id
+    assert state1["machine_private_key"] == expected1[0]  # state.json must contain exactly the keys supplied via the key files
+    assert state1["wg_private_key"] == expected1[1]
+    assert state2["machine_private_key"] == expected2[0]
+    assert state2["wg_private_key"] == expected2[1]
+
+    ip1 = state1["ipv4"]
+    ip2 = state2["ipv4"]
+
+    server.wait_until_succeeds(  # NOTE(review): grep -q matches as soon as any approved entry appears, not necessarily both nodes
+        f"curl -sSf -H 'authorization: Bearer lab-admin-token' "
+        f"http://10.0.0.1:8080/v1/admin/networks/{network_id}/nodes | grep -q '\"approved\":true'",
+        timeout=120,
+    )
+
+    client1.wait_until_succeeds(f"ping -c 3 {ip2}", timeout=180)
+    client2.wait_until_succeeds(f"ping -c 3 {ip1}", timeout=180)
+
+    client1.succeed("systemctl restart lightscale-client.service")  # connectivity must come back after a client service restart
+    client1.wait_for_unit("lightscale-client.service")
+    client1.wait_until_succeeds(f"ping -c 3 {ip2}", timeout=180)
+  '';
+}
diff --git a/lab/test-soak.nix b/lab/test-soak.nix
new file mode 100644
index 0000000..cdcddf9
--- /dev/null
+++ b/lab/test-soak.nix
@@ -0,0 +1,157 @@
+{ pkgs, serverPkg, clientPkg }: # Lab soak test: two nodes, six cycles of mesh pings with UDP 51820 blocking and agent restarts.
+{
+  name = "lightscale-lab-soak";
+  nodes = {
+    node1 = { ... }: { # runs both the server and a client agent (see testScript)
+      networking.hostName = "node1";
+      networking.usePredictableInterfaceNames = false;
+      virtualisation.vlans = [ 1 ];
+      networking.interfaces.eth1.useDHCP = false;
+      networking.interfaces.eth1.ipv4.addresses = [
+        { address = "10.0.0.1"; prefixLength = 24; }
+      ];
+      networking.firewall.enable = false;
+      boot.kernelModules = [ "wireguard" ];
+      environment.systemPackages = [
+        serverPkg
+        clientPkg
+        pkgs.wireguard-tools
+        pkgs.iproute2
+        pkgs.iputils
+        pkgs.netcat-openbsd
+        pkgs.curl
+        pkgs.iptables # testScript uses iptables to drop UDP 51820 between the nodes
+      ];
+    };
+    node2 = { ... }: {
+      networking.hostName = "node2";
+      networking.usePredictableInterfaceNames = false;
+      virtualisation.vlans = [ 1 ];
+      networking.interfaces.eth1.useDHCP = false;
+      networking.interfaces.eth1.ipv4.addresses = [
+        { address = "10.0.0.2"; prefixLength = 24; }
+      ];
+      networking.firewall.enable = false;
+      boot.kernelModules = [ "wireguard" ];
+      environment.systemPackages = [
+        clientPkg
+        pkgs.wireguard-tools
+        pkgs.iproute2
+        pkgs.iputils
+        pkgs.curl
+        pkgs.iptables
+      ];
+    };
+  };
+
+  testScript = ''
+    start_all()
+    node1.wait_for_unit("multi-user.target")
+    node2.wait_for_unit("multi-user.target")
+    node1.wait_until_succeeds("ip -4 addr show dev eth1 | grep -q '10.0.0.1/24'")
+    node2.wait_until_succeeds("ip -4 addr show dev eth1 | grep -q '10.0.0.2/24'")
+
+    node1.succeed("touch /tmp/lightscale-server.log")
+    node1.execute("sh -c 'tail -n +1 -f /tmp/lightscale-server.log >/dev/console 2>&1 &'")  # mirror the server log to the VM console
+    node1.succeed(  # run the server as transient unit lightscale-server, logging to /tmp/lightscale-server.log
+        "systemd-run --no-block --unit=lightscale-server --service-type=simple "
+        "--property=Restart=on-failure --property=RestartSec=1 "
+        "--property=TimeoutStartSec=30 "
+        "--property=StandardOutput=append:/tmp/lightscale-server.log "
+        "--property=StandardError=append:/tmp/lightscale-server.log "
+        "--setenv=RUST_LOG=info --setenv=LIGHTSCALE_ADMIN_TOKEN=test-admin -- "
+        "lightscale-server --listen 10.0.0.1:8080 --state /tmp/lightscale-state.json "
+        "--stream-relay 10.0.0.1:8443 --stream-relay-listen 10.0.0.1:8443"
+    )
+    node1.wait_for_unit("lightscale-server.service")
+    node1.wait_for_open_port(8080, addr="10.0.0.1", timeout=120)
+    node1.wait_for_open_port(8443, addr="10.0.0.1", timeout=120)
+
+    import json
+
+    net = json.loads(node1.succeed(  # create the network via the admin API; the reply carries the bootstrap token
+        "curl -sSf -X POST http://10.0.0.1:8080/v1/networks "
+        "-H 'authorization: Bearer test-admin' "
+        "-H 'content-type: application/json' "
+        "-d '{\"name\":\"soak\",\"bootstrap_token_ttl_seconds\":600,"
+        "\"bootstrap_token_uses\":10,\"bootstrap_token_tags\":[\"soak\"]}'"
+    ))
+    token = net["bootstrap_token"]["token"]
+
+    def enroll(node, name, endpoint):  # runs init, register (with the bootstrap token) and heartbeat for profile soak
+        node.succeed(
+            "lightscale-client --profile soak --config /tmp/ls-config.json "
+            "init http://10.0.0.1:8080"
+        )
+        node.succeed(
+            f"lightscale-client --profile soak --config /tmp/ls-config.json "
+            f"--state-dir /tmp/ls-state register --node-name {name} -- {token}"
+        )
+        node.succeed(
+            f"lightscale-client --profile soak --config /tmp/ls-config.json "
+            f"--state-dir /tmp/ls-state heartbeat --endpoint {endpoint}"
+        )
+
+    enroll(node1, "node1", "10.0.0.1:51820")
+    enroll(node2, "node2", "10.0.0.2:51820")
+
+    def start_agent(node, endpoint):  # launches the agent as transient unit lightscale-agent and waits for the ls-soak link
+        node.succeed("touch /tmp/lightscale-agent.log")
+        cmd = (
+            "lightscale-client --profile soak --config /tmp/ls-config.json "
+            "--state-dir /tmp/ls-state agent --listen-port 51820 "
+            "--heartbeat-interval 5 --longpoll-timeout 5 "
+            "--endpoint-stale-after 5 --endpoint-max-rotations 1 "
+            "--relay-reprobe-after 8 --stream-relay "
+            f"--endpoint {endpoint}"
+        )
+        node.succeed(
+            "systemd-run --no-block --unit=lightscale-agent --service-type=simple "
+            "--property=Restart=on-failure --property=RestartSec=1 "
+            "--property=TimeoutStartSec=30 "
+            "--property=StandardOutput=append:/tmp/lightscale-agent.log "
+            "--property=StandardError=append:/tmp/lightscale-agent.log -- "
+            + cmd
+        )
+        node.wait_for_unit("lightscale-agent.service")
+        node.wait_until_succeeds("ip link show ls-soak", timeout=60)
+
+    start_agent(node1, "10.0.0.1:51820")
+    start_agent(node2, "10.0.0.2:51820")
+
+    data1 = json.loads(node1.succeed("cat /tmp/ls-state/state.json"))
+    data2 = json.loads(node2.succeed("cat /tmp/ls-state/state.json"))
+    ip1 = data1["ipv4"]
+    ip2 = data2["ipv4"]
+
+    def mesh_ping(timeout=180):  # both directions must answer ping within the timeout
+        node1.wait_until_succeeds(f"ping -c 2 {ip2}", timeout=timeout)
+        node2.wait_until_succeeds(f"ping -c 2 {ip1}", timeout=timeout)
+
+    for cycle in range(1, 7):  # cycles 1..6
+        print(f"=== soak cycle {cycle} ===")
+        mesh_ping()
+
+        if cycle % 2 == 1:  # odd cycles: drop UDP 51820 both ways; pings must still pass (presumably via the stream relay - confirm)
+            node1.succeed("iptables -I OUTPUT -p udp --dport 51820 -d 10.0.0.2 -j DROP")
+            node1.succeed("iptables -I INPUT -p udp --sport 51820 -s 10.0.0.2 -j DROP")
+            node2.succeed("iptables -I OUTPUT -p udp --dport 51820 -d 10.0.0.1 -j DROP")
+            node2.succeed("iptables -I INPUT -p udp --sport 51820 -s 10.0.0.1 -j DROP")
+            mesh_ping(timeout=240)
+            node1.succeed("iptables -D OUTPUT -p udp --dport 51820 -d 10.0.0.2 -j DROP")  # remove the same four rules again
+            node1.succeed("iptables -D INPUT -p udp --sport 51820 -s 10.0.0.2 -j DROP")
+            node2.succeed("iptables -D OUTPUT -p udp --dport 51820 -d 10.0.0.1 -j DROP")
+            node2.succeed("iptables -D INPUT -p udp --sport 51820 -s 10.0.0.1 -j DROP")
+            node1.wait_until_succeeds("wg show ls-soak endpoints | grep -q '10.0.0.2:51820'", timeout=240)  # wait until wg lists the peer address on port 51820 again
+            node2.wait_until_succeeds("wg show ls-soak endpoints | grep -q '10.0.0.1:51820'", timeout=240)
+            mesh_ping(timeout=240)
+
+        if cycle % 3 == 0:  # cycles 3 and 6: stop and relaunch the agent on node2
+            node2.succeed("systemctl stop lightscale-agent.service")
+            start_agent(node2, "10.0.0.2:51820")
+            mesh_ping(timeout=240)
+
+    node1.succeed("! grep -qi panic /tmp/lightscale-agent.log")  # NOTE(review): nesting reconstructed; assumed to run once after the loop - confirm
+    node2.succeed("! grep -qi panic /tmp/lightscale-agent.log")  # fails if the agent log contains panic (case-insensitive)
+  '';
+}
diff --git a/lab/test-standalone.nix b/lab/test-standalone.nix
new file mode 100644
index 0000000..555d3e7
--- /dev/null
+++ b/lab/test-standalone.nix
@@ -0,0 +1,138 @@
+{ pkgs, serverPkg, clientPkg }: # Lab test: server and client daemon run as plain background processes tracked by PID files.
+{
+  name = "lightscale-lab-standalone";
+  nodes = {
+    node1 = { ... }: { # runs both the server and a client daemon (see testScript)
+      networking.hostName = "node1";
+      networking.usePredictableInterfaceNames = false;
+      virtualisation.vlans = [ 1 ];
+      networking.interfaces.eth1.useDHCP = false;
+      networking.interfaces.eth1.ipv4.addresses = [
+        { address = "10.0.0.1"; prefixLength = 24; }
+      ];
+      networking.firewall.enable = false;
+      boot.kernelModules = [ "wireguard" ];
+      environment.systemPackages = [
+        serverPkg
+        clientPkg
+        pkgs.wireguard-tools
+        pkgs.iproute2
+        pkgs.iputils
+        pkgs.netcat-openbsd
+        pkgs.curl
+      ];
+    };
+    node2 = { ... }: {
+      networking.hostName = "node2";
+      networking.usePredictableInterfaceNames = false;
+      virtualisation.vlans = [ 1 ];
+      networking.interfaces.eth1.useDHCP = false;
+      networking.interfaces.eth1.ipv4.addresses = [
+        { address = "10.0.0.2"; prefixLength = 24; }
+      ];
+      networking.firewall.enable = false;
+      boot.kernelModules = [ "wireguard" ];
+      environment.systemPackages = [
+        clientPkg
+        pkgs.wireguard-tools
+        pkgs.iproute2
+        pkgs.iputils
+        pkgs.curl
+      ];
+    };
+  };
+
+  testScript = ''
+    start_all()
+    node1.wait_for_unit("multi-user.target")
+    node2.wait_for_unit("multi-user.target")
+    node1.wait_until_succeeds("ip -4 addr show dev eth1 | grep -q '10.0.0.1/24'")
+    node2.wait_until_succeeds("ip -4 addr show dev eth1 | grep -q '10.0.0.2/24'")
+
+    # Start control plane as a plain process (no systemd-run usage).
+    node1.succeed("touch /tmp/lightscale-server.log")
+    node1.succeed(  # background the server and record its PID in /tmp/lightscale-server.pid
+        "sh -c 'LIGHTSCALE_ADMIN_TOKEN=test-admin RUST_LOG=info "
+        "lightscale-server --listen 10.0.0.1:8080 --state /tmp/lightscale-state.json "
+        ">/tmp/lightscale-server.log 2>&1 & echo $! >/tmp/lightscale-server.pid'"
+    )
+    node1.wait_until_succeeds("sh -c 'kill -0 $(cat /tmp/lightscale-server.pid)'", timeout=30)  # kill -0 only checks that the PID exists
+    node1.wait_for_open_port(8080, addr="10.0.0.1", timeout=120)
+
+    import json
+
+    net = json.loads(node1.succeed(  # create the network via the admin API; the reply carries the bootstrap token
+        "curl -sSf -X POST http://10.0.0.1:8080/v1/networks "
+        "-H 'authorization: Bearer test-admin' "
+        "-H 'content-type: application/json' "
+        "-d '{\"name\":\"standalone\",\"bootstrap_token_ttl_seconds\":600,"
+        "\"bootstrap_token_uses\":10,\"bootstrap_token_tags\":[\"standalone\"]}'"
+    ))
+    token = net["bootstrap_token"]["token"]
+
+    def enroll(node, name, endpoint):  # runs init, register (with the bootstrap token) and heartbeat for profile std
+        node.succeed(
+            "lightscale-client --profile std --config /tmp/ls-config.json "
+            "init http://10.0.0.1:8080"
+        )
+        node.succeed(
+            f"lightscale-client --profile std --config /tmp/ls-config.json "
+            f"--state-dir /tmp/ls-state register --node-name {name} -- {token}"
+        )
+        node.succeed(
+            f"lightscale-client --profile std --config /tmp/ls-config.json "
+            f"--state-dir /tmp/ls-state heartbeat --endpoint {endpoint}"
+        )
+
+    enroll(node1, "node1", "10.0.0.1:51820")
+    enroll(node2, "node2", "10.0.0.2:51820")
+
+    def start_daemon(node, endpoint):  # starts the client daemon under nohup, records its PID, waits for the ls-std link
+        node.succeed("touch /tmp/lightscale-daemon.log")
+        node.succeed(
+            "sh -c 'nohup lightscale-client --profile std --config /tmp/ls-config.json "
+            "--state-dir /tmp/ls-state daemon --profiles std "
+            "--agent-arg=--listen-port --agent-arg=51820 "
+            "--agent-arg=--heartbeat-interval --agent-arg=5 "
+            "--agent-arg=--longpoll-timeout --agent-arg=5 "
+            "--agent-arg=--endpoint --agent-arg=" + endpoint + " "
+            ">/tmp/lightscale-daemon.log 2>&1 & echo $! >/tmp/lightscale-daemon.pid'"
+        )
+        node.wait_until_succeeds("sh -c 'kill -0 $(cat /tmp/lightscale-daemon.pid)'", timeout=30)
+        node.wait_until_succeeds("ip link show ls-std", timeout=120)
+
+    def stop_daemon(node):  # sends SIGTERM to the recorded PID, then waits until that PID no longer exists
+        node.execute("sh -c 'kill -TERM $(cat /tmp/lightscale-daemon.pid) || true'")  # best-effort: the command ends in || true
+        node.wait_until_succeeds(
+            "sh -c 'pid=$(cat /tmp/lightscale-daemon.pid); "
+            "if kill -0 \"$pid\" 2>/dev/null; then exit 1; else exit 0; fi'",
+            timeout=120,
+        )
+
+    start_daemon(node1, "10.0.0.1:51820")
+    start_daemon(node2, "10.0.0.2:51820")
+
+    data1 = json.loads(node1.succeed("cat /tmp/ls-state/state.json"))
+    data2 = json.loads(node2.succeed("cat /tmp/ls-state/state.json"))
+    ip1 = data1["ipv4"]
+    ip2 = data2["ipv4"]
+
+    node1.wait_until_succeeds(f"ping -c 3 {ip2}", timeout=180)
+    node2.wait_until_succeeds(f"ping -c 3 {ip1}", timeout=180)
+
+    # Restart plain daemons and verify connectivity recovers.
+    stop_daemon(node1)
+    stop_daemon(node2)
+    node1.fail("systemctl is-active lightscale-agent.service")  # expects that no lightscale-agent.service is active on the node
+    node2.fail("systemctl is-active lightscale-agent.service")
+
+    start_daemon(node1, "10.0.0.1:51820")
+    start_daemon(node2, "10.0.0.2:51820")
+    node1.wait_until_succeeds(f"ping -c 3 {ip2}", timeout=240)
+    node2.wait_until_succeeds(f"ping -c 3 {ip1}", timeout=240)
+
+    stop_daemon(node1)
+    stop_daemon(node2)
+    node1.execute("sh -c 'kill -TERM $(cat /tmp/lightscale-server.pid) || true'")  # best-effort server shutdown; the script does not wait for exit
+  '';
+}