# NixOS VM test "lightscale-lab-nat-churn".
#
# Topology:
#
#   node1 (10.0.0.1) --vlan 1-- natgw (10.0.0.2 | 192.168.60.1) --vlan 2-- node3 (192.168.60.10)
#
# node1 runs lightscale-server plus a lightscale-client agent. node3 runs only
# an agent and reaches node1 through natgw, whose iptables NAT mapping for
# node3's UDP port 51820 is rewritten several times by the test script.
#
# Arguments:
#   pkgs      - package set providing the auxiliary tools installed on the VMs.
#   serverPkg - package providing the `lightscale-server` binary (installed on node1).
#   clientPkg - package providing the `lightscale-client` binary (node1 and node3).
{ pkgs, serverPkg, clientPkg }:
let
  # Static /24 IPv4 configuration for one interface, with DHCP turned off.
  staticV4 = address: {
    useDHCP = false;
    ipv4.addresses = [
      {
        inherit address;
        prefixLength = 24;
      }
    ];
  };

  # Settings every VM in this test shares: classic ethN interface names
  # (the addresses below are keyed on eth1/eth2) and no host firewall.
  common = {
    networking = {
      usePredictableInterfaceNames = false;
      firewall.enable = false;
    };
  };
in
{
  name = "lightscale-lab-nat-churn";

  nodes = {
    # Public side: hosts the server and one agent.
    node1 =
      { ... }:
      {
        imports = [ common ];

        virtualisation.vlans = [ 1 ];

        networking = {
          hostName = "node1";
          interfaces.eth1 = staticV4 "10.0.0.1";
        };

        boot.kernelModules = [ "wireguard" ];

        environment.systemPackages = [
          serverPkg
          clientPkg
          pkgs.wireguard-tools
          pkgs.iproute2
          pkgs.iputils
          pkgs.netcat-openbsd
          pkgs.curl
        ];
      };

    # Router between vlan 1 and vlan 2; the test script installs the NAT
    # rules at run time, so only forwarding is enabled declaratively.
    natgw =
      { ... }:
      {
        imports = [ common ];

        virtualisation.vlans = [
          1
          2
        ];

        networking = {
          hostName = "natgw";
          interfaces = {
            eth1 = staticV4 "10.0.0.2";
            eth2 = staticV4 "192.168.60.1";
          };
        };

        boot.kernel.sysctl."net.ipv4.ip_forward" = 1;

        environment.systemPackages = [
          pkgs.iproute2
          pkgs.iputils
          pkgs.iptables
          pkgs.conntrack-tools
        ];
      };

    # Private side: agent only, default route through natgw.
    node3 =
      { ... }:
      {
        imports = [ common ];

        virtualisation.vlans = [ 2 ];

        networking = {
          hostName = "node3";
          interfaces.eth1 = staticV4 "192.168.60.10";
          defaultGateway = {
            address = "192.168.60.1";
            interface = "eth1";
          };
        };

        boot.kernelModules = [ "wireguard" ];

        environment.systemPackages = [
          clientPkg
          pkgs.wireguard-tools
          pkgs.iproute2
          pkgs.iputils
          pkgs.curl
        ];
      };
  };

  testScript = ''
    import json

    # Command prefixes shared by every lightscale-client invocation.
    CLIENT = "lightscale-client --profile natchurn --config /tmp/ls-config.json"
    CLIENT_STATE = CLIENT + " --state-dir /tmp/ls-state"

    # Endpoints each node advertises. For node3 these are the natgw address
    # combined with every external port the test maps to it over time.
    NODE1_ENDPOINTS = ["10.0.0.1:51820"]
    NODE3_ENDPOINTS = [f"10.0.0.2:{p}" for p in (40000, 41000, 42000, 43000)]


    def endpoint_flags(endpoints):
        """Render endpoints as repeated ' --endpoint X' arguments (leading space included)."""
        return "".join(f" --endpoint {endpoint}" for endpoint in endpoints)


    def systemd_run(unit, log_path, extra_args=()):
        """Build the systemd-run prefix, ending in '--', for a restarting transient
        service whose stdout and stderr are appended to log_path."""
        parts = [
            "systemd-run --no-block",
            f"--unit={unit}",
            "--service-type=simple",
            "--property=Restart=on-failure",
            "--property=RestartSec=1",
            "--property=TimeoutStartSec=30",
            f"--property=StandardOutput=append:{log_path}",
            f"--property=StandardError=append:{log_path}",
            *extra_args,
            "--",
        ]
        return " ".join(parts)


    def set_nat_port(port):
        """Replace natgw's NAT table so that UDP {port} on eth1 maps to
        192.168.60.10:51820 (and back), masquerade everything else leaving
        eth1, then flush conntrack. The conntrack flush is best-effort."""
        nat_rules = [
            "-F",
            f"-A PREROUTING -i eth1 -p udp --dport {port} -j DNAT --to-destination 192.168.60.10:51820",
            f"-A POSTROUTING -o eth1 -p udp -s 192.168.60.10 --sport 51820 -j SNAT --to-source 10.0.0.2:{port}",
            "-A POSTROUTING -o eth1 -j MASQUERADE",
        ]
        for rule in nat_rules:
            natgw.succeed(f"iptables -t nat {rule}")
        natgw.execute("conntrack -F || true")


    def enroll(node, name, endpoints):
        """Run client init, register (with the bootstrap token) and one heartbeat on node."""
        node.succeed(f"{CLIENT} init http://10.0.0.1:8080")
        node.succeed(f"{CLIENT_STATE} register --node-name {name} -- {token}")
        node.succeed(f"{CLIENT_STATE} heartbeat{endpoint_flags(endpoints)}")


    def start_agent(node, endpoints):
        """Launch the client agent as a transient unit on node and wait until
        the unit is up and the ls-natchurn link exists."""
        agent_log = "/tmp/lightscale-agent.log"
        node.succeed(f"touch {agent_log}")
        agent_cmd = (
            f"{CLIENT_STATE} agent --listen-port 51820"
            " --heartbeat-interval 5 --longpoll-timeout 5"
            " --endpoint-stale-after 5 --endpoint-max-rotations 2"
            + endpoint_flags(endpoints)
        )
        node.succeed(systemd_run("lightscale-agent", agent_log) + " " + agent_cmd)
        node.wait_for_unit("lightscale-agent.service")
        node.wait_until_succeeds("ip link show ls-natchurn", timeout=60)


    def eventually_ping(src, dst, timeout=180):
        """Retry until src gets a ping reply from dst. Each attempt is itself a
        shell loop of up to 20 single pings."""
        probe = f"for i in $(seq 1 20); do ping -c 1 -W 1 {dst} && exit 0; sleep 1; done; exit 1"
        src.wait_until_succeeds(probe, timeout=timeout)


    def ping_both_ways(timeout=180):
        """Check overlay reachability node1 -> node3, then node3 -> node1."""
        eventually_ping(node1, node3_ip, timeout=timeout)
        eventually_ping(node3, node1_ip, timeout=timeout)


    # --- Boot and verify underlay addressing ---------------------------------
    start_all()
    for machine in (node1, natgw, node3):
        machine.wait_for_unit("multi-user.target")

    expected_addresses = [
        (node1, "eth1", "10.0.0.1/24"),
        (natgw, "eth1", "10.0.0.2/24"),
        (natgw, "eth2", "192.168.60.1/24"),
        (node3, "eth1", "192.168.60.10/24"),
    ]
    for machine, dev, cidr in expected_addresses:
        machine.wait_until_succeeds(f"ip -4 addr show dev {dev} | grep -q '{cidr}'")

    # --- Initial NAT state ---------------------------------------------------
    for args in ("-P FORWARD ACCEPT", "-F", "-t nat -F"):
        natgw.succeed(f"iptables {args}")
    set_nat_port(40000)

    # Very short UDP conntrack timeouts to emulate consumer NAT churn.
    for key in ("nf_conntrack_udp_timeout", "nf_conntrack_udp_timeout_stream"):
        natgw.succeed(f"sysctl -w net.netfilter.{key}=5")

    # --- Server --------------------------------------------------------------
    server_log = "/tmp/lightscale-server.log"
    node1.succeed(f"touch {server_log}")
    # Mirror the server log to the VM console in the background.
    node1.execute(f"sh -c 'tail -n +1 -f {server_log} >/dev/console 2>&1 &'")
    node1.succeed(
        systemd_run(
            "lightscale-server",
            server_log,
            ["--setenv=RUST_LOG=info", "--setenv=LIGHTSCALE_ADMIN_TOKEN=test-admin"],
        )
        + " lightscale-server --listen 10.0.0.1:8080 --state /tmp/lightscale-state.json"
    )
    node1.wait_for_unit("lightscale-server.service")
    node1.wait_for_open_port(8080, addr="10.0.0.1", timeout=120)

    # --- Create the network and fetch its bootstrap token --------------------
    # Compact separators keep the request body free of whitespace.
    create_network = json.dumps(
        {
            "name": "nat-churn",
            "bootstrap_token_ttl_seconds": 600,
            "bootstrap_token_uses": 10,
            "bootstrap_token_tags": ["nat-churn"],
        },
        separators=(",", ":"),
    )
    response = node1.succeed(
        "curl -sSf -X POST http://10.0.0.1:8080/v1/networks"
        " -H 'authorization: Bearer test-admin'"
        " -H 'content-type: application/json'"
        f" -d '{create_network}'"
    )
    token = json.loads(response)["bootstrap_token"]["token"]

    # --- Enroll both nodes, then start their agents --------------------------
    enroll(node1, "node1", NODE1_ENDPOINTS)
    enroll(node3, "node3", NODE3_ENDPOINTS)

    start_agent(node1, NODE1_ENDPOINTS)
    start_agent(node3, NODE3_ENDPOINTS)

    # Overlay addresses are read from each client's state file.
    node1_ip = json.loads(node1.succeed("cat /tmp/ls-state/state.json"))["ipv4"]
    node3_ip = json.loads(node3.succeed("cat /tmp/ls-state/state.json"))["ipv4"]

    ping_both_ways()

    # Wait beyond the short conntrack timeout and verify it re-establishes.
    natgw.succeed("sleep 7")
    natgw.execute("conntrack -L -p udp || true")
    ping_both_ways()

    # Rebind multiple times: move the external port and expect node1's
    # WireGuard endpoint for the peer to follow it.
    for port in (41000, 42000):
        set_nat_port(port)
        node1.wait_until_succeeds(
            f"wg show ls-natchurn endpoints | grep -q ':{port}'", timeout=180
        )
        ping_both_ways()

    # Short full outage, then recover on a fresh mapped port.
    blocked = ("--dport", "--sport")
    for match in blocked:
        natgw.succeed(f"iptables -I FORWARD -p udp {match} 51820 -j DROP")
    node1.execute(f"ping -c 2 {node3_ip} || true")
    natgw.succeed("sleep 10")
    for match in blocked:
        natgw.succeed(f"iptables -D FORWARD -p udp {match} 51820 -j DROP")

    set_nat_port(43000)
    node1.wait_until_succeeds(
        "wg show ls-natchurn endpoints | grep -q ':43000'", timeout=240
    )
    ping_both_ways(timeout=240)
  '';
}