# NixOS VM test "fiberlb-native-bgp-ecmp-drain-vm-smoke".
#
# Topology (all nodes on VLAN 1, 192.168.100.0/24):
#   edge  192.168.100.1  FRR zebra + bgpd, AS 65020, multipath enabled
#   lb_a  192.168.100.2  IAM + FiberLB (BGP AS 65010) + local mock HTTP backend
#   lb_b  192.168.100.3  IAM + FiberLB (BGP AS 65010) + local mock HTTP backend
#
# The test script creates the same VIP on both load balancers, checks that the
# edge installs a route with both next hops, drains lb_a through the
# /var/lib/fiberlb/drain marker file, checks that only lb_b remains as next
# hop, and finally removes the marker and checks that both next hops return.
{
  pkgs,
  photoncloudPackages,
  photoncloudModule,
  nixNosModule,
}:
let
  edgeZebraConfig = pkgs.writeText "fiberlb-ecmp-edge-zebra.conf" ''
    hostname edge-zebra
    log stdout debugging
  '';

  edgeBgpdConfig = pkgs.writeText "fiberlb-ecmp-edge-bgpd.conf" ''
    hostname edge-frr
    log stdout debugging

    router bgp 65020
     bgp router-id 192.168.100.1
     no bgp ebgp-requires-policy
     bgp bestpath as-path multipath-relax
     neighbor 192.168.100.2 remote-as 65010
     neighbor 192.168.100.2 description fiberlb-a
     neighbor 192.168.100.3 remote-as 65010
     neighbor 192.168.100.3 description fiberlb-b
     !
     address-family ipv4 unicast
      maximum-paths 8
      neighbor 192.168.100.2 activate
      neighbor 192.168.100.3 activate
     exit-address-family
    !
  '';

  iamProtoDir = ../../iam/proto;
  iamProto = "iam.proto";
  fiberlbProtoDir = ../../fiberlb/crates/fiberlb-api/proto;
  fiberlbProto = "fiberlb.proto";

  # Minimal HTTP server listening on 127.0.0.1:18081 that answers every GET
  # with "fiberlb ecmp backend <suffix>\n". The suffix lets the test tell which
  # load balancer node served a request.
  mkBackendScript =
    suffix:
    pkgs.writeText "fiberlb-ecmp-backend-${suffix}.py" ''
      from http.server import BaseHTTPRequestHandler, HTTPServer


      class Handler(BaseHTTPRequestHandler):
          def do_GET(self):
              body = b"fiberlb ecmp backend ${suffix}\n"
              self.send_response(200)
              self.send_header("Content-Type", "text/plain; charset=utf-8")
              self.send_header("Content-Length", str(len(body)))
              self.end_headers()
              self.wfile.write(body)

          def log_message(self, format, *args):
              return


      HTTPServer(("127.0.0.1", 18081), Handler).serve_forever()
    '';

  backendScriptA = mkBackendScript "a";
  backendScriptB = mkBackendScript "b";

  # Builds one load balancer node. Both nodes are identical apart from the
  # host name, the address on VLAN 1 (also used as IAM address, BGP router id
  # and BGP next hop) and the mock backend they run.
  mkLbNode =
    {
      hostName,
      address,
      backendDescription,
      backendScript,
    }:
    { ... }:
    {
      imports = [
        nixNosModule
        photoncloudModule
      ];

      networking.hostName = hostName;
      networking.useDHCP = false;
      networking.firewall.enable = false;
      virtualisation.vlans = [ 1 ];
      networking.interfaces.eth1.ipv4.addresses = [
        {
          inherit address;
          prefixLength = 24;
        }
      ];

      environment.systemPackages = with pkgs; [
        curl
        grpcurl
        jq
        python3
      ];

      services.iam = {
        enable = true;
        package = photoncloudPackages.iam-server;
        port = 50080;
        httpPort = 8083;
        storeBackend = "memory";
      };

      systemd.services.iam.environment = {
        IAM_ALLOW_RANDOM_SIGNING_KEY = "1";
      };

      services.fiberlb = {
        enable = true;
        package = photoncloudPackages.fiberlb-server;
        port = 50085;
        iamAddr = "${address}:50080";
        metadataBackend = "sqlite";
        databaseUrl = "sqlite:/var/lib/fiberlb/metadata.db";
        singleNode = true;
        healthCheckIntervalSecs = 1;
        healthCheckTimeoutSecs = 1;
        vipCheckIntervalSecs = 1;
        vipDrain.holdTimeSecs = 3;
        vipOwnership = {
          enable = true;
          interface = "lo";
        };
        bgp = {
          enable = true;
          localAs = 65010;
          routerId = address;
          nextHop = address;
          holdTimeSecs = 30;
          keepaliveSecs = 10;
          peers = [
            {
              address = "192.168.100.1";
              port = 179;
              asn = 65020;
              description = "edge";
            }
          ];
        };
      };

      systemd.services.mock-backend = {
        description = backendDescription;
        wantedBy = [ "multi-user.target" ];
        after = [ "network.target" ];
        serviceConfig = {
          Type = "simple";
          ExecStart = "${pkgs.python3}/bin/python ${backendScript}";
          Restart = "always";
          RestartSec = "1s";
        };
      };

      system.stateVersion = "24.11";
    };
in
{
  name = "fiberlb-native-bgp-ecmp-drain-vm-smoke";

  nodes = {
    edge =
      { ... }:
      {
        networking.hostName = "edge";
        networking.useDHCP = false;
        networking.firewall.enable = false;
        virtualisation.vlans = [ 1 ];
        networking.interfaces.eth1.ipv4.addresses = [
          {
            address = "192.168.100.1";
            prefixLength = 24;
          }
        ];

        environment.systemPackages = with pkgs; [
          curl
          frr
          iproute2
          jq
        ];

        users.groups.frr = { };
        users.groups.frrvty = { };
        users.users.frr = {
          isSystemUser = true;
          group = "frr";
          extraGroups = [ "frrvty" ];
        };
        users.users.root.extraGroups = [ "frrvty" ];

        systemd.services.frr-zebra = {
          description = "FRR zebra for FiberLB ECMP smoke";
          wantedBy = [ "multi-user.target" ];
          after = [ "network.target" ];
          serviceConfig = {
            Type = "simple";
            RuntimeDirectory = "frr";
            RuntimeDirectoryMode = "0755";
            # /run/frr is shared with frr-bgpd. With the default (no) systemd
            # removes the directory whenever this unit stops or is restarted,
            # which would delete bgpd's pid file from under the other unit.
            RuntimeDirectoryPreserve = "yes";
            ExecStartPre = "${pkgs.runtimeShell} -lc '${pkgs.coreutils}/bin/install -d -o root -g root /run/frr /var/run/frr && ${pkgs.coreutils}/bin/rm -f /run/frr/zebra.pid /var/run/frr/zebra.pid'";
            ExecStart = "${pkgs.frr}/libexec/frr/zebra -f ${edgeZebraConfig} -A 127.0.0.1 -P 2601 -i /run/frr/zebra.pid -z /run/frr/zserv.api -u root -g root --log stdout";
            Restart = "on-failure";
            RestartSec = "2s";
          };
        };

        systemd.services.frr-bgpd = {
          description = "FRR bgpd for FiberLB ECMP smoke";
          wantedBy = [ "multi-user.target" ];
          after = [
            "network.target"
            "frr-zebra.service"
          ];
          requires = [ "frr-zebra.service" ];
          serviceConfig = {
            Type = "simple";
            RuntimeDirectory = "frr";
            RuntimeDirectoryMode = "0755";
            # /run/frr is shared with frr-zebra. Without this, an on-failure
            # restart of bgpd makes systemd delete the directory, including
            # the zserv.api socket path that ExecStartPre below waits for.
            RuntimeDirectoryPreserve = "yes";
            # Stale pid files are removed, then the start is delayed (up to 30
            # one-second polls) until the zebra API socket exists.
            ExecStartPre = "${pkgs.runtimeShell} -lc '${pkgs.coreutils}/bin/install -d -o root -g root /run/frr /var/run/frr && ${pkgs.coreutils}/bin/rm -f /run/frr/bgpd.pid /var/run/frr/bgpd.pid && for _ in $(seq 1 30); do [ -S /run/frr/zserv.api ] && exit 0; sleep 1; done; echo zserv socket did not appear >&2; exit 1'";
            ExecStart = "${pkgs.frr}/libexec/frr/bgpd -f ${edgeBgpdConfig} -A 127.0.0.1 -P 2605 -p 179 -i /run/frr/bgpd.pid -z /run/frr/zserv.api -S --log stdout";
            Restart = "on-failure";
            RestartSec = "2s";
          };
        };

        system.stateVersion = "24.11";
      };

    lb_a = mkLbNode {
      hostName = "lb-a";
      address = "192.168.100.2";
      backendDescription = "FiberLB ECMP backend A";
      backendScript = backendScriptA;
    };

    lb_b = mkLbNode {
      hostName = "lb-b";
      address = "192.168.100.3";
      backendDescription = "FiberLB ECMP backend B";
      backendScript = backendScriptB;
    };
  };

  testScript = ''
    import json
    import re
    import shlex
    import time

    IAM_PROTO_DIR = "${iamProtoDir}"
    IAM_PROTO = "${iamProto}"
    FIBERLB_PROTO_DIR = "${fiberlbProtoDir}"
    FIBERLB_PROTO = "${fiberlbProto}"
    # One exposition line: metric name, optional {label="value",...} blob, value.
    METRIC_RE = re.compile(r"^([a-zA-Z_:][a-zA-Z0-9_:]*)(?:\{([^}]*)\})?\s+([-+0-9.eE]+)$")
    VIP = "203.0.113.77"
    VIP_PREFIX = f"{VIP}/32"
    LISTENER_URL = f"http://{VIP}:18080/"


    def grpcurl_json(machine, endpoint, import_path, proto, service, payload, headers=None):
        """Run one plaintext grpcurl call on `machine` and return the decoded JSON reply.

        Raises AssertionError (with the command output) when grpcurl exits non-zero
        or the 15 second timeout kills it.
        """
        header_args = ""
        for header in headers or []:
            header_args += f" -H {shlex.quote(header)}"
        command = (
            f"grpcurl -plaintext{header_args} "
            f"-import-path {shlex.quote(import_path)} "
            f"-proto {shlex.quote(proto)} "
            f"-d {shlex.quote(json.dumps(payload))} "
            f"{shlex.quote(endpoint)} {shlex.quote(service)}"
        )
        status, output = machine.execute(f"timeout 15 sh -lc {shlex.quote(command + ' 2>&1')}")
        if status != 0:
            raise AssertionError(
                "grpcurl failed"
                f" service={service}"
                f" status={status}"
                f" payload={json.dumps(payload, sort_keys=True)}"
                f" output={output}"
            )
        return json.loads(output)


    def issue_project_admin_token(machine, org_id, project_id):
        """Create a service account, bind roles/ProjectAdmin to it and return a token.

        All three IAM calls are retried every 2 seconds and share one 120 second
        deadline; AssertionError is raised when the deadline passes.
        """
        principal_id = f"fiberlb-ecmp-{machine.name}-{int(time.time())}"
        deadline = time.time() + 120

        def retry(action):
            last_error = None
            while time.time() < deadline:
                try:
                    return action()
                except Exception as exc:
                    last_error = exc
                    time.sleep(2)
            raise AssertionError(f"IAM bootstrap timed out: {last_error}")

        def iam_call(service, payload):
            return retry(lambda: grpcurl_json(
                machine,
                "127.0.0.1:50080",
                IAM_PROTO_DIR,
                IAM_PROTO,
                service,
                payload,
            ))

        project_scope = {
            "project": {
                "id": project_id,
                "orgId": org_id,
            }
        }

        iam_call(
            "iam.v1.IamAdmin/CreatePrincipal",
            {
                "id": principal_id,
                "kind": "PRINCIPAL_KIND_SERVICE_ACCOUNT",
                "name": principal_id,
                "orgId": org_id,
                "projectId": project_id,
            },
        )
        iam_call(
            "iam.v1.IamAdmin/CreateBinding",
            {
                "principal": {
                    "kind": "PRINCIPAL_KIND_SERVICE_ACCOUNT",
                    "id": principal_id,
                },
                "role": "roles/ProjectAdmin",
                "scope": project_scope,
            },
        )
        token_response = iam_call(
            "iam.v1.IamToken/IssueToken",
            {
                "principalId": principal_id,
                "principalKind": "PRINCIPAL_KIND_SERVICE_ACCOUNT",
                "scope": project_scope,
                "ttlSeconds": 3600,
            },
        )
        return token_response["token"]


    def create_load_balancer(machine, token, name_suffix):
        """Create load balancer, pool, backend, HTTP health check and TCP listener.

        The backend points at the local mock server (127.0.0.1:18081) and the
        listener is on port 18080 of the shared VIP. Returns the backend id.
        """
        auth_headers = [f"authorization: Bearer {token}"]

        def call(service, payload):
            return grpcurl_json(
                machine,
                "127.0.0.1:50085",
                FIBERLB_PROTO_DIR,
                FIBERLB_PROTO,
                service,
                payload,
                headers=auth_headers,
            )

        response = call(
            "fiberlb.v1.LoadBalancerService/CreateLoadBalancer",
            {
                "name": f"bgp-ecmp-{name_suffix}",
                "orgId": "bgp-ecmp-org",
                "projectId": "bgp-ecmp-project",
                "description": f"native bgp ecmp {name_suffix}",
                "vipAddress": VIP,
            },
        )
        lb_id = response["loadbalancer"]["id"]

        pool_id = call(
            "fiberlb.v1.PoolService/CreatePool",
            {
                "name": f"bgp-ecmp-pool-{name_suffix}",
                "loadbalancerId": lb_id,
                "algorithm": "POOL_ALGORITHM_ROUND_ROBIN",
                "protocol": "POOL_PROTOCOL_TCP",
            },
        )["pool"]["id"]

        backend_id = call(
            "fiberlb.v1.BackendService/CreateBackend",
            {
                "name": f"bgp-ecmp-backend-{name_suffix}",
                "poolId": pool_id,
                "address": "127.0.0.1",
                "port": 18081,
                "weight": 1,
            },
        )["backend"]["id"]

        call(
            "fiberlb.v1.HealthCheckService/CreateHealthCheck",
            {
                "name": f"bgp-ecmp-health-{name_suffix}",
                "poolId": pool_id,
                "type": "HEALTH_CHECK_TYPE_HTTP",
                "intervalSeconds": 1,
                "timeoutSeconds": 1,
                "healthyThreshold": 1,
                "unhealthyThreshold": 1,
                "httpConfig": {
                    "method": "GET",
                    "path": "/",
                    "expectedCodes": [200],
                },
            },
        )

        call(
            "fiberlb.v1.ListenerService/CreateListener",
            {
                "name": f"bgp-ecmp-listener-{name_suffix}",
                "loadbalancerId": lb_id,
                "protocol": "LISTENER_PROTOCOL_TCP",
                "port": 18080,
                "defaultPoolId": pool_id,
            },
        )

        return backend_id


    def wait_for_backend_status(machine, status, backend_id, token):
        """Poll GetBackend until .backend.status equals `status`."""
        # Built outside the command f-string: a backslash escape inside a nested
        # f-string expression is a syntax error before Python 3.12.
        jq_filter = f'.backend.status == "{status}"'
        machine.wait_until_succeeds(
            "grpcurl -plaintext "
            f"-H {shlex.quote('authorization: Bearer ' + token)} "
            f"-import-path {shlex.quote(FIBERLB_PROTO_DIR)} "
            f"-proto {shlex.quote(FIBERLB_PROTO)} "
            f"-d {shlex.quote(json.dumps({'id': backend_id}))} "
            "127.0.0.1:50085 fiberlb.v1.BackendService/GetBackend "
            f"| jq -e {shlex.quote(jq_filter)}"
        )


    def machine_diagnostics(machine, unit):
        """Return metrics scrape, systemctl status and journal tail for `unit`.

        Every command is suffixed with "|| true" so collecting diagnostics never
        raises on its own.
        """
        metrics = machine.succeed("curl -fsS http://127.0.0.1:9098/metrics || true")
        service_status = machine.succeed(
            f"systemctl status {shlex.quote(unit)} --no-pager || true"
        )
        journal = machine.succeed(
            f"journalctl -u {shlex.quote(unit)} -n 200 --no-pager || true"
        )
        return (
            f"metrics:\n{metrics}\n"
            f"systemctl status:\n{service_status}\n"
            f"journal:\n{journal}"
        )


    def edge_bgp_diagnostics():
        """Return bgpd unit state plus BGP, zebra and kernel views of the VIP route."""
        bgpd_status = edge.succeed("systemctl status frr-bgpd.service --no-pager || true")
        bgpd_journal = edge.succeed("journalctl -u frr-bgpd.service -n 200 --no-pager || true")
        bgp_summary = edge.succeed("vtysh -c 'show ip bgp summary' || true")
        bgp_route = edge.succeed(f"vtysh -c 'show ip bgp {VIP_PREFIX}' || true")
        zebra_route = edge.succeed(f"vtysh -c 'show ip route {VIP_PREFIX}' || true")
        kernel_route = edge.succeed(f"ip route show {VIP_PREFIX} || true")
        return (
            "edge frr-bgpd status:\n"
            f"{bgpd_status}\n"
            "edge frr-bgpd journal:\n"
            f"{bgpd_journal}\n"
            "edge BGP summary:\n"
            f"{bgp_summary}\n"
            f"edge BGP route {VIP_PREFIX}:\n"
            f"{bgp_route}\n"
            f"edge zebra route {VIP_PREFIX}:\n"
            f"{zebra_route}\n"
            f"edge kernel route {VIP_PREFIX}:\n"
            f"{kernel_route}\n"
        )


    def wait_for_unit_or_dump(machine, unit):
        """Wait up to 120 seconds for `unit` to be active.

        Raises AssertionError with diagnostics as soon as the unit reports
        "failed", or when the deadline passes.
        """
        deadline = time.time() + 120
        while time.time() < deadline:
            status, output = machine.execute(f"systemctl is-active {shlex.quote(unit)}")
            state = output.strip()
            if status == 0 and state == "active":
                return
            if state == "failed":
                raise AssertionError(
                    f"unit {unit} failed to start\n{machine_diagnostics(machine, unit)}"
                )
            time.sleep(1)

        raise AssertionError(
            f"unit {unit} did not become active before timeout\n{machine_diagnostics(machine, unit)}"
        )


    def wait_for_command_or_dump(machine, command, unit=None, timeout=120):
        """Retry a shell command once per second until it exits 0.

        On timeout raises AssertionError carrying the last command output, the
        listening sockets and, when `unit` is given, that unit's diagnostics.
        """
        deadline = time.time() + timeout
        last_output = ""
        while time.time() < deadline:
            status, output = machine.execute(f"sh -lc {shlex.quote(command + ' 2>&1')}")
            last_output = output
            if status == 0:
                return
            time.sleep(1)

        diagnostics = f"last command output:\n{last_output}\n"
        if unit is not None:
            diagnostics += machine_diagnostics(machine, unit)
        diagnostics += f"socket state:\n{machine.succeed('ss -ltnp || true')}\n"
        raise AssertionError(
            f"command did not succeed before timeout: {command}\n{diagnostics}"
        )


    def parse_labels(label_blob):
        """Turn the text between the braces of a metric line into a dict.

        Splits on "," and on the first "=" of each part, so label values that
        themselves contain a comma are not supported.
        """
        if not label_blob:
            return {}
        labels = {}
        for part in label_blob.split(","):
            key, value = part.split("=", 1)
            labels[key] = value.strip().strip('"')
        return labels


    def wait_for_metric(machine, metric_name, expected_value, labels=None):
        """Poll the metrics endpoint on port 9098 for up to 60 seconds.

        Returns once a sample named `metric_name` with exactly the expected
        labels is within 0.0001 of `expected_value`. A failed scrape is retried
        rather than aborting, so the timeout error always carries diagnostics.
        """
        expected_labels = labels or {}
        deadline = time.time() + 60
        last_exposition = ""

        while time.time() < deadline:
            status, exposition = machine.execute(
                "curl -fsS http://127.0.0.1:9098/metrics 2>&1"
            )
            last_exposition = exposition
            if status != 0:
                time.sleep(1)
                continue
            for line in exposition.splitlines():
                line = line.strip()
                if not line or line.startswith("#"):
                    continue
                match = METRIC_RE.match(line)
                if not match:
                    continue
                name, label_blob, value = match.groups()
                if name != metric_name:
                    continue
                if parse_labels(label_blob) != expected_labels:
                    continue
                if abs(float(value) - float(expected_value)) < 0.0001:
                    return
            time.sleep(1)

        raise AssertionError(
            f"metric {metric_name} with labels={expected_labels} did not reach {expected_value}\n"
            f"last metrics scrape:\n{last_exposition}\n"
            f"{machine_diagnostics(machine, 'fiberlb.service')}\n"
            f"{edge_bgp_diagnostics()}"
        )


    def wait_for_local_vip(machine, present):
        """Wait until the VIP is (present=True) or is not (present=False) on lo."""
        pattern = f"inet {VIP}/32"
        if present:
            machine.wait_until_succeeds(
                f"ip -4 addr show dev lo | grep -F {shlex.quote(pattern)}"
            )
        else:
            deadline = time.time() + 60
            while time.time() < deadline:
                output = machine.succeed("ip -4 addr show dev lo || true")
                if pattern not in output:
                    return
                time.sleep(1)
            raise AssertionError(f"VIP {VIP} still present on loopback")


    def wait_for_edge_route(next_hops):
        """Wait up to 60 seconds until the edge kernel route lists every next hop."""
        deadline = time.time() + 60
        last_output = ""
        while time.time() < deadline:
            output = edge.succeed(f"ip route show {shlex.quote(VIP_PREFIX)} || true")
            last_output = output
            if all(next_hop in output for next_hop in next_hops):
                return
            time.sleep(1)
        raise AssertionError(
            f"edge route for {VIP_PREFIX} did not contain nexthops {next_hops}\n"
            f"last kernel route output:\n{last_output}\n"
            f"{edge_bgp_diagnostics()}"
        )


    def wait_for_edge_route_absent(needle):
        """Wait up to 60 seconds until `needle` is gone from the edge kernel route."""
        deadline = time.time() + 60
        last_output = ""
        while time.time() < deadline:
            output = edge.succeed(f"ip route show {shlex.quote(VIP_PREFIX)} || true")
            last_output = output
            if needle not in output:
                return
            time.sleep(1)
        raise AssertionError(
            f"edge route for {VIP_PREFIX} still contained {needle}\n"
            f"last kernel route output:\n{last_output}\n"
            f"{edge_bgp_diagnostics()}"
        )


    def wait_for_http_any():
        """Wait until the VIP listener answers from either backend."""
        edge.wait_until_succeeds(
            f"curl -fsS --max-time 5 {shlex.quote(LISTENER_URL)} | grep -E 'fiberlb ecmp backend (a|b)'"
        )


    start_all()
    serial_stdout_off()

    # Edge router: zebra first (bgpd needs its API socket), then bgpd on :179.
    wait_for_unit_or_dump(edge, "frr-zebra.service")
    wait_for_command_or_dump(edge, "test -S /run/frr/zserv.api", "frr-zebra.service")
    wait_for_unit_or_dump(edge, "frr-bgpd.service")
    wait_for_command_or_dump(
        edge,
        "ss -ltnH '( sport = :179 )' | grep -q LISTEN",
        "frr-bgpd.service",
    )

    # Load balancers: IAM, mock backend, FiberLB gRPC (:50085) and metrics (:9098).
    for machine in [lb_a, lb_b]:
        wait_for_unit_or_dump(machine, "iam.service")
        wait_for_command_or_dump(machine, "ss -ltnH '( sport = :50080 )' | grep -q LISTEN", "iam.service")
        wait_for_unit_or_dump(machine, "mock-backend.service")
        wait_for_unit_or_dump(machine, "fiberlb.service")
        wait_for_command_or_dump(machine, "ss -ltnH '( sport = :50085 )' | grep -q LISTEN", "fiberlb.service")
        wait_for_command_or_dump(machine, "ss -ltnH '( sport = :9098 )' | grep -q LISTEN", "fiberlb.service")

    # Both BGP sessions must be established as seen from the edge.
    wait_for_command_or_dump(
        edge,
        "vtysh -c 'show ip bgp neighbor 192.168.100.2' | grep -F 'BGP state = Established'",
        "frr-bgpd.service",
    )
    wait_for_command_or_dump(
        edge,
        "vtysh -c 'show ip bgp neighbor 192.168.100.3' | grep -F 'BGP state = Established'",
        "frr-bgpd.service",
    )

    token_a = issue_project_admin_token(lb_a, "bgp-ecmp-org", "bgp-ecmp-project")
    token_b = issue_project_admin_token(lb_b, "bgp-ecmp-org", "bgp-ecmp-project")
    backend_a = create_load_balancer(lb_a, token_a, "a")
    backend_b = create_load_balancer(lb_b, token_b, "b")

    wait_for_backend_status(lb_a, "BACKEND_STATUS_ONLINE", backend_a, token_a)
    wait_for_backend_status(lb_b, "BACKEND_STATUS_ONLINE", backend_b, token_b)
    wait_for_metric(lb_a, "fiberlb_bgp_connected_peers", 1)
    wait_for_metric(lb_b, "fiberlb_bgp_connected_peers", 1)
    wait_for_local_vip(lb_a, True)
    wait_for_local_vip(lb_b, True)

    # Steady state: the edge has both next hops and traffic reaches a backend.
    wait_for_edge_route(["via 192.168.100.2", "via 192.168.100.3"])
    wait_for_http_any()

    # Drain lb_a: its route must be withdrawn while the VIP is still held locally.
    # NOTE(review): this VIP-present check presumably relies on the checks above
    # finishing within vipDrain.holdTimeSecs (3) - confirm it cannot race.
    lb_a.succeed("touch /var/lib/fiberlb/drain")
    wait_for_metric(lb_a, "fiberlb_vip_drain_active", 1)
    wait_for_edge_route(["via 192.168.100.3"])
    wait_for_edge_route_absent("via 192.168.100.2")
    wait_for_local_vip(lb_a, True)
    edge.wait_until_succeeds(
        f"curl -fsS --max-time 5 {shlex.quote(LISTENER_URL)} | grep -F 'fiberlb ecmp backend b'"
    )

    # Sleep past the configured 3 second hold time, then the VIP must be gone.
    time.sleep(4)
    wait_for_local_vip(lb_a, False)

    # Undrain lb_a: VIP and both next hops must come back.
    lb_a.succeed("rm -f /var/lib/fiberlb/drain")
    wait_for_metric(lb_a, "fiberlb_vip_drain_active", 0)
    wait_for_local_vip(lb_a, True)
    wait_for_edge_route(["via 192.168.100.2", "via 192.168.100.3"])
    wait_for_http_any()
  '';
}