# NixOS VM smoke test: FiberLB's native BGP speaker against three independent
# BGP implementations (FRR, BIRD 2 and GoBGP) on a shared VLAN.
#
# Topology (all on VLAN 1, 192.168.100.0/24):
#   frr   192.168.100.1  AS 65020
#   lb    192.168.100.2  AS 65010  (IAM + FiberLB + mock HTTP backend)
#   bird  192.168.100.3  AS 65030
#   gobgp 192.168.100.4  AS 65040
#
# The test script creates a load balancer with a VIP, waits for the VIP to be
# announced to every peer, checks the per-peer MED/community attributes, and
# finally bounces the GoBGP peer to verify the session and route recover.
{
  pkgs,
  photoncloudPackages,
  photoncloudModule,
  nixNosModule,
}:
let
  # Minimal zebra configuration; zebra only has to provide the zserv socket
  # that bgpd connects to.
  frrZebraConfig = pkgs.writeText "fiberlb-interop-frr-zebra.conf" ''
    hostname interop-zebra
    log stdout debugging
  '';

  # FRR bgpd peers with FiberLB (AS 65010). `no bgp ebgp-requires-policy`
  # lets the eBGP session exchange routes without explicit route-maps.
  frrBgpdConfig = pkgs.writeText "fiberlb-interop-frr-bgpd.conf" ''
    hostname interop-frr
    log stdout debugging

    router bgp 65020
     bgp router-id 192.168.100.1
     no bgp ebgp-requires-policy
     neighbor 192.168.100.2 remote-as 65010
     neighbor 192.168.100.2 description fiberlb-frr
     !
     address-family ipv4 unicast
      neighbor 192.168.100.2 activate
     exit-address-family
    !
  '';

  # BIRD imports everything from FiberLB but neither exports to it nor
  # touches the kernel routing table.
  birdConfig = pkgs.writeText "fiberlb-interop-bird.conf" ''
    router id 192.168.100.3;

    protocol device {}

    protocol kernel {
      ipv4 {
        import none;
        export none;
      };
    }

    protocol bgp fiberlb_peer {
      local 192.168.100.3 as 65030;
      neighbor 192.168.100.2 as 65010;

      ipv4 {
        import all;
        export none;
      };
    }
  '';

  # GoBGP configuration, rendered as JSON (gobgpd is started with `-t json`).
  gobgpdConfig = pkgs.writeText "fiberlb-interop-gobgpd.json" (
    builtins.toJSON {
      global = {
        config = {
          as = 65040;
          router-id = "192.168.100.4";
        };
      };
      neighbors = [
        {
          config = {
            neighbor-address = "192.168.100.2";
            peer-as = 65010;
            description = "fiberlb-gobgp";
          };
        }
      ];
    }
  );

  # Proto sources handed to grpcurl (-import-path / -proto) in the test script.
  iamProtoDir = ../../iam/proto;
  iamProto = "iam.proto";
  fiberlbProtoDir = ../../fiberlb/crates/fiberlb-api/proto;
  fiberlbProto = "fiberlb.proto";

  # Trivial HTTP backend on 127.0.0.1:18081 that answers every GET with 200,
  # used as the health-checked pool member.
  backendScript = pkgs.writeText "fiberlb-interop-backend.py" ''
    from http.server import BaseHTTPRequestHandler, HTTPServer


    class Handler(BaseHTTPRequestHandler):
        def do_GET(self):
            body = b"fiberlb interop backend\n"
            self.send_response(200)
            self.send_header("Content-Type", "text/plain; charset=utf-8")
            self.send_header("Content-Length", str(len(body)))
            self.end_headers()
            self.wfile.write(body)

        def log_message(self, format, *args):
            # Silence per-request logging.
            return


    HTTPServer(("127.0.0.1", 18081), Handler).serve_forever()
  '';
in
{
  name = "fiberlb-native-bgp-interop-vm-smoke";

  nodes = {
    # FRR peer: zebra and bgpd run as two hand-written units.
    frr =
      { ... }:
      {
        networking.hostName = "frr";
        networking.useDHCP = false;
        networking.firewall.enable = false;
        virtualisation.vlans = [ 1 ];
        networking.interfaces.eth1.ipv4.addresses = [
          {
            address = "192.168.100.1";
            prefixLength = 24;
          }
        ];

        environment.systemPackages = with pkgs; [
          curl
          frr
          jq
          iproute2
        ];

        users.groups.frr = { };
        users.groups.frrvty = { };
        users.users.frr = {
          isSystemUser = true;
          group = "frr";
          extraGroups = [ "frrvty" ];
        };
        users.users.root.extraGroups = [ "frrvty" ];

        systemd.services.frr-zebra = {
          description = "FRR zebra for FiberLB interop smoke";
          wantedBy = [ "multi-user.target" ];
          after = [ "network.target" ];
          serviceConfig = {
            Type = "simple";
            RuntimeDirectory = "frr";
            RuntimeDirectoryMode = "0755";
            # /run/frr is shared with frr-bgpd. Without this, systemd removes
            # the directory (and bgpd's pid/vty files with it) whenever this
            # unit stops or is restarted.
            RuntimeDirectoryPreserve = "yes";
            ExecStartPre = "${pkgs.runtimeShell} -lc '${pkgs.coreutils}/bin/install -d -o root -g root /run/frr /var/run/frr && ${pkgs.coreutils}/bin/rm -f /run/frr/zebra.pid /var/run/frr/zebra.pid'";
            ExecStart = "${pkgs.frr}/libexec/frr/zebra -f ${frrZebraConfig} -A 127.0.0.1 -P 2601 -i /run/frr/zebra.pid -z /run/frr/zserv.api -u root -g root --log stdout";
            Restart = "on-failure";
            RestartSec = "2s";
          };
        };

        systemd.services.frr-bgpd = {
          description = "FRR bgpd for FiberLB interop smoke";
          wantedBy = [ "multi-user.target" ];
          after = [
            "network.target"
            "frr-zebra.service"
          ];
          requires = [ "frr-zebra.service" ];
          serviceConfig = {
            Type = "simple";
            RuntimeDirectory = "frr";
            RuntimeDirectoryMode = "0755";
            # /run/frr is shared with frr-zebra. Without this, a bgpd stop or
            # automatic restart would delete zebra's zserv.api socket, and the
            # ExecStartPre wait below could then never succeed again.
            RuntimeDirectoryPreserve = "yes";
            # zebra is Type=simple, so "after" does not guarantee its socket
            # exists yet; poll for up to 30 seconds before starting bgpd.
            ExecStartPre = "${pkgs.runtimeShell} -lc '${pkgs.coreutils}/bin/install -d -o root -g root /run/frr /var/run/frr && ${pkgs.coreutils}/bin/rm -f /run/frr/bgpd.pid /var/run/frr/bgpd.pid && for _ in $(seq 1 30); do [ -S /run/frr/zserv.api ] && exit 0; sleep 1; done; echo zserv socket did not appear >&2; exit 1'";
            ExecStart = "${pkgs.frr}/libexec/frr/bgpd -f ${frrBgpdConfig} -A 127.0.0.1 -P 2605 -p 179 -i /run/frr/bgpd.pid -z /run/frr/zserv.api -S --log stdout";
            Restart = "on-failure";
            RestartSec = "2s";
          };
        };

        system.stateVersion = "24.11";
      };

    # BIRD 2 peer, controlled through /run/bird.ctl.
    bird =
      { ... }:
      {
        networking.hostName = "bird";
        networking.useDHCP = false;
        networking.firewall.enable = false;
        virtualisation.vlans = [ 1 ];
        networking.interfaces.eth1.ipv4.addresses = [
          {
            address = "192.168.100.3";
            prefixLength = 24;
          }
        ];

        environment.systemPackages = with pkgs; [
          bird2
          jq
        ];

        systemd.services.bird-peer = {
          description = "BIRD peer for FiberLB interop smoke";
          wantedBy = [ "multi-user.target" ];
          after = [ "network.target" ];
          serviceConfig = {
            Type = "simple";
            ExecStart = "${pkgs.bird2}/bin/bird -f -c ${birdConfig} -s /run/bird.ctl";
            Restart = "on-failure";
            RestartSec = "2s";
          };
        };

        system.stateVersion = "24.11";
      };

    # GoBGP peer; its gRPC API on 127.0.0.1:50051 is used by the `gobgp` CLI.
    gobgp =
      { ... }:
      {
        networking.hostName = "gobgp";
        networking.useDHCP = false;
        networking.firewall.enable = false;
        virtualisation.vlans = [ 1 ];
        networking.interfaces.eth1.ipv4.addresses = [
          {
            address = "192.168.100.4";
            prefixLength = 24;
          }
        ];

        environment.systemPackages = with pkgs; [
          gobgp
          gobgpd
          jq
        ];

        systemd.services.gobgpd-peer = {
          description = "GoBGP peer for FiberLB interop smoke";
          wantedBy = [ "multi-user.target" ];
          after = [ "network.target" ];
          serviceConfig = {
            Type = "simple";
            ExecStart = "${pkgs.gobgpd}/bin/gobgpd -t json -f ${gobgpdConfig} --api-hosts 127.0.0.1:50051 -p";
            Restart = "on-failure";
            RestartSec = "2s";
          };
        };

        system.stateVersion = "24.11";
      };

    # System under test: IAM + FiberLB with native BGP and local VIP ownership.
    lb =
      { ... }:
      {
        imports = [
          nixNosModule
          photoncloudModule
        ];

        networking.hostName = "lb";
        networking.useDHCP = false;
        networking.firewall.enable = false;
        virtualisation.vlans = [ 1 ];
        networking.interfaces.eth1.ipv4.addresses = [
          {
            address = "192.168.100.2";
            prefixLength = 24;
          }
        ];

        environment.systemPackages = with pkgs; [
          curl
          grpcurl
          jq
          python3
        ];

        services.iam = {
          enable = true;
          package = photoncloudPackages.iam-server;
          port = 50080;
          httpPort = 8083;
          storeBackend = "memory";
        };

        # NOTE(review): presumably lets IAM start without a provisioned signing
        # key in this throwaway VM - confirm against the IAM server.
        systemd.services.iam.environment = {
          IAM_ALLOW_RANDOM_SIGNING_KEY = "1";
        };

        services.fiberlb = {
          enable = true;
          package = photoncloudPackages.fiberlb-server;
          port = 50085;
          iamAddr = "192.168.100.2:50080";
          metadataBackend = "sqlite";
          databaseUrl = "sqlite:/var/lib/fiberlb/metadata.db";
          singleNode = true;
          # One-second intervals keep the smoke test fast.
          healthCheckIntervalSecs = 1;
          healthCheckTimeoutSecs = 1;
          vipCheckIntervalSecs = 1;
          vipOwnership = {
            enable = true;
            interface = "lo";
          };
          bgp = {
            enable = true;
            localAs = 65010;
            routerId = "192.168.100.2";
            nextHop = "192.168.100.2";
            holdTimeSecs = 9;
            keepaliveSecs = 3;
            # Each peer gets a distinct MED and community so the test script
            # can verify that per-peer attributes are applied.
            peers = [
              {
                address = "192.168.100.1";
                port = 179;
                asn = 65020;
                description = "frr-peer";
                med = 10;
                communities = [ "65010:101" ];
              }
              {
                address = "192.168.100.3";
                port = 179;
                asn = 65030;
                description = "bird-peer";
                med = 20;
                communities = [ "65010:202" ];
              }
              {
                address = "192.168.100.4";
                port = 179;
                asn = 65040;
                description = "gobgp-peer";
                med = 30;
                communities = [ "65010:303" ];
              }
            ];
          };
        };

        systemd.services.mock-backend = {
          description = "FiberLB interop backend";
          wantedBy = [ "multi-user.target" ];
          after = [ "network.target" ];
          serviceConfig = {
            Type = "simple";
            ExecStart = "${pkgs.python3}/bin/python ${backendScript}";
            Restart = "always";
            RestartSec = "1s";
          };
        };

        system.stateVersion = "24.11";
      };
  };

  testScript = ''
    import json
    import re
    import shlex
    import time

    IAM_PROTO_DIR = "${iamProtoDir}"
    IAM_PROTO = "${iamProto}"
    FIBERLB_PROTO_DIR = "${fiberlbProtoDir}"
    FIBERLB_PROTO = "${fiberlbProto}"

    # One metrics sample line: name, optional {labels}, numeric value.
    METRIC_RE = re.compile(r"^([a-zA-Z_:][a-zA-Z0-9_:]*)(?:\{([^}]*)\})?\s+([-+0-9.eE]+)$")


    def grpcurl_json(machine, endpoint, import_path, proto, service, payload, headers=None):
        """Call a gRPC method with grpcurl on `machine` and return the decoded JSON reply.

        Raises AssertionError (including the combined stdout/stderr) when
        grpcurl exits non-zero or exceeds the 15 second timeout.
        """
        header_args = ""
        for header in headers or []:
            header_args += f" -H {shlex.quote(header)}"
        command = (
            f"grpcurl -plaintext{header_args} "
            f"-import-path {shlex.quote(import_path)} "
            f"-proto {shlex.quote(proto)} "
            f"-d {shlex.quote(json.dumps(payload))} "
            f"{shlex.quote(endpoint)} {shlex.quote(service)}"
        )
        status, output = machine.execute(f"timeout 15 sh -lc {shlex.quote(command + ' 2>&1')}")
        if status != 0:
            raise AssertionError(
                "grpcurl failed"
                f" service={service}"
                f" status={status}"
                f" payload={json.dumps(payload, sort_keys=True)}"
                f" output={output}"
            )
        return json.loads(output)


    def issue_project_admin_token(machine, org_id, project_id):
        """Create a service account, bind it to roles/ProjectAdmin and return a token.

        All three IAM calls share a single 120 second deadline and are retried
        every 2 seconds, because IAM may still be starting up.
        """
        principal_id = f"fiberlb-interop-{int(time.time())}"
        deadline = time.time() + 120

        def retry(action):
            last_error = None
            while time.time() < deadline:
                try:
                    return action()
                except Exception as exc:
                    last_error = exc
                    time.sleep(2)
            raise AssertionError(f"IAM bootstrap timed out: {last_error}")

        retry(lambda: grpcurl_json(
            machine,
            "127.0.0.1:50080",
            IAM_PROTO_DIR,
            IAM_PROTO,
            "iam.v1.IamAdmin/CreatePrincipal",
            {
                "id": principal_id,
                "kind": "PRINCIPAL_KIND_SERVICE_ACCOUNT",
                "name": principal_id,
                "orgId": org_id,
                "projectId": project_id,
            },
        ))
        retry(lambda: grpcurl_json(
            machine,
            "127.0.0.1:50080",
            IAM_PROTO_DIR,
            IAM_PROTO,
            "iam.v1.IamAdmin/CreateBinding",
            {
                "principal": {
                    "kind": "PRINCIPAL_KIND_SERVICE_ACCOUNT",
                    "id": principal_id,
                },
                "role": "roles/ProjectAdmin",
                "scope": {
                    "project": {
                        "id": project_id,
                        "orgId": org_id,
                    }
                },
            },
        ))
        token_response = retry(lambda: grpcurl_json(
            machine,
            "127.0.0.1:50080",
            IAM_PROTO_DIR,
            IAM_PROTO,
            "iam.v1.IamToken/IssueToken",
            {
                "principalId": principal_id,
                "principalKind": "PRINCIPAL_KIND_SERVICE_ACCOUNT",
                "scope": {
                    "project": {
                        "id": project_id,
                        "orgId": org_id,
                    }
                },
                "ttlSeconds": 3600,
            },
        ))
        return token_response["token"]


    def wait_for_backend_status(status, backend_id, token):
        """Block until GetBackend on the lb node reports the given status."""
        # Build the jq filter outside the f-string: a backslash inside an
        # f-string replacement field is a SyntaxError before Python 3.12.
        # json.dumps supplies the surrounding double quotes.
        jq_filter = f".backend.status == {json.dumps(status)}"
        lb.wait_until_succeeds(
            "grpcurl -plaintext "
            f"-H {shlex.quote('authorization: Bearer ' + token)} "
            f"-import-path {shlex.quote(FIBERLB_PROTO_DIR)} "
            f"-proto {shlex.quote(FIBERLB_PROTO)} "
            f"-d {shlex.quote(json.dumps({'id': backend_id}))} "
            "127.0.0.1:50085 fiberlb.v1.BackendService/GetBackend "
            f"| jq -e {shlex.quote(jq_filter)}"
        )


    def parse_labels(label_blob):
        """Turn the text between the braces of a metric line into a dict.

        Splits on commas, so label values containing commas are not supported.
        """
        if not label_blob:
            return {}
        labels = {}
        for part in label_blob.split(","):
            key, value = part.split("=", 1)
            labels[key] = value.strip().strip('"')
        return labels


    def fiberlb_diagnostics():
        """Best-effort dump of FiberLB metrics and journal for failure messages."""
        metrics = lb.succeed("curl -fsS http://127.0.0.1:9098/metrics || true")
        journal = lb.succeed("journalctl -u fiberlb.service -n 200 --no-pager || true")
        return (
            "fiberlb metrics:\n"
            f"{metrics}\n"
            "fiberlb journal:\n"
            f"{journal}"
        )


    def wait_for_metric(metric_name, expected_value, labels=None):
        """Poll the FiberLB metrics endpoint for up to 60 seconds.

        Returns once a sample with exactly the given name and label set has
        the expected value; raises AssertionError with diagnostics otherwise.
        """
        expected_labels = labels or {}
        deadline = time.time() + 60
        last_exposition = ""
        while time.time() < deadline:
            exposition = lb.succeed("curl -fsS http://127.0.0.1:9098/metrics")
            last_exposition = exposition
            for line in exposition.splitlines():
                line = line.strip()
                if not line or line.startswith("#"):
                    continue
                match = METRIC_RE.match(line)
                if not match:
                    continue
                name, label_blob, value = match.groups()
                if name != metric_name:
                    continue
                if parse_labels(label_blob) != expected_labels:
                    continue
                if abs(float(value) - float(expected_value)) < 0.0001:
                    return
            time.sleep(1)
        raise AssertionError(
            f"metric {metric_name} with labels={expected_labels} did not reach {expected_value}\n"
            f"last metrics scrape:\n{last_exposition}\n"
            f"{fiberlb_diagnostics()}"
        )


    def wait_for_local_vip(vip):
        """Block until the VIP is configured as a /32 on the lb loopback."""
        lb.wait_until_succeeds(f"ip -4 addr show dev lo | grep -F {shlex.quote('inet ' + vip + '/32')}")


    def wait_for_gobgp_route(prefix, present):
        """Wait until `prefix` is present in (or absent from) the GoBGP global RIB."""
        command = "gobgp -u 127.0.0.1 -p 50051 global rib || true"
        if present:
            # The parentheses matter: "a || true | grep x" parses as
            # "a || (true | grep x)", which would skip grep whenever gobgp
            # succeeds and make this check pass without looking at the RIB.
            gobgp.wait_until_succeeds(f"({command}) | grep -F {shlex.quote(prefix)}")
        else:
            deadline = time.time() + 60
            while time.time() < deadline:
                output = gobgp.succeed(command)
                if prefix not in output:
                    return
                time.sleep(1)
            raise AssertionError(f"route {prefix} still present in GoBGP RIB")


    def wait_for_bird_route(prefix):
        """Block until BIRD shows a route for `prefix`."""
        bird.wait_until_succeeds(
            f"birdc -s /run/bird.ctl show route for {shlex.quote(prefix)} all | grep -F {shlex.quote(prefix)}"
        )


    def wait_for_frr_route(prefix):
        """Block until FRR's BGP table shows `prefix`."""
        frr.wait_until_succeeds(
            f"vtysh -c {shlex.quote('show ip bgp ' + prefix)} | grep -F {shlex.quote(prefix)}"
        )


    def wait_for_unit_or_dump(machine, unit):
        """Wait up to 120 seconds for `unit` to become active.

        Fails fast if the unit enters the failed state; either way the
        AssertionError carries the unit's systemctl status and journal tail.
        """
        deadline = time.time() + 120
        while time.time() < deadline:
            status, output = machine.execute(f"systemctl is-active {shlex.quote(unit)}")
            state = output.strip()
            if status == 0 and state == "active":
                return
            if state == "failed":
                service_status = machine.succeed(
                    f"systemctl status {shlex.quote(unit)} --no-pager || true"
                )
                journal = machine.succeed(
                    f"journalctl -u {shlex.quote(unit)} -n 200 --no-pager || true"
                )
                raise AssertionError(
                    f"unit {unit} failed to start\n"
                    f"systemctl status:\n{service_status}\n"
                    f"journal:\n{journal}"
                )
            time.sleep(1)

        service_status = machine.succeed(
            f"systemctl status {shlex.quote(unit)} --no-pager || true"
        )
        journal = machine.succeed(
            f"journalctl -u {shlex.quote(unit)} -n 200 --no-pager || true"
        )
        raise AssertionError(
            f"unit {unit} did not become active before timeout\n"
            f"systemctl status:\n{service_status}\n"
            f"journal:\n{journal}"
        )


    def wait_for_command_or_dump(machine, command, unit=None, timeout=120):
        """Retry `command` once per second until it succeeds or `timeout` elapses.

        On timeout the AssertionError contains the last command output, the
        unit's status and journal (when `unit` is given) and the listening
        TCP sockets.
        """
        deadline = time.time() + timeout
        last_output = ""
        while time.time() < deadline:
            status, output = machine.execute(f"sh -lc {shlex.quote(command + ' 2>&1')}")
            last_output = output
            if status == 0:
                return
            time.sleep(1)

        diagnostics = f"last command output:\n{last_output}\n"
        if unit is not None:
            service_status = machine.succeed(
                f"systemctl status {shlex.quote(unit)} --no-pager || true"
            )
            journal = machine.succeed(
                f"journalctl -u {shlex.quote(unit)} -n 200 --no-pager || true"
            )
            diagnostics += (
                f"systemctl status:\n{service_status}\n"
                f"journal:\n{journal}\n"
            )
        socket_state = machine.succeed("ss -ltnp || true")
        diagnostics += f"socket state:\n{socket_state}\n"
        raise AssertionError(
            f"command did not succeed before timeout: {command}\n{diagnostics}"
        )


    start_all()
    serial_stdout_off()

    # --- Bring-up: every daemon is active and listening. ---
    wait_for_unit_or_dump(frr, "frr-zebra.service")
    wait_for_command_or_dump(frr, "test -S /run/frr/zserv.api", "frr-zebra.service")
    wait_for_unit_or_dump(frr, "frr-bgpd.service")
    wait_for_command_or_dump(
        frr,
        "ss -ltnH '( sport = :179 )' | grep -q LISTEN",
        "frr-bgpd.service",
    )
    wait_for_unit_or_dump(bird, "bird-peer.service")
    wait_for_unit_or_dump(gobgp, "gobgpd-peer.service")
    wait_for_command_or_dump(
        gobgp,
        "ss -ltnH '( sport = :179 )' | grep -q LISTEN",
        "gobgpd-peer.service",
    )
    wait_for_unit_or_dump(lb, "iam.service")
    wait_for_command_or_dump(lb, "ss -ltnH '( sport = :50080 )' | grep -q LISTEN", "iam.service")
    wait_for_unit_or_dump(lb, "mock-backend.service")
    wait_for_unit_or_dump(lb, "fiberlb.service")
    wait_for_command_or_dump(lb, "ss -ltnH '( sport = :50085 )' | grep -q LISTEN", "fiberlb.service")
    wait_for_command_or_dump(lb, "ss -ltnH '( sport = :9098 )' | grep -q LISTEN", "fiberlb.service")

    # --- BGP sessions, seen from the peers and from FiberLB's own metrics. ---
    frr.wait_until_succeeds("vtysh -c 'show ip bgp neighbor 192.168.100.2' | grep -F 'BGP state = Established'")
    bird.wait_until_succeeds("birdc -s /run/bird.ctl show protocols all fiberlb_peer | grep -F Established")
    gobgp.wait_until_succeeds("gobgp -u 127.0.0.1 -p 50051 neighbor | grep -F 192.168.100.2")
    wait_for_metric("fiberlb_bgp_configured_peers", 3)
    wait_for_metric("fiberlb_bgp_peer_session_up", 1, {"peer": "192.168.100.1:179"})
    wait_for_metric("fiberlb_bgp_peer_session_up", 1, {"peer": "192.168.100.3:179"})
    wait_for_metric("fiberlb_bgp_peer_session_up", 1, {"peer": "192.168.100.4:179"})
    wait_for_metric("fiberlb_bgp_connected_peers", 3)

    # --- Create load balancer, pool, backend, health check and listener. ---
    token = issue_project_admin_token(lb, "bgp-interop-org", "bgp-interop-project")

    lb_response = grpcurl_json(
        lb,
        "127.0.0.1:50085",
        FIBERLB_PROTO_DIR,
        FIBERLB_PROTO,
        "fiberlb.v1.LoadBalancerService/CreateLoadBalancer",
        {
            "name": "bgp-interop-lb",
            "orgId": "bgp-interop-org",
            "projectId": "bgp-interop-project",
            "description": "native bgp interop smoke",
            "vipAddress": "203.0.113.77",
        },
        headers=[f"authorization: Bearer {token}"],
    )
    loadbalancer = lb_response["loadbalancer"]
    lb_id = loadbalancer["id"]
    vip = loadbalancer["vipAddress"]
    vip_prefix = f"{vip}/32"

    pool_id = grpcurl_json(
        lb,
        "127.0.0.1:50085",
        FIBERLB_PROTO_DIR,
        FIBERLB_PROTO,
        "fiberlb.v1.PoolService/CreatePool",
        {
            "name": "bgp-interop-pool",
            "loadbalancerId": lb_id,
            "algorithm": "POOL_ALGORITHM_ROUND_ROBIN",
            "protocol": "POOL_PROTOCOL_TCP",
        },
        headers=[f"authorization: Bearer {token}"],
    )["pool"]["id"]

    backend_id = grpcurl_json(
        lb,
        "127.0.0.1:50085",
        FIBERLB_PROTO_DIR,
        FIBERLB_PROTO,
        "fiberlb.v1.BackendService/CreateBackend",
        {
            "name": "bgp-interop-backend",
            "poolId": pool_id,
            "address": "127.0.0.1",
            "port": 18081,
            "weight": 1,
        },
        headers=[f"authorization: Bearer {token}"],
    )["backend"]["id"]

    grpcurl_json(
        lb,
        "127.0.0.1:50085",
        FIBERLB_PROTO_DIR,
        FIBERLB_PROTO,
        "fiberlb.v1.HealthCheckService/CreateHealthCheck",
        {
            "name": "bgp-interop-health",
            "poolId": pool_id,
            "type": "HEALTH_CHECK_TYPE_HTTP",
            "intervalSeconds": 1,
            "timeoutSeconds": 1,
            "healthyThreshold": 1,
            "unhealthyThreshold": 1,
            "httpConfig": {
                "method": "GET",
                "path": "/",
                "expectedCodes": [200],
            },
        },
        headers=[f"authorization: Bearer {token}"],
    )

    grpcurl_json(
        lb,
        "127.0.0.1:50085",
        FIBERLB_PROTO_DIR,
        FIBERLB_PROTO,
        "fiberlb.v1.ListenerService/CreateListener",
        {
            "name": "bgp-interop-listener",
            "loadbalancerId": lb_id,
            "protocol": "LISTENER_PROTOCOL_TCP",
            "port": 18080,
            "defaultPoolId": pool_id,
        },
        headers=[f"authorization: Bearer {token}"],
    )

    # --- The VIP is owned locally and announced to all three peers. ---
    wait_for_backend_status("BACKEND_STATUS_ONLINE", backend_id, token)
    wait_for_local_vip(vip)
    wait_for_metric("fiberlb_bgp_desired_routes", 1)
    wait_for_frr_route(vip_prefix)
    wait_for_bird_route(vip_prefix)
    wait_for_gobgp_route(vip_prefix, True)

    # --- Per-peer MED and community attributes as configured on the lb node. ---
    frr.wait_until_succeeds(
        f"vtysh -c {shlex.quote('show ip bgp ' + vip_prefix)} | grep -F 'metric 10'"
    )
    frr.wait_until_succeeds(
        f"vtysh -c {shlex.quote('show ip bgp ' + vip_prefix)} | grep -F 'Community: 65010:101'"
    )
    bird.wait_until_succeeds(
        f"birdc -s /run/bird.ctl show route for {shlex.quote(vip_prefix)} all | grep -F 'BGP.med: 20'"
    )
    bird.wait_until_succeeds(
        f"birdc -s /run/bird.ctl show route for {shlex.quote(vip_prefix)} all | grep -F 'BGP.community: (65010,202)'"
    )

    # --- Losing one peer must not disturb the other two sessions. ---
    gobgp.succeed("systemctl stop gobgpd-peer.service")
    wait_for_metric("fiberlb_bgp_connected_peers", 2)
    wait_for_metric("fiberlb_bgp_peer_session_up", 0, {"peer": "192.168.100.4:179"})
    wait_for_frr_route(vip_prefix)
    wait_for_bird_route(vip_prefix)

    # --- The restarted peer re-establishes and receives the route again. ---
    gobgp.succeed("systemctl start gobgpd-peer.service")
    wait_for_unit_or_dump(gobgp, "gobgpd-peer.service")
    gobgp.wait_until_succeeds("gobgp -u 127.0.0.1 -p 50051 neighbor | grep -F 192.168.100.2")
    wait_for_metric("fiberlb_bgp_connected_peers", 3)
    wait_for_metric("fiberlb_bgp_peer_session_up", 1, {"peer": "192.168.100.4:179"})
    wait_for_gobgp_route(vip_prefix, True)
  '';
}