745 lines
25 KiB
Nix
745 lines
25 KiB
Nix
{
|
|
pkgs,
|
|
photoncloudPackages,
|
|
photoncloudModule,
|
|
nixNosModule,
|
|
}:
|
|
|
|
let
|
|
# zebra configuration for the edge router: names the daemon "edge-zebra" and
# sends debug-level logging to stdout.
edgeZebraConfig = pkgs.writeText "fiberlb-ecmp-edge-zebra.conf" ''
  hostname edge-zebra
  log stdout debugging
'';
|
|
# bgpd configuration for the edge router (AS 65020, router-id 192.168.100.1).
# It peers with 192.168.100.2 and 192.168.100.3 (both AS 65010), accepts their
# routes without an explicit eBGP policy, and allows up to 8 multipath routes
# for IPv4 unicast.
edgeBgpdConfig = pkgs.writeText "fiberlb-ecmp-edge-bgpd.conf" ''
  hostname edge-frr
  log stdout debugging

  router bgp 65020
  bgp router-id 192.168.100.1
  no bgp ebgp-requires-policy
  bgp bestpath as-path multipath-relax
  neighbor 192.168.100.2 remote-as 65010
  neighbor 192.168.100.2 description fiberlb-a
  neighbor 192.168.100.3 remote-as 65010
  neighbor 192.168.100.3 description fiberlb-b
  !
  address-family ipv4 unicast
  maximum-paths 8
  neighbor 192.168.100.2 activate
  neighbor 192.168.100.3 activate
  exit-address-family
  !
'';
|
|
# Protobuf sources used by the test script's grpcurl calls. The directories are
# Nix path literals (relative to this file); the file names are looked up
# inside them via grpcurl's -import-path / -proto options.
iamProtoDir = ../../iam/proto;
iamProto = "iam.proto";
fiberlbProtoDir = ../../fiberlb/crates/fiberlb-api/proto;
fiberlbProto = "fiberlb.proto";
|
|
# Builds the mock HTTP backend script for one load balancer node.
#
# `label` ("a" or "b") is embedded both in the store file name and in the
# response body, so the test can tell which node answered a request. The server
# listens on 127.0.0.1:18081, answers every GET with a fixed plain-text body,
# and suppresses per-request logging.
mkBackendScript =
  label:
  pkgs.writeText "fiberlb-ecmp-backend-${label}.py" ''
    from http.server import BaseHTTPRequestHandler, HTTPServer


    class Handler(BaseHTTPRequestHandler):
        def do_GET(self):
            body = b"fiberlb ecmp backend ${label}\n"
            self.send_response(200)
            self.send_header("Content-Type", "text/plain; charset=utf-8")
            self.send_header("Content-Length", str(len(body)))
            self.end_headers()
            self.wfile.write(body)

        def log_message(self, format, *args):
            return


    HTTPServer(("127.0.0.1", 18081), Handler).serve_forever()
  '';
backendScriptA = mkBackendScript "a";
backendScriptB = mkBackendScript "b";
|
|
in
|
|
{
|
|
name = "fiberlb-native-bgp-ecmp-drain-vm-smoke";
|
|
|
|
nodes = {
|
|
# Edge router VM (192.168.100.1/24 on eth1, VLAN 1). Runs FRR zebra and bgpd as
# hand-written systemd units using the configs defined in the `let` block.
edge =
  { ... }:
  {
    networking.hostName = "edge";
    networking.useDHCP = false;
    networking.firewall.enable = false;
    virtualisation.vlans = [ 1 ];
    networking.interfaces.eth1.ipv4.addresses = [
      {
        address = "192.168.100.1";
        prefixLength = 24;
      }
    ];

    environment.systemPackages = with pkgs; [
      curl
      frr
      iproute2
      jq
    ];

    users.groups.frr = { };
    users.groups.frrvty = { };
    users.users.frr = {
      isSystemUser = true;
      group = "frr";
      extraGroups = [ "frrvty" ];
    };
    users.users.root.extraGroups = [ "frrvty" ];

    systemd.services.frr-zebra = {
      description = "FRR zebra for FiberLB ECMP smoke";
      wantedBy = [ "multi-user.target" ];
      after = [ "network.target" ];
      serviceConfig = {
        Type = "simple";
        RuntimeDirectory = "frr";
        RuntimeDirectoryMode = "0755";
        # /run/frr is shared with frr-bgpd. Without this, systemd removes the
        # directory whenever either unit stops or fails, which would delete
        # the other daemon's pid file and the zserv.api socket.
        RuntimeDirectoryPreserve = "yes";
        # Ensure the runtime directories exist and clear any stale pid file.
        ExecStartPre = "${pkgs.runtimeShell} -lc '${pkgs.coreutils}/bin/install -d -o root -g root /run/frr /var/run/frr && ${pkgs.coreutils}/bin/rm -f /run/frr/zebra.pid /var/run/frr/zebra.pid'";
        ExecStart = "${pkgs.frr}/libexec/frr/zebra -f ${edgeZebraConfig} -A 127.0.0.1 -P 2601 -i /run/frr/zebra.pid -z /run/frr/zserv.api -u root -g root --log stdout";
        Restart = "on-failure";
        RestartSec = "2s";
      };
    };

    systemd.services.frr-bgpd = {
      description = "FRR bgpd for FiberLB ECMP smoke";
      wantedBy = [ "multi-user.target" ];
      after = [
        "network.target"
        "frr-zebra.service"
      ];
      requires = [ "frr-zebra.service" ];
      serviceConfig = {
        Type = "simple";
        RuntimeDirectory = "frr";
        RuntimeDirectoryMode = "0755";
        # Shared with frr-zebra; see the note on that unit.
        RuntimeDirectoryPreserve = "yes";
        # Same directory/pid preparation as zebra, then wait up to 30 seconds
        # for zebra's zserv.api socket before letting bgpd start.
        ExecStartPre = "${pkgs.runtimeShell} -lc '${pkgs.coreutils}/bin/install -d -o root -g root /run/frr /var/run/frr && ${pkgs.coreutils}/bin/rm -f /run/frr/bgpd.pid /var/run/frr/bgpd.pid && for _ in $(seq 1 30); do [ -S /run/frr/zserv.api ] && exit 0; sleep 1; done; echo zserv socket did not appear >&2; exit 1'";
        ExecStart = "${pkgs.frr}/libexec/frr/bgpd -f ${edgeBgpdConfig} -A 127.0.0.1 -P 2605 -p 179 -i /run/frr/bgpd.pid -z /run/frr/zserv.api -S --log stdout";
        Restart = "on-failure";
        RestartSec = "2s";
      };
    };

    system.stateVersion = "24.11";
  };
|
|
|
|
# Load balancer VM "lb-a" (192.168.100.2/24 on eth1, VLAN 1). Runs an IAM
# server with an in-memory store, a single-node FiberLB that peers over BGP
# (local AS 65010) with the edge router at 192.168.100.1 (AS 65020), and mock
# HTTP backend A.
lb_a =
  { ... }:
  {
    imports = [
      nixNosModule
      photoncloudModule
    ];

    system.stateVersion = "24.11";
    virtualisation.vlans = [ 1 ];

    networking = {
      hostName = "lb-a";
      useDHCP = false;
      firewall.enable = false;
      interfaces.eth1.ipv4.addresses = [
        {
          address = "192.168.100.2";
          prefixLength = 24;
        }
      ];
    };

    environment.systemPackages = [
      pkgs.curl
      pkgs.grpcurl
      pkgs.jq
      pkgs.python3
    ];

    services = {
      iam = {
        enable = true;
        package = photoncloudPackages.iam-server;
        port = 50080;
        httpPort = 8083;
        storeBackend = "memory";
      };

      fiberlb = {
        enable = true;
        package = photoncloudPackages.fiberlb-server;
        port = 50085;
        iamAddr = "192.168.100.2:50080";
        metadataBackend = "sqlite";
        databaseUrl = "sqlite:/var/lib/fiberlb/metadata.db";
        singleNode = true;
        # One-second intervals keep the smoke test's wait loops short.
        healthCheckIntervalSecs = 1;
        healthCheckTimeoutSecs = 1;
        vipCheckIntervalSecs = 1;
        vipDrain.holdTimeSecs = 3;
        vipOwnership.enable = true;
        vipOwnership.interface = "lo";
        bgp = {
          enable = true;
          localAs = 65010;
          routerId = "192.168.100.2";
          nextHop = "192.168.100.2";
          holdTimeSecs = 30;
          keepaliveSecs = 10;
          peers = [
            {
              address = "192.168.100.1";
              port = 179;
              asn = 65020;
              description = "edge";
            }
          ];
        };
      };
    };

    systemd.services = {
      iam.environment.IAM_ALLOW_RANDOM_SIGNING_KEY = "1";

      mock-backend = {
        description = "FiberLB ECMP backend A";
        wantedBy = [ "multi-user.target" ];
        after = [ "network.target" ];
        serviceConfig = {
          Type = "simple";
          ExecStart = "${pkgs.python3}/bin/python ${backendScriptA}";
          Restart = "always";
          RestartSec = "1s";
        };
      };
    };
  };
|
|
|
|
# Load balancer VM "lb-b" (192.168.100.3/24 on eth1, VLAN 1). Runs an IAM
# server with an in-memory store, a single-node FiberLB that peers over BGP
# (local AS 65010) with the edge router at 192.168.100.1 (AS 65020), and mock
# HTTP backend B.
lb_b =
  { ... }:
  {
    imports = [
      nixNosModule
      photoncloudModule
    ];

    system.stateVersion = "24.11";
    virtualisation.vlans = [ 1 ];

    networking = {
      hostName = "lb-b";
      useDHCP = false;
      firewall.enable = false;
      interfaces.eth1.ipv4.addresses = [
        {
          address = "192.168.100.3";
          prefixLength = 24;
        }
      ];
    };

    environment.systemPackages = [
      pkgs.curl
      pkgs.grpcurl
      pkgs.jq
      pkgs.python3
    ];

    services = {
      iam = {
        enable = true;
        package = photoncloudPackages.iam-server;
        port = 50080;
        httpPort = 8083;
        storeBackend = "memory";
      };

      fiberlb = {
        enable = true;
        package = photoncloudPackages.fiberlb-server;
        port = 50085;
        iamAddr = "192.168.100.3:50080";
        metadataBackend = "sqlite";
        databaseUrl = "sqlite:/var/lib/fiberlb/metadata.db";
        singleNode = true;
        # One-second intervals keep the smoke test's wait loops short.
        healthCheckIntervalSecs = 1;
        healthCheckTimeoutSecs = 1;
        vipCheckIntervalSecs = 1;
        vipDrain.holdTimeSecs = 3;
        vipOwnership.enable = true;
        vipOwnership.interface = "lo";
        bgp = {
          enable = true;
          localAs = 65010;
          routerId = "192.168.100.3";
          nextHop = "192.168.100.3";
          holdTimeSecs = 30;
          keepaliveSecs = 10;
          peers = [
            {
              address = "192.168.100.1";
              port = 179;
              asn = 65020;
              description = "edge";
            }
          ];
        };
      };
    };

    systemd.services = {
      iam.environment.IAM_ALLOW_RANDOM_SIGNING_KEY = "1";

      mock-backend = {
        description = "FiberLB ECMP backend B";
        wantedBy = [ "multi-user.target" ];
        after = [ "network.target" ];
        serviceConfig = {
          Type = "simple";
          ExecStart = "${pkgs.python3}/bin/python ${backendScriptB}";
          Restart = "always";
          RestartSec = "1s";
        };
      };
    };
  };
|
|
};
|
|
|
|
testScript = ''
|
|
import json
|
|
import re
|
|
import shlex
|
|
import time
|
|
|
|
# Protobuf import roots and file names; the values are substituted by Nix when
# this script is rendered.
IAM_PROTO_DIR = "${iamProtoDir}"
IAM_PROTO = "${iamProto}"
FIBERLB_PROTO_DIR = "${fiberlbProtoDir}"
FIBERLB_PROTO = "${fiberlbProto}"

# Matches one metrics sample line: metric name, optional {label} blob, value.
METRIC_RE = re.compile(r"^([a-zA-Z_:][a-zA-Z0-9_:]*)(?:\{([^}]*)\})?\s+([-+0-9.eE]+)$")

# Virtual IP under test, its /32 prefix, and the HTTP URL of the listener on
# port 18080.
VIP = "203.0.113.77"
VIP_PREFIX = VIP + "/32"
LISTENER_URL = "http://" + VIP + ":18080/"
|
|
|
|
def grpcurl_json(machine, endpoint, import_path, proto, service, payload, headers=None):
    """Call a gRPC method with grpcurl on *machine* and return the parsed JSON reply.

    The call is made in plaintext against *endpoint*, using *proto* resolved
    relative to *import_path*. *payload* is sent as the JSON request body and
    each entry of *headers* is passed as a ``-H`` option. The command runs under
    ``timeout 15`` with stderr merged into stdout.

    Raises AssertionError (including service, exit status, payload and output)
    when the command exits non-zero.
    """
    header_args = "".join(f" -H {shlex.quote(header)}" for header in headers or [])
    request_body = json.dumps(payload)
    command = " ".join(
        [
            f"grpcurl -plaintext{header_args}",
            f"-import-path {shlex.quote(import_path)}",
            f"-proto {shlex.quote(proto)}",
            f"-d {shlex.quote(request_body)}",
            f"{shlex.quote(endpoint)} {shlex.quote(service)}",
        ]
    )
    status, output = machine.execute("timeout 15 sh -lc " + shlex.quote(command + " 2>&1"))
    if status == 0:
        return json.loads(output)

    payload_text = json.dumps(payload, sort_keys=True)
    raise AssertionError(
        f"grpcurl failed service={service} status={status} payload={payload_text} output={output}"
    )
|
|
|
|
def issue_project_admin_token(machine, org_id, project_id):
    """Create a service-account principal on *machine*'s IAM and return a token for it.

    Three IAM calls are made against 127.0.0.1:50080, in order: CreatePrincipal,
    CreateBinding (role ``roles/ProjectAdmin`` scoped to the project) and
    IssueToken (TTL 3600 seconds). Each call is retried every 2 seconds on any
    exception; all three share a single 120-second deadline.

    Raises AssertionError when the deadline passes before a call succeeds.
    """
    principal_id = f"fiberlb-ecmp-{machine.name}-{int(time.time())}"
    deadline = time.time() + 120
    service_account = "PRINCIPAL_KIND_SERVICE_ACCOUNT"

    def project_scope():
        return {"project": {"id": project_id, "orgId": org_id}}

    def call_until_deadline(service, payload):
        # Retry the IAM call until it succeeds or the shared deadline passes.
        last_error = None
        while time.time() < deadline:
            try:
                return grpcurl_json(
                    machine,
                    "127.0.0.1:50080",
                    IAM_PROTO_DIR,
                    IAM_PROTO,
                    service,
                    payload,
                )
            except Exception as exc:
                last_error = exc
                time.sleep(2)
        raise AssertionError(f"IAM bootstrap timed out: {last_error}")

    call_until_deadline(
        "iam.v1.IamAdmin/CreatePrincipal",
        {
            "id": principal_id,
            "kind": service_account,
            "name": principal_id,
            "orgId": org_id,
            "projectId": project_id,
        },
    )
    call_until_deadline(
        "iam.v1.IamAdmin/CreateBinding",
        {
            "principal": {"kind": service_account, "id": principal_id},
            "role": "roles/ProjectAdmin",
            "scope": project_scope(),
        },
    )
    issued = call_until_deadline(
        "iam.v1.IamToken/IssueToken",
        {
            "principalId": principal_id,
            "principalKind": service_account,
            "scope": project_scope(),
            "ttlSeconds": 3600,
        },
    )
    return issued["token"]
|
|
|
|
def create_load_balancer(machine, token, name_suffix):
    """Provision a load balancer for the VIP on *machine* and return the backend id.

    Using *token* as a bearer token against the FiberLB API on 127.0.0.1:50085,
    this creates, in order: the load balancer (with ``vipAddress`` set to VIP),
    a round-robin TCP pool, one backend at 127.0.0.1:18081, an HTTP health check
    on ``/`` expecting 200, and a TCP listener on port 18080 that defaults to
    the pool. *name_suffix* is appended to every resource name.
    """
    auth_headers = [f"authorization: Bearer {token}"]

    def call(service, payload):
        return grpcurl_json(
            machine,
            "127.0.0.1:50085",
            FIBERLB_PROTO_DIR,
            FIBERLB_PROTO,
            service,
            payload,
            headers=auth_headers,
        )

    load_balancer = call(
        "fiberlb.v1.LoadBalancerService/CreateLoadBalancer",
        {
            "name": f"bgp-ecmp-{name_suffix}",
            "orgId": "bgp-ecmp-org",
            "projectId": "bgp-ecmp-project",
            "description": f"native bgp ecmp {name_suffix}",
            "vipAddress": VIP,
        },
    )
    lb_id = load_balancer["loadbalancer"]["id"]

    pool = call(
        "fiberlb.v1.PoolService/CreatePool",
        {
            "name": f"bgp-ecmp-pool-{name_suffix}",
            "loadbalancerId": lb_id,
            "algorithm": "POOL_ALGORITHM_ROUND_ROBIN",
            "protocol": "POOL_PROTOCOL_TCP",
        },
    )
    pool_id = pool["pool"]["id"]

    backend = call(
        "fiberlb.v1.BackendService/CreateBackend",
        {
            "name": f"bgp-ecmp-backend-{name_suffix}",
            "poolId": pool_id,
            "address": "127.0.0.1",
            "port": 18081,
            "weight": 1,
        },
    )
    backend_id = backend["backend"]["id"]

    call(
        "fiberlb.v1.HealthCheckService/CreateHealthCheck",
        {
            "name": f"bgp-ecmp-health-{name_suffix}",
            "poolId": pool_id,
            "type": "HEALTH_CHECK_TYPE_HTTP",
            "intervalSeconds": 1,
            "timeoutSeconds": 1,
            "healthyThreshold": 1,
            "unhealthyThreshold": 1,
            "httpConfig": {
                "method": "GET",
                "path": "/",
                "expectedCodes": [200],
            },
        },
    )
    call(
        "fiberlb.v1.ListenerService/CreateListener",
        {
            "name": f"bgp-ecmp-listener-{name_suffix}",
            "loadbalancerId": lb_id,
            "protocol": "LISTENER_PROTOCOL_TCP",
            "port": 18080,
            "defaultPoolId": pool_id,
        },
    )
    return backend_id
|
|
|
|
def wait_for_backend_status(machine, status, backend_id, token):
    """Block until FiberLB on *machine* reports *status* for backend *backend_id*.

    Repeatedly runs GetBackend through grpcurl (authenticated with *token*) and
    pipes the reply through ``jq -e``, which succeeds only once
    ``.backend.status`` equals *status*.
    """
    auth_header = "authorization: Bearer " + token
    request_body = json.dumps({"id": backend_id})
    jq_filter = f'.backend.status == "{status}"'
    command = " ".join(
        [
            "grpcurl -plaintext",
            f"-H {shlex.quote(auth_header)}",
            f"-import-path {shlex.quote(FIBERLB_PROTO_DIR)}",
            f"-proto {shlex.quote(FIBERLB_PROTO)}",
            f"-d {shlex.quote(request_body)}",
            "127.0.0.1:50085 fiberlb.v1.BackendService/GetBackend",
            f"| jq -e {shlex.quote(jq_filter)}",
        ]
    )
    machine.wait_until_succeeds(command)
|
|
|
|
def machine_diagnostics(machine, unit):
    """Return a text report about *unit* on *machine* for failure messages.

    The report contains, in order: the metrics scrape from 127.0.0.1:9098,
    ``systemctl status`` for the unit, and the last 200 journal lines for the
    unit. Every command is suffixed with ``|| true`` so a failing probe does
    not abort the report.
    """
    quoted_unit = shlex.quote(unit)
    probes = (
        ("metrics", "curl -fsS http://127.0.0.1:9098/metrics || true"),
        ("systemctl status", f"systemctl status {quoted_unit} --no-pager || true"),
        ("journal", f"journalctl -u {quoted_unit} -n 200 --no-pager || true"),
    )
    sections = [f"{title}:\n{machine.succeed(command)}" for title, command in probes]
    return "\n".join(sections)
|
|
|
|
def edge_bgp_diagnostics():
    """Return a text report of the edge router's BGP and routing state.

    Collects, in order: frr-bgpd unit status, its last 200 journal lines, the
    BGP summary, and the BGP, zebra and kernel views of the VIP prefix. Every
    command is suffixed with ``|| true`` so a failing probe does not abort the
    report.
    """
    probes = [
        ("edge frr-bgpd status", "systemctl status frr-bgpd.service --no-pager"),
        ("edge frr-bgpd journal", "journalctl -u frr-bgpd.service -n 200 --no-pager"),
        ("edge BGP summary", "vtysh -c 'show ip bgp summary'"),
        (f"edge BGP route {VIP_PREFIX}", f"vtysh -c 'show ip bgp {VIP_PREFIX}'"),
        (f"edge zebra route {VIP_PREFIX}", f"vtysh -c 'show ip route {VIP_PREFIX}'"),
        (f"edge kernel route {VIP_PREFIX}", f"ip route show {VIP_PREFIX}"),
    ]
    sections = []
    for title, command in probes:
        output = edge.succeed(command + " || true")
        sections.append(f"{title}:\n{output}\n")
    return "".join(sections)
|
|
|
|
def wait_for_unit_or_dump(machine, unit):
    """Wait up to 120 seconds for systemd *unit* on *machine* to become active.

    Polls ``systemctl is-active`` once per second. Raises AssertionError with
    the machine diagnostics attached as soon as the unit reports ``failed``, or
    when the deadline passes without the unit becoming active.
    """
    probe = f"systemctl is-active {shlex.quote(unit)}"
    deadline = time.time() + 120
    while time.time() < deadline:
        exit_code, raw_state = machine.execute(probe)
        state = raw_state.strip()
        if state == "failed":
            raise AssertionError(
                f"unit {unit} failed to start\n{machine_diagnostics(machine, unit)}"
            )
        if exit_code == 0 and state == "active":
            return
        time.sleep(1)

    raise AssertionError(
        f"unit {unit} did not become active before timeout\n{machine_diagnostics(machine, unit)}"
    )
|
|
|
|
def wait_for_command_or_dump(machine, command, unit=None, timeout=120):
    """Retry shell *command* on *machine* once per second until it exits 0.

    The command runs through ``sh -lc`` with stderr merged into stdout. If it
    has not succeeded within *timeout* seconds, raises AssertionError carrying
    the last command output and, when *unit* is given, the unit diagnostics
    plus the listening-socket table.
    """
    wrapped = "sh -lc " + shlex.quote(command + " 2>&1")
    deadline = time.time() + timeout
    last_output = ""
    while time.time() < deadline:
        status, last_output = machine.execute(wrapped)
        if status == 0:
            return
        time.sleep(1)

    report = [f"last command output:\n{last_output}\n"]
    if unit is not None:
        report.append(machine_diagnostics(machine, unit))
        sockets = machine.succeed("ss -ltnp || true")
        report.append(f"socket state:\n{sockets}\n")
    diagnostics = "".join(report)
    raise AssertionError(
        f"command did not succeed before timeout: {command}\n{diagnostics}"
    )
|
|
|
|
def parse_labels(label_blob):
    """Parse the label section of a metrics sample line into a dict.

    *label_blob* is the text found between the braces of a sample, e.g.
    ``peer="edge",state="up"``. An empty or missing blob yields ``{}``.

    Pairs are scanned with a regular expression rather than split on commas,
    so a trailing comma is tolerated and commas inside a quoted value stay
    part of that value. Quoted values are returned without their surrounding
    quotes; backslash escapes inside them are left untouched. Unquoted values
    are stripped of surrounding whitespace and double quotes. Text that does
    not form a ``key=value`` pair is skipped.
    """
    if not label_blob:
        return {}

    # key = "quoted value" | bare value, followed by a comma or end of input.
    pair_pattern = r'\s*([^=,\s]+)\s*=\s*(?:"((?:[^"\\]|\\.)*)"|([^,]*))\s*(?:,|$)'
    labels = {}
    for match in re.finditer(pair_pattern, label_blob):
        key, quoted, bare = match.groups()
        if quoted is not None:
            labels[key] = quoted
        else:
            labels[key] = bare.strip().strip('"')
    return labels
|
|
|
|
def wait_for_metric(machine, metric_name, expected_value, labels=None):
    """Wait up to 60 seconds for a metric on *machine* to reach *expected_value*.

    Scrapes http://127.0.0.1:9098/metrics once per second and returns as soon
    as a sample named *metric_name*, whose labels equal *labels* exactly (no
    labels when omitted), is within 0.0001 of *expected_value*.

    A failed scrape is treated as "not there yet" and retried, so a brief
    outage of the metrics endpoint does not abort the wait before the
    diagnostics below can be collected.

    Raises AssertionError on timeout, including the last scrape, the fiberlb
    unit diagnostics and the edge BGP diagnostics.
    """
    expected_labels = labels or {}
    target = float(expected_value)
    deadline = time.time() + 60
    last_exposition = ""

    while time.time() < deadline:
        status, exposition = machine.execute("curl -fsS http://127.0.0.1:9098/metrics 2>&1")
        if status != 0:
            last_exposition = f"(scrape failed with exit status {status})\n{exposition}"
            time.sleep(1)
            continue

        last_exposition = exposition
        for line in exposition.splitlines():
            line = line.strip()
            # Skip blank lines and comment lines.
            if not line or line.startswith("#"):
                continue
            match = METRIC_RE.match(line)
            if not match:
                continue
            name, label_blob, value = match.groups()
            if name != metric_name:
                continue
            if parse_labels(label_blob) != expected_labels:
                continue
            if abs(float(value) - target) < 0.0001:
                return
        time.sleep(1)

    raise AssertionError(
        f"metric {metric_name} with labels={expected_labels} did not reach {expected_value}\n"
        f"last metrics scrape:\n{last_exposition}\n"
        f"{machine_diagnostics(machine, 'fiberlb.service')}\n"
        f"{edge_bgp_diagnostics()}"
    )
|
|
|
|
def wait_for_local_vip(machine, present):
    """Wait until the VIP is (or is no longer) configured on *machine*'s loopback.

    With *present* true, waits until ``ip -4 addr show dev lo`` lists the VIP
    as a /32 address. With *present* false, polls once per second for up to 60
    seconds until it is gone and raises AssertionError if it is still there.
    """
    pattern = f"inet {VIP}/32"
    if present:
        machine.wait_until_succeeds(
            "ip -4 addr show dev lo | grep -F " + shlex.quote(pattern)
        )
        return

    deadline = time.time() + 60
    while time.time() < deadline:
        addresses = machine.succeed("ip -4 addr show dev lo || true")
        if pattern not in addresses:
            return
        time.sleep(1)
    raise AssertionError(f"VIP {VIP} still present on loopback")
|
|
|
|
def wait_for_edge_route(next_hops):
    """Wait up to 60 seconds for the edge kernel route to the VIP to list *next_hops*.

    Polls ``ip route show`` for the VIP prefix on the edge once per second and
    returns when every string in *next_hops* occurs in the output. Raises
    AssertionError with the last route output and the edge BGP diagnostics on
    timeout.
    """
    command = f"ip route show {shlex.quote(VIP_PREFIX)} || true"
    deadline = time.time() + 60
    last_output = ""
    while time.time() < deadline:
        last_output = edge.succeed(command)
        missing = [hop for hop in next_hops if hop not in last_output]
        if not missing:
            return
        time.sleep(1)

    raise AssertionError(
        f"edge route for {VIP_PREFIX} did not contain nexthops {next_hops}\n"
        f"last kernel route output:\n{last_output}\n"
        f"{edge_bgp_diagnostics()}"
    )
|
|
|
|
def wait_for_edge_route_absent(needle):
    """Wait up to 60 seconds until *needle* no longer appears in the edge VIP route.

    Polls ``ip route show`` for the VIP prefix on the edge once per second.
    Raises AssertionError with the last route output and the edge BGP
    diagnostics if *needle* is still present at the deadline.
    """
    command = f"ip route show {shlex.quote(VIP_PREFIX)} || true"
    deadline = time.time() + 60
    last_output = ""
    while time.time() < deadline:
        last_output = edge.succeed(command)
        if needle not in last_output:
            return
        time.sleep(1)

    raise AssertionError(
        f"edge route for {VIP_PREFIX} still contained {needle}\n"
        f"last kernel route output:\n{last_output}\n"
        f"{edge_bgp_diagnostics()}"
    )
|
|
|
|
def wait_for_http_any():
    """Wait until a request from the edge to the listener is answered by backend a or b."""
    fetch = f"curl -fsS --max-time 5 {shlex.quote(LISTENER_URL)}"
    expect = "grep -E 'fiberlb ecmp backend (a|b)'"
    edge.wait_until_succeeds(f"{fetch} | {expect}")
|
|
|
|
def port_is_listening(port):
    """Return a shell command that succeeds once a TCP socket listens on *port*."""
    return f"ss -ltnH '( sport = :{port} )' | grep -q LISTEN"


start_all()
serial_stdout_off()

# --- Edge router: zebra first, then bgpd listening on the BGP port.
wait_for_unit_or_dump(edge, "frr-zebra.service")
wait_for_command_or_dump(edge, "test -S /run/frr/zserv.api", "frr-zebra.service")
wait_for_unit_or_dump(edge, "frr-bgpd.service")
wait_for_command_or_dump(edge, port_is_listening(179), "frr-bgpd.service")

# --- Load balancer nodes: IAM, the mock backend, then FiberLB (API + metrics).
for lb_node in (lb_a, lb_b):
    wait_for_unit_or_dump(lb_node, "iam.service")
    wait_for_command_or_dump(lb_node, port_is_listening(50080), "iam.service")
    wait_for_unit_or_dump(lb_node, "mock-backend.service")
    wait_for_unit_or_dump(lb_node, "fiberlb.service")
    for fiberlb_port in (50085, 9098):
        wait_for_command_or_dump(lb_node, port_is_listening(fiberlb_port), "fiberlb.service")

# --- Both BGP sessions must be established on the edge.
for neighbor in ("192.168.100.2", "192.168.100.3"):
    wait_for_command_or_dump(
        edge,
        f"vtysh -c 'show ip bgp neighbor {neighbor}' | grep -F 'BGP state = Established'",
        "frr-bgpd.service",
    )

# --- Provision the same VIP on both nodes.
token_a = issue_project_admin_token(lb_a, "bgp-ecmp-org", "bgp-ecmp-project")
token_b = issue_project_admin_token(lb_b, "bgp-ecmp-org", "bgp-ecmp-project")
backend_a = create_load_balancer(lb_a, token_a, "a")
backend_b = create_load_balancer(lb_b, token_b, "b")

wait_for_backend_status(lb_a, "BACKEND_STATUS_ONLINE", backend_a, token_a)
wait_for_backend_status(lb_b, "BACKEND_STATUS_ONLINE", backend_b, token_b)
for lb_node in (lb_a, lb_b):
    wait_for_metric(lb_node, "fiberlb_bgp_connected_peers", 1)
for lb_node in (lb_a, lb_b):
    wait_for_local_vip(lb_node, True)

# --- The edge should install a route via both nodes and traffic should flow.
both_next_hops = ["via 192.168.100.2", "via 192.168.100.3"]
wait_for_edge_route(both_next_hops)
wait_for_http_any()

# --- Drain lb_a: its next hop disappears from the edge route while the VIP is
# still on its loopback, and requests are answered by backend b.
lb_a.succeed("touch /var/lib/fiberlb/drain")
wait_for_metric(lb_a, "fiberlb_vip_drain_active", 1)
wait_for_edge_route(["via 192.168.100.3"])
wait_for_edge_route_absent("via 192.168.100.2")
wait_for_local_vip(lb_a, True)
backend_b_reply = "grep -F 'fiberlb ecmp backend b'"
edge.wait_until_succeeds(
    f"curl -fsS --max-time 5 {shlex.quote(LISTENER_URL)} | {backend_b_reply}"
)

# Sleep longer than the 3-second vipDrain.holdTimeSecs configured on the nodes;
# presumably the VIP is dropped from loopback once that hold time has elapsed.
time.sleep(4)
wait_for_local_vip(lb_a, False)

# --- Undrain lb_a: VIP, two-way route and traffic come back.
lb_a.succeed("rm -f /var/lib/fiberlb/drain")
wait_for_metric(lb_a, "fiberlb_vip_drain_active", 0)
wait_for_local_vip(lb_a, True)
wait_for_edge_route(both_next_hops)
wait_for_http_any()
|
|
'';
|
|
}
|