//! FiberLB Integration Tests
//!
//! End-to-end coverage for metadata CRUD, health checking, and the TCP data plane.

use std::sync::Arc;
use std::time::Duration;

use fiberlb_server::{DataPlane, HealthChecker, LbMetadataStore};
use fiberlb_types::{
    Backend, BackendStatus, HealthCheck, HealthCheckType, Listener, ListenerProtocol,
    LoadBalancer, Pool, PoolAlgorithm, PoolProtocol,
};
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tokio::net::{TcpListener, TcpStream};
use tokio::sync::watch;

/// Test 1: Full lifecycle CRUD for all entities
#[tokio::test]
async fn test_lb_lifecycle() {
    // 1. Create in-memory metadata store
    let metadata = Arc::new(LbMetadataStore::new_in_memory());

    // 2. Create LoadBalancer
    let lb = LoadBalancer::new("test-lb", "org-1", "proj-1");
    metadata.save_lb(&lb).await.expect("save lb failed");

    // Verify LB retrieval
    let loaded_lb = metadata
        .load_lb("org-1", "proj-1", &lb.id)
        .await
        .expect("load lb failed")
        .expect("lb not found");
    assert_eq!(loaded_lb.name, "test-lb");
    assert_eq!(loaded_lb.org_id, "org-1");

    // 3. Create Listener
    let listener = Listener::new("http-listener", lb.id, ListenerProtocol::Tcp, 8080);
    metadata
        .save_listener(&listener)
        .await
        .expect("save listener failed");

    // Verify Listener retrieval
    let listeners = metadata
        .list_listeners(&lb.id)
        .await
        .expect("list listeners failed");
    assert_eq!(listeners.len(), 1);
    assert_eq!(listeners[0].port, 8080);

    // 4. Create Pool
    let pool = Pool::new("backend-pool", lb.id, PoolAlgorithm::RoundRobin, PoolProtocol::Tcp);
    metadata.save_pool(&pool).await.expect("save pool failed");

    // Verify Pool retrieval
    let pools = metadata.list_pools(&lb.id).await.expect("list pools failed");
    assert_eq!(pools.len(), 1);
    assert_eq!(pools[0].algorithm, PoolAlgorithm::RoundRobin);

    // 5. Create Backend
    let backend = Backend::new("backend-1", pool.id, "127.0.0.1", 9000);
    metadata
        .save_backend(&backend)
        .await
        .expect("save backend failed");

    // Verify Backend retrieval
    let backends = metadata
        .list_backends(&pool.id)
        .await
        .expect("list backends failed");
    assert_eq!(backends.len(), 1);
    assert_eq!(backends[0].address, "127.0.0.1");
    assert_eq!(backends[0].port, 9000);

    // 6. Test listing LBs with filters
    let all_lbs = metadata
        .list_lbs("org-1", None)
        .await
        .expect("list lbs failed");
    assert_eq!(all_lbs.len(), 1);

    let project_lbs = metadata
        .list_lbs("org-1", Some("proj-1"))
        .await
        .expect("list project lbs failed");
    assert_eq!(project_lbs.len(), 1);

    // 7. Test delete - clean up sub-resources first (cascade delete is in the service layer)
    metadata
        .delete_backend(&backend)
        .await
        .expect("delete backend failed");
    metadata
        .delete_pool(&pool)
        .await
        .expect("delete pool failed");
    metadata
        .delete_listener(&listener)
        .await
        .expect("delete listener failed");
    metadata.delete_lb(&lb).await.expect("delete lb failed");

    // Verify everything is cleaned up
    let remaining_lbs = metadata
        .list_lbs("org-1", Some("proj-1"))
        .await
        .expect("list failed");
    assert!(remaining_lbs.is_empty());
}

/// Test 2: Multiple backends with round-robin simulation
#[tokio::test]
async fn test_multi_backend_pool() {
    let metadata = Arc::new(LbMetadataStore::new_in_memory());

    // Create LB and Pool
    let lb = LoadBalancer::new("multi-backend-lb", "org-1", "proj-1");
    metadata.save_lb(&lb).await.unwrap();

    let pool = Pool::new("multi-pool", lb.id, PoolAlgorithm::RoundRobin, PoolProtocol::Tcp);
    metadata.save_pool(&pool).await.unwrap();

    // Create multiple backends
    for i in 1..=3 {
        let backend = Backend::new(
            &format!("backend-{}", i),
            pool.id,
            "127.0.0.1",
            9000 + i as u16,
        );
        metadata.save_backend(&backend).await.unwrap();
    }

    // Verify all backends
    let backends = metadata.list_backends(&pool.id).await.unwrap();
    assert_eq!(backends.len(), 3);

    // Verify different ports
    let ports: Vec<u16> = backends.iter().map(|b| b.port).collect();
    assert!(ports.contains(&9001));
    assert!(ports.contains(&9002));
    assert!(ports.contains(&9003));
}

/// Test 3: Health check status update
#[tokio::test]
async fn test_health_check_status_update() {
    let metadata = Arc::new(LbMetadataStore::new_in_memory());

    // Create LB, Pool, Backend
    let lb = LoadBalancer::new("health-test-lb", "org-1", "proj-1");
    metadata.save_lb(&lb).await.unwrap();

    let pool = Pool::new("health-pool", lb.id, PoolAlgorithm::RoundRobin, PoolProtocol::Tcp);
    metadata.save_pool(&pool).await.unwrap();

    // Create a backend with an unreachable (TEST-NET-1) address
    let mut backend = Backend::new("unhealthy-backend", pool.id, "192.0.2.1", 59999);
    backend.status = BackendStatus::Unknown;
    metadata.save_backend(&backend).await.unwrap();

    // Create a health checker with a short probe timeout
    let (shutdown_tx, shutdown_rx) = watch::channel(false);
    let checker = HealthChecker::new(metadata.clone(), Duration::from_secs(60), shutdown_rx)
        .with_timeout(Duration::from_millis(100));

    // The checker's full loop is not run here; instead, simulate a single
    // probe by checking the backend directly via the helper below.
    let check_result = checker_tcp_check(&backend).await;
    assert!(check_result.is_err(), "Should fail on unreachable address");

    // Update status via metadata
    metadata
        .update_backend_health(&pool.id, &backend.id, BackendStatus::Offline)
        .await
        .unwrap();

    // Verify status was updated
    let loaded = metadata
        .load_backend(&pool.id, &backend.id)
        .await
        .unwrap()
        .unwrap();
    assert_eq!(loaded.status, BackendStatus::Offline);

    // Cleanup
    drop(checker);
    let _ = shutdown_tx.send(true);
}

/// Helper: Simulate a TCP health probe by attempting a connection with a
/// 100 ms timeout, mirroring what the health checker does for TCP checks.
async fn checker_tcp_check(backend: &Backend) -> Result<(), String> {
    let addr = format!("{}:{}", backend.address, backend.port);
    tokio::time::timeout(Duration::from_millis(100), TcpStream::connect(&addr))
        .await
        .map_err(|_| "timeout".to_string())?
        .map_err(|e| e.to_string())?;
    Ok(())
}
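
// A minimal HTTP probe sketch mirroring `checker_tcp_check` above, for the
// HTTP check type configured in Test 5. This is an illustration only (the
// real probe lives in `HealthChecker`); it assumes the backend speaks
// HTTP/1.1 and treats any 2xx status line as healthy.
#[allow(dead_code)]
async fn checker_http_check(backend: &Backend, path: &str) -> Result<(), String> {
    let addr = format!("{}:{}", backend.address, backend.port);
    let mut stream = tokio::time::timeout(Duration::from_millis(100), TcpStream::connect(&addr))
        .await
        .map_err(|_| "timeout".to_string())?
        .map_err(|e| e.to_string())?;

    // Send a bare-bones GET request and read the status line.
    let request = format!(
        "GET {} HTTP/1.1\r\nHost: {}\r\nConnection: close\r\n\r\n",
        path, backend.address
    );
    stream.write_all(request.as_bytes()).await.map_err(|e| e.to_string())?;

    let mut buf = [0u8; 64];
    let n = stream.read(&mut buf).await.map_err(|e| e.to_string())?;
    let status_line = String::from_utf8_lossy(&buf[..n]);

    // "HTTP/1.1 2xx ..." => healthy.
    if status_line
        .split_whitespace()
        .nth(1)
        .map_or(false, |code| code.starts_with('2'))
    {
        Ok(())
    } else {
        Err(format!(
            "unhealthy status: {}",
            status_line.lines().next().unwrap_or("")
        ))
    }
}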

/// Test 4: DataPlane TCP proxy (requires real TCP server)
#[tokio::test]
#[ignore = "Integration test requiring TCP server"]
async fn test_dataplane_tcp_proxy() {
    let metadata = Arc::new(LbMetadataStore::new_in_memory());

    // 1. Start mock backend server
    let backend_port = 19000u16;
    let backend_server = tokio::spawn(async move {
        let listener = TcpListener::bind(format!("127.0.0.1:{}", backend_port))
            .await
            .expect("backend bind failed");
        let (mut socket, _) = listener.accept().await.expect("accept failed");

        // Echo back with prefix
        let mut buf = [0u8; 1024];
        let n = socket.read(&mut buf).await.expect("read failed");
        socket
            .write_all(format!("ECHO: {}", String::from_utf8_lossy(&buf[..n])).as_bytes())
            .await
            .expect("write failed");
    });

    // Give server time to start
    tokio::time::sleep(Duration::from_millis(50)).await;

    // 2. Setup LB config
    let lb = LoadBalancer::new("proxy-lb", "org-1", "proj-1");
    metadata.save_lb(&lb).await.unwrap();

    let pool = Pool::new("proxy-pool", lb.id, PoolAlgorithm::RoundRobin, PoolProtocol::Tcp);
    metadata.save_pool(&pool).await.unwrap();

    let mut backend = Backend::new("proxy-backend", pool.id, "127.0.0.1", backend_port);
    backend.status = BackendStatus::Online;
    metadata.save_backend(&backend).await.unwrap();

    let mut listener = Listener::new("proxy-listener", lb.id, ListenerProtocol::Tcp, 18080);
    listener.default_pool_id = Some(pool.id);
    metadata.save_listener(&listener).await.unwrap();

    // 3. Start DataPlane
    let dataplane = DataPlane::new(metadata.clone());
    dataplane
        .start_listener(listener.id)
        .await
        .expect("start listener failed");

    // Give listener time to start
    tokio::time::sleep(Duration::from_millis(50)).await;

    // 4. Connect to VIP and test proxy
    let mut client = TcpStream::connect("127.0.0.1:18080")
        .await
        .expect("client connect failed");

    client.write_all(b"HELLO").await.expect("client write failed");

    let mut response = vec![0u8; 128];
    let n = client.read(&mut response).await.expect("client read failed");
    let response_str = String::from_utf8_lossy(&response[..n]);

    assert!(
        response_str.contains("ECHO: HELLO"),
        "Expected echo response, got: {}",
        response_str
    );

    // 5. Cleanup
    dataplane.stop_listener(&listener.id).await.unwrap();
    backend_server.abort();
}
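
// A minimal sketch of the TCP proxying the test above exercises: accept on
// the VIP, connect to a chosen backend, then shuttle bytes both ways until
// each side closes. Illustration only; FiberLB's real forwarding path is in
// `DataPlane`, and `copy_bidirectional` is just one way to implement it.
#[allow(dead_code)]
async fn proxy_once(vip: &str, backend_addr: &str) -> std::io::Result<()> {
    let listener = TcpListener::bind(vip).await?;
    let (mut inbound, _) = listener.accept().await?;
    let mut outbound = TcpStream::connect(backend_addr).await?;
    // Copy client->backend and backend->client concurrently until both close.
    tokio::io::copy_bidirectional(&mut inbound, &mut outbound).await?;
    Ok(())
}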

/// Test 5: Health check configuration
#[tokio::test]
async fn test_health_check_config() {
    let metadata = Arc::new(LbMetadataStore::new_in_memory());

    // Create LB and Pool
    let lb = LoadBalancer::new("hc-config-lb", "org-1", "proj-1");
    metadata.save_lb(&lb).await.unwrap();

    let pool = Pool::new("hc-pool", lb.id, PoolAlgorithm::RoundRobin, PoolProtocol::Tcp);
    metadata.save_pool(&pool).await.unwrap();

    // Create TCP health check
    let tcp_hc = HealthCheck::new_tcp("tcp-check", pool.id);
    metadata.save_health_check(&tcp_hc).await.unwrap();

    // Verify retrieval
    let hcs = metadata.list_health_checks(&pool.id).await.unwrap();
    assert_eq!(hcs.len(), 1);
    assert_eq!(hcs[0].check_type, HealthCheckType::Tcp);
    assert_eq!(hcs[0].interval_seconds, 30);

    // Create HTTP health check
    let http_hc = HealthCheck::new_http("http-check", pool.id, "/healthz");
    metadata.save_health_check(&http_hc).await.unwrap();

    let hcs = metadata.list_health_checks(&pool.id).await.unwrap();
    assert_eq!(hcs.len(), 2);

    // Find HTTP check
    let http = hcs.iter().find(|h| h.check_type == HealthCheckType::Http);
    assert!(http.is_some());
    assert_eq!(
        http.unwrap().http_config.as_ref().unwrap().path,
        "/healthz"
    );
}
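
// The two tests below each hand-roll several near-identical mock backends.
// A shared helper along these lines could factor that out; this is a sketch
// using only the tokio primitives already imported above, not part of the
// FiberLB API. `reply` is an optional payload written to each connection.
#[allow(dead_code)]
fn spawn_mock_backend(
    port: u16,
    reply: Option<String>,
) -> (tokio::task::JoinHandle<()>, tokio::sync::mpsc::Sender<()>) {
    let (shutdown_tx, mut shutdown_rx) = tokio::sync::mpsc::channel::<()>(1);
    let handle = tokio::spawn(async move {
        let listener = TcpListener::bind(format!("127.0.0.1:{}", port))
            .await
            .expect("mock backend bind");
        loop {
            tokio::select! {
                // Accept a connection, optionally answer, then drop it.
                Ok((mut socket, _)) = listener.accept() => {
                    if let Some(msg) = &reply {
                        let _ = socket.write_all(msg.as_bytes()).await;
                    }
                }
                _ = shutdown_rx.recv() => break,
            }
        }
    });
    (handle, shutdown_tx)
}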

/// Test 5.5: Basic load balancing - T051.S2
/// Tests round-robin traffic distribution across multiple backends
#[tokio::test]
async fn test_basic_load_balancing() {
    use std::collections::HashMap;
    use tokio::sync::Mutex;

    // 1. Start 3 backend servers that echo their port number
    let backend1_port = 18001u16;
    let backend2_port = 18002u16;
    let backend3_port = 18003u16;

    let (b1_shutdown_tx, mut b1_shutdown_rx) = tokio::sync::mpsc::channel::<()>(1);
    let (b2_shutdown_tx, mut b2_shutdown_rx) = tokio::sync::mpsc::channel::<()>(1);
    let (b3_shutdown_tx, mut b3_shutdown_rx) = tokio::sync::mpsc::channel::<()>(1);

    // Track request count per backend
    let request_counts = Arc::new(Mutex::new(HashMap::<u16, usize>::new()));

    // Backend 1
    let counts1 = request_counts.clone();
    let _backend1 = tokio::spawn(async move {
        let listener = TcpListener::bind(format!("127.0.0.1:{}", backend1_port))
            .await
            .expect("backend1 bind");
        loop {
            tokio::select! {
                Ok((mut socket, _)) = listener.accept() => {
                    *counts1.lock().await.entry(backend1_port).or_insert(0) += 1;
                    let _ = socket.write_all(format!("B{}", backend1_port).as_bytes()).await;
                }
                _ = b1_shutdown_rx.recv() => break,
            }
        }
    });

    // Backend 2
    let counts2 = request_counts.clone();
    let _backend2 = tokio::spawn(async move {
        let listener = TcpListener::bind(format!("127.0.0.1:{}", backend2_port))
            .await
            .expect("backend2 bind");
        loop {
            tokio::select! {
                Ok((mut socket, _)) = listener.accept() => {
                    *counts2.lock().await.entry(backend2_port).or_insert(0) += 1;
                    let _ = socket.write_all(format!("B{}", backend2_port).as_bytes()).await;
                }
                _ = b2_shutdown_rx.recv() => break,
            }
        }
    });

    // Backend 3
    let counts3 = request_counts.clone();
    let _backend3 = tokio::spawn(async move {
        let listener = TcpListener::bind(format!("127.0.0.1:{}", backend3_port))
            .await
            .expect("backend3 bind");
        loop {
            tokio::select! {
                Ok((mut socket, _)) = listener.accept() => {
                    *counts3.lock().await.entry(backend3_port).or_insert(0) += 1;
                    let _ = socket.write_all(format!("B{}", backend3_port).as_bytes()).await;
                }
                _ = b3_shutdown_rx.recv() => break,
            }
        }
    });

    tokio::time::sleep(Duration::from_millis(200)).await;

    // 2. Setup FiberLB
    let metadata = Arc::new(LbMetadataStore::new_in_memory());

    let lb = LoadBalancer::new("lb-test", "", "");
    metadata.save_lb(&lb).await.unwrap();

    let pool = Pool::new("test-pool", lb.id, PoolAlgorithm::RoundRobin, PoolProtocol::Tcp);
    metadata.save_pool(&pool).await.unwrap();

    // Create 3 backends - all Online
    for (i, port) in [(1, backend1_port), (2, backend2_port), (3, backend3_port)] {
        let mut backend = Backend::new(&format!("backend-{}", i), pool.id, "127.0.0.1", port);
        backend.status = BackendStatus::Online;
        metadata.save_backend(&backend).await.unwrap();
    }

    // Create listener on port 17080
    let mut listener = Listener::new("test-listener", lb.id, ListenerProtocol::Tcp, 17080);
    listener.default_pool_id = Some(pool.id);
    metadata.save_listener(&listener).await.unwrap();

    // 3. Start DataPlane
    let dataplane = DataPlane::new(metadata.clone());
    dataplane.start_listener(listener.id).await.expect("start listener");

    tokio::time::sleep(Duration::from_millis(200)).await;

    // 4. Send 15 requests (should distribute 5-5-5 with perfect round-robin)
    println!("Sending 15 requests through load balancer...");
    for i in 0..15 {
        let mut client = TcpStream::connect("127.0.0.1:17080")
            .await
            .expect(&format!("connect request {}", i));
        client.write_all(b"TEST").await.expect("write");
        let mut buf = [0u8; 64];
        let _ = client.read(&mut buf).await;
        tokio::time::sleep(Duration::from_millis(10)).await;
    }

    // 5. Verify distribution
    let counts = request_counts.lock().await;
    let count1 = counts.get(&backend1_port).copied().unwrap_or(0);
    let count2 = counts.get(&backend2_port).copied().unwrap_or(0);
    let count3 = counts.get(&backend3_port).copied().unwrap_or(0);

    println!("Request distribution:");
    println!("  Backend 1 ({}): {} requests", backend1_port, count1);
    println!("  Backend 2 ({}): {} requests", backend2_port, count2);
    println!("  Backend 3 ({}): {} requests", backend3_port, count3);

    // All backends should have received requests
    assert!(count1 > 0, "Backend 1 should receive requests");
    assert!(count2 > 0, "Backend 2 should receive requests");
    assert!(count3 > 0, "Backend 3 should receive requests");

    // Total should equal 15
    assert_eq!(count1 + count2 + count3, 15, "Total requests should be 15");

    // Requests are sent sequentially, so round-robin should yield an exact
    // 5-5-5 split across the three backends.
    assert_eq!(count1, 5, "Backend 1 should receive 5 requests (round-robin)");
    assert_eq!(count2, 5, "Backend 2 should receive 5 requests (round-robin)");
    assert_eq!(count3, 5, "Backend 3 should receive 5 requests (round-robin)");

    println!("✅ T051.S2 COMPLETE: Round-robin load balancing verified");

    // Cleanup
    dataplane.stop_listener(&listener.id).await.unwrap();
    let _ = b1_shutdown_tx.send(()).await;
    let _ = b2_shutdown_tx.send(()).await;
    let _ = b3_shutdown_tx.send(()).await;
}
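
// A minimal sketch of the selection logic the test above verifies: a shared
// atomic cursor stepping over the healthy backend list. This illustrates the
// round-robin algorithm under stated assumptions; it is not FiberLB's actual
// data-plane code.
#[allow(dead_code)]
fn round_robin_pick<'a>(
    backends: &'a [Backend],
    cursor: &std::sync::atomic::AtomicUsize,
) -> Option<&'a Backend> {
    if backends.is_empty() {
        return None;
    }
    // fetch_add hands each caller a unique tick; the modulo maps it onto the
    // backend list, so 15 sequential requests over 3 backends land 5-5-5.
    let i = cursor.fetch_add(1, std::sync::atomic::Ordering::Relaxed) % backends.len();
    backends.get(i)
}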

/// Test 6: Health check failover - T051.S4
/// Tests automatic backend health check failure detection and recovery
#[tokio::test]
async fn test_health_check_failover() {
    // 1. Start 3 mock backend servers that accept TCP connections
    let backend1_port = 19001u16;
    let backend2_port = 19002u16;
    let backend3_port = 19003u16;

    // Use shutdown signals to control backends
    let (b1_shutdown_tx, mut b1_shutdown_rx) = tokio::sync::mpsc::channel::<()>(1);
    let (b2_shutdown_tx, mut b2_shutdown_rx) = tokio::sync::mpsc::channel::<()>(1);
    let (b3_shutdown_tx, mut b3_shutdown_rx) = tokio::sync::mpsc::channel::<()>(1);

    // Backend 1 - stays online throughout the test
    let backend1 = tokio::spawn(async move {
        let listener = TcpListener::bind(format!("127.0.0.1:{}", backend1_port))
            .await
            .expect("backend1 bind");
        loop {
            tokio::select! {
                Ok(_) = listener.accept() => {}, // Just accept and drop
                _ = b1_shutdown_rx.recv() => break,
            }
        }
    });

    // Backend 2 - will be stopped and restarted
    let backend2 = tokio::spawn(async move {
        let listener = TcpListener::bind(format!("127.0.0.1:{}", backend2_port))
            .await
            .expect("backend2 bind");
        loop {
            tokio::select! {
                Ok(_) = listener.accept() => {},
                _ = b2_shutdown_rx.recv() => break,
            }
        }
    });

    // Backend 3 - stays online throughout the test
    let _backend3 = tokio::spawn(async move {
        let listener = TcpListener::bind(format!("127.0.0.1:{}", backend3_port))
            .await
            .expect("backend3 bind");
        loop {
            tokio::select! {
                Ok(_) = listener.accept() => {},
                _ = b3_shutdown_rx.recv() => break,
            }
        }
    });

    // Give backends time to start
    tokio::time::sleep(Duration::from_millis(200)).await;

    // 2. Setup FiberLB configuration
    let metadata = Arc::new(LbMetadataStore::new_in_memory());

    // Use an empty org_id so the health checker can find it (the health checker scans with org_id="")
    let lb = LoadBalancer::new("failover-lb", "", "");
    metadata.save_lb(&lb).await.unwrap();

    let pool = Pool::new("failover-pool", lb.id, PoolAlgorithm::RoundRobin, PoolProtocol::Tcp);
    metadata.save_pool(&pool).await.unwrap();

    // Create 3 backends - all initially Unknown (they will be probed by the health checker)
    println!("Creating 3 backends...");
    for (i, port) in [(1, backend1_port), (2, backend2_port), (3, backend3_port)] {
        let backend = Backend::new(&format!("backend-{}", i), pool.id, "127.0.0.1", port);
        println!("  Created backend-{}: {}:{} (id={})", i, backend.address, backend.port, backend.id);
        metadata.save_backend(&backend).await.unwrap();
    }

    // Verify backends were saved
    let saved_backends = metadata.list_backends(&pool.id).await.unwrap();
    println!("Saved {} backends to metadata", saved_backends.len());

    // Create the TCP health check config; the fast 1s probe interval used for
    // this test is set on the checker itself below.
    let hc = HealthCheck::new_tcp("tcp-check", pool.id);
    metadata.save_health_check(&hc).await.unwrap();
    println!("Created health check config");

    // 3. Start health checker with a 1s interval
    println!("Starting health checker...");
    let (hc_handle, hc_shutdown_tx) = fiberlb_server::spawn_health_checker(
        metadata.clone(),
        Duration::from_secs(1),
    );
    println!("Health checker task spawned");

    // 4. Wait for initial health check cycles to mark all backends online.
    // The health checker runs every 1s; waiting 5s allows 4-5 cycles.
    println!("Waiting 5s for health checks to run...");
    tokio::time::sleep(Duration::from_secs(5)).await;

    // Verify all backends are online
    let backends = metadata.list_backends(&pool.id).await.unwrap();
    println!("Statuses of {} backends after initial health check cycles:", backends.len());
    for backend in &backends {
        println!("  Port {}: {:?}", backend.port, backend.status);
    }

    for backend in &backends {
        assert_eq!(backend.status, BackendStatus::Online,
            "Backend {} should be online initially (got {:?})", backend.port, backend.status);
    }
    println!("✓ All 3 backends initially healthy");

    // 5. Stop backend 2 to simulate failure
    let _ = b2_shutdown_tx.send(()).await;
    tokio::time::sleep(Duration::from_millis(100)).await;
    println!("✗ Stopped backend 2 (port {})", backend2_port);

    // 6. Wait for the health check to detect the failure (2-3 cycles)
    tokio::time::sleep(Duration::from_secs(3)).await;

    // Verify backend2 is marked offline
    let backends = metadata.list_backends(&pool.id).await.unwrap();
    let backend1_status = backends.iter().find(|b| b.port == backend1_port).unwrap();
    let backend2_status = backends.iter().find(|b| b.port == backend2_port).unwrap();
    let backend3_status = backends.iter().find(|b| b.port == backend3_port).unwrap();

    assert_eq!(backend1_status.status, BackendStatus::Online, "Backend 1 should still be online");
    assert_eq!(backend2_status.status, BackendStatus::Offline, "Backend 2 should be offline after failure");
    assert_eq!(backend3_status.status, BackendStatus::Online, "Backend 3 should still be online");
    println!("✓ Health checker detected backend 2 failure");

    // 7. Verify the dataplane would exclude the offline backend
    use fiberlb_types::BackendAdminState;
    let healthy: Vec<_> = backends
        .into_iter()
        .filter(|b| {
            b.admin_state == BackendAdminState::Enabled &&
            (b.status == BackendStatus::Online || b.status == BackendStatus::Unknown)
        })
        .collect();

    assert_eq!(healthy.len(), 2, "Only 2 backends should be healthy");
    assert!(!healthy.iter().any(|b| b.port == backend2_port),
        "Backend 2 should not be in healthy list");
    println!("✓ Dataplane filter excludes offline backend");

    // 8. Restart backend 2
    let (b2_restart_shutdown_tx, mut b2_restart_shutdown_rx) = tokio::sync::mpsc::channel::<()>(1);
    let backend2_restart = tokio::spawn(async move {
        let listener = TcpListener::bind(format!("127.0.0.1:{}", backend2_port))
            .await
            .expect("backend2 restart bind");
        loop {
            tokio::select! {
                Ok(_) = listener.accept() => {},
                _ = b2_restart_shutdown_rx.recv() => break,
            }
        }
    });
    tokio::time::sleep(Duration::from_millis(100)).await;
    println!("✓ Restarted backend 2");

    // 9. Wait for the health check to detect recovery (2-3 cycles)
    tokio::time::sleep(Duration::from_secs(3)).await;

    // Verify backend2 is back online
    let backends = metadata.list_backends(&pool.id).await.unwrap();
    let backend2_recovered = backends.iter().find(|b| b.port == backend2_port).unwrap();
    assert_eq!(backend2_recovered.status, BackendStatus::Online,
        "Backend 2 should be online after recovery");
    println!("✓ Health checker detected backend 2 recovery");

    // 10. Verify all backends are healthy again
    let healthy: Vec<_> = backends
        .into_iter()
        .filter(|b| {
            b.admin_state == BackendAdminState::Enabled &&
            (b.status == BackendStatus::Online || b.status == BackendStatus::Unknown)
        })
        .collect();

    assert_eq!(healthy.len(), 3, "All 3 backends should be healthy after recovery");
    println!("✓ All backends healthy again");

    // Cleanup
    let _ = hc_shutdown_tx.send(true);
    let _ = tokio::time::timeout(Duration::from_secs(2), hc_handle).await;

    let _ = b1_shutdown_tx.send(()).await;
    let _ = b2_restart_shutdown_tx.send(()).await;
    let _ = b3_shutdown_tx.send(()).await;

    backend1.abort();
    backend2.abort();
    backend2_restart.abort();

    println!("\n✅ T051.S4 COMPLETE: Health check failover verified");
}
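
// A polling helper sketch that could replace the fixed sleeps above, using
// only the `LbMetadataStore` API these tests already call. Waiting for an
// expected status (up to a deadline) instead of sleeping a fixed interval
// makes failover tests less timing-sensitive on slow CI machines. This is
// an illustration, not part of the current test suite.
#[allow(dead_code)]
async fn wait_for_backend_status(
    metadata: &LbMetadataStore,
    pool: &Pool,
    port: u16,
    want: BackendStatus,
    deadline: Duration,
) -> bool {
    let start = tokio::time::Instant::now();
    while start.elapsed() < deadline {
        // Re-read backend state and stop as soon as the wanted status appears.
        let backends = metadata.list_backends(&pool.id).await.unwrap_or_default();
        if backends.iter().any(|b| b.port == port && b.status == want) {
            return true;
        }
        tokio::time::sleep(Duration::from_millis(100)).await;
    }
    false
}
// Example use in place of the fixed 3s sleep in step 6 above:
//   assert!(wait_for_backend_status(&metadata, &pool, backend2_port,
//       BackendStatus::Offline, Duration::from_secs(10)).await);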