fiberlb: add native BGP speaker and VM smoke test
Some checks failed
Nix CI / filter (push) Failing after 1s
Nix CI / gate () (push) Has been skipped
Nix CI / gate (shared crates) (push) Has been skipped
Nix CI / build () (push) Has been skipped
Nix CI / ci-status (push) Failing after 1s

This commit is contained in:
centra 2026-03-30 16:13:14 +09:00
parent 96d46a3603
commit ce4bab07d6
Signed by: centra
GPG key ID: 0C09689D20B25ACA
14 changed files with 1919 additions and 275 deletions

View file

@ -5,6 +5,6 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
tonic_build::configure() tonic_build::configure()
.build_server(false) .build_server(false)
.build_client(true) .build_client(true)
.compile(&["proto/api/gobgp.proto"], &["proto"])?; .compile_protos(&["proto/api/gobgp.proto"], &["proto"])?;
Ok(()) Ok(())
} }

File diff suppressed because it is too large Load diff

View file

@ -1,7 +1,7 @@
//! Server configuration //! Server configuration
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::net::SocketAddr; use std::net::{IpAddr, Ipv4Addr, SocketAddr};
/// TLS configuration /// TLS configuration
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
@ -70,6 +70,18 @@ pub struct ServerConfig {
/// Authentication configuration /// Authentication configuration
#[serde(default)] #[serde(default)]
pub auth: AuthConfig, pub auth: AuthConfig,
/// Backend health checker configuration
#[serde(default)]
pub health: HealthRuntimeConfig,
/// VIP advertisement reconciliation configuration
#[serde(default)]
pub vip_advertisement: VipAdvertisementConfig,
/// Native BGP speaker configuration
#[serde(default)]
pub bgp: BgpConfig,
} }
/// Authentication configuration /// Authentication configuration
@ -84,6 +96,160 @@ fn default_iam_server_addr() -> String {
"127.0.0.1:50051".to_string() "127.0.0.1:50051".to_string()
} }
/// Backend health checker runtime configuration.
///
/// Every field carries a serde default, so an omitted or empty `[health]`
/// table deserializes to the same values as [`HealthRuntimeConfig::default`].
// `PartialEq`/`Eq` added for consistency with `BgpPeerConfig`, so config
// snapshots can be compared directly (e.g. in tests or reload diffing).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct HealthRuntimeConfig {
    /// Interval between backend health check sweeps.
    #[serde(default = "default_health_check_interval_secs")]
    pub interval_secs: u64,
    /// Timeout for individual backend checks.
    #[serde(default = "default_health_check_timeout_secs")]
    pub timeout_secs: u64,
}
/// Shared default, in seconds, for both the health sweep interval and the
/// per-probe timeout.
const DEFAULT_HEALTH_CHECK_SECS: u64 = 5;

/// Serde default for `HealthRuntimeConfig::interval_secs`.
fn default_health_check_interval_secs() -> u64 {
    DEFAULT_HEALTH_CHECK_SECS
}

/// Serde default for `HealthRuntimeConfig::timeout_secs`.
fn default_health_check_timeout_secs() -> u64 {
    DEFAULT_HEALTH_CHECK_SECS
}
impl Default for HealthRuntimeConfig {
fn default() -> Self {
Self {
interval_secs: default_health_check_interval_secs(),
timeout_secs: default_health_check_timeout_secs(),
}
}
}
/// VIP advertisement reconciliation runtime configuration.
///
/// `interval_secs` has a serde default, so an omitted `[vip_advertisement]`
/// table deserializes to the same value as the `Default` impl.
// `PartialEq`/`Eq` added for consistency with `BgpPeerConfig`, so config
// snapshots can be compared directly (e.g. in tests or reload diffing).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct VipAdvertisementConfig {
    /// Interval between BGP advertisement reconciliation sweeps.
    #[serde(default = "default_vip_check_interval_secs")]
    pub interval_secs: u64,
}
/// Serde default for `VipAdvertisementConfig::interval_secs`: 3-second sweeps.
fn default_vip_check_interval_secs() -> u64 {
    3
}
impl Default for VipAdvertisementConfig {
fn default() -> Self {
Self {
interval_secs: default_vip_check_interval_secs(),
}
}
}
/// Static BGP peer configuration.
///
/// Each entry describes one statically configured peer for an outbound eBGP
/// session; there is no dynamic/passive peer discovery in this config.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct BgpPeerConfig {
    /// Peer IP address or hostname.
    pub address: String,
    /// Peer TCP port. Defaults to the well-known BGP port (179) when omitted.
    #[serde(default = "default_bgp_peer_port")]
    pub port: u16,
    /// Peer AS number.
    pub asn: u32,
    /// Optional operator-visible description. Empty string when omitted.
    #[serde(default)]
    pub description: String,
}
/// Serde default for `BgpPeerConfig::port`: the IANA-assigned BGP port (179).
fn default_bgp_peer_port() -> u16 {
    179
}
/// Native BGP speaker configuration.
///
/// Every field has a serde default, so the `[bgp]` table may be omitted
/// entirely; the speaker stays inert unless `enabled` is set to `true`.
// `PartialEq`/`Eq` added for consistency with `BgpPeerConfig` (all fields are
// `Eq`-derivable), so configurations can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct BgpConfig {
    /// Whether FiberLB should originate VIP routes itself.
    #[serde(default)]
    pub enabled: bool,
    /// Local AS number.
    #[serde(default = "default_bgp_local_as")]
    pub local_as: u32,
    /// BGP router ID. Must be IPv4.
    #[serde(default = "default_bgp_router_id")]
    pub router_id: String,
    /// Optional explicit next-hop address. Falls back to `router_id`.
    #[serde(default)]
    pub next_hop: Option<String>,
    /// Requested hold time in seconds.
    #[serde(default = "default_bgp_hold_time_secs")]
    pub hold_time_secs: u16,
    /// Keepalive interval in seconds.
    #[serde(default = "default_bgp_keepalive_secs")]
    pub keepalive_secs: u16,
    /// Delay before reconnecting to a failed peer.
    #[serde(default = "default_bgp_connect_retry_secs")]
    pub connect_retry_secs: u64,
    /// Static peers for outbound eBGP sessions.
    #[serde(default)]
    pub peers: Vec<BgpPeerConfig>,
}
/// Serde default for `BgpConfig::local_as`: an ASN from the private-use range.
fn default_bgp_local_as() -> u32 {
    65001
}

/// Serde default for `BgpConfig::router_id`: the IPv4 loopback address.
fn default_bgp_router_id() -> String {
    Ipv4Addr::LOCALHOST.to_string()
}

/// Serde default for `BgpConfig::hold_time_secs` (seconds).
fn default_bgp_hold_time_secs() -> u16 {
    90
}

/// Serde default for `BgpConfig::keepalive_secs` (seconds) — one third of the
/// default hold time, the conventional ratio.
fn default_bgp_keepalive_secs() -> u16 {
    30
}

/// Serde default for `BgpConfig::connect_retry_secs` (seconds).
fn default_bgp_connect_retry_secs() -> u64 {
    5
}
impl BgpConfig {
    /// Effective next hop advertised in UPDATE messages.
    ///
    /// Uses `next_hop` when explicitly configured, otherwise parses
    /// `router_id` as the next hop.
    pub fn next_hop_addr(&self) -> std::result::Result<IpAddr, std::net::AddrParseError> {
        match self.next_hop.as_deref() {
            Some(explicit) => explicit.parse(),
            None => self.router_id.parse(),
        }
    }

    /// Parsed router ID as IPv4.
    pub fn router_id_addr(&self) -> std::result::Result<Ipv4Addr, std::net::AddrParseError> {
        self.router_id.parse::<Ipv4Addr>()
    }
}
impl Default for BgpConfig {
fn default() -> Self {
Self {
enabled: false,
local_as: default_bgp_local_as(),
router_id: default_bgp_router_id(),
next_hop: None,
hold_time_secs: default_bgp_hold_time_secs(),
keepalive_secs: default_bgp_keepalive_secs(),
connect_retry_secs: default_bgp_connect_retry_secs(),
peers: Vec::new(),
}
}
}
impl Default for AuthConfig { impl Default for AuthConfig {
fn default() -> Self { fn default() -> Self {
Self { Self {
@ -104,6 +270,9 @@ impl Default for ServerConfig {
log_level: "info".to_string(), log_level: "info".to_string(),
tls: None, tls: None,
auth: AuthConfig::default(), auth: AuthConfig::default(),
health: HealthRuntimeConfig::default(),
vip_advertisement: VipAdvertisementConfig::default(),
bgp: BgpConfig::default(),
} }
} }
} }

View file

@ -104,8 +104,8 @@ impl HealthChecker {
.await .await
.map_err(|e| HealthCheckError::MetadataError(e.to_string()))?; .map_err(|e| HealthCheckError::MetadataError(e.to_string()))?;
// Use first health check config, or default TCP check // Use the first enabled health check config, or default TCP check.
let hc_config = health_checks.into_iter().next(); let hc_config = health_checks.into_iter().find(|check| check.enabled);
// Check all backends in the pool // Check all backends in the pool
let backends = self let backends = self
@ -210,9 +210,10 @@ impl HealthChecker {
); );
// Write request // Write request
stream.writable().await.map_err(|e| { stream
HealthCheckError::HttpError(format!("stream not writable: {}", e)) .writable()
})?; .await
.map_err(|e| HealthCheckError::HttpError(format!("stream not writable: {}", e)))?;
match stream.try_write(request.as_bytes()) { match stream.try_write(request.as_bytes()) {
Ok(_) => {} Ok(_) => {}
@ -223,18 +224,19 @@ impl HealthChecker {
// Read response (just first line for status code) // Read response (just first line for status code)
let mut buf = [0u8; 128]; let mut buf = [0u8; 128];
stream.readable().await.map_err(|e| { stream
HealthCheckError::HttpError(format!("stream not readable: {}", e)) .readable()
})?; .await
.map_err(|e| HealthCheckError::HttpError(format!("stream not readable: {}", e)))?;
let n = match stream.try_read(&mut buf) { let n = match stream.try_read(&mut buf) {
Ok(n) => n, Ok(n) => n,
Err(ref e) if e.kind() == std::io::ErrorKind::WouldBlock => { Err(ref e) if e.kind() == std::io::ErrorKind::WouldBlock => {
// Wait a bit and try again // Wait a bit and try again
tokio::time::sleep(Duration::from_millis(100)).await; tokio::time::sleep(Duration::from_millis(100)).await;
stream.try_read(&mut buf).map_err(|e| { stream
HealthCheckError::HttpError(format!("read failed: {}", e)) .try_read(&mut buf)
})? .map_err(|e| HealthCheckError::HttpError(format!("read failed: {}", e)))?
} }
Err(e) => { Err(e) => {
return Err(HealthCheckError::HttpError(format!("read failed: {}", e))); return Err(HealthCheckError::HttpError(format!("read failed: {}", e)));
@ -250,8 +252,11 @@ impl HealthChecker {
let status_line = response.lines().next().unwrap_or(""); let status_line = response.lines().next().unwrap_or("");
// Check for 2xx status code // Check for 2xx status code
if status_line.contains(" 200 ") || status_line.contains(" 201 ") || if status_line.contains(" 200 ")
status_line.contains(" 202 ") || status_line.contains(" 204 ") { || status_line.contains(" 201 ")
|| status_line.contains(" 202 ")
|| status_line.contains(" 204 ")
{
Ok(()) Ok(())
} else { } else {
Err(HealthCheckError::HttpError(format!( Err(HealthCheckError::HttpError(format!(
@ -266,11 +271,13 @@ impl HealthChecker {
pub fn spawn_health_checker( pub fn spawn_health_checker(
metadata: Arc<LbMetadataStore>, metadata: Arc<LbMetadataStore>,
check_interval: Duration, check_interval: Duration,
check_timeout: Duration,
) -> (tokio::task::JoinHandle<()>, watch::Sender<bool>) { ) -> (tokio::task::JoinHandle<()>, watch::Sender<bool>) {
let (shutdown_tx, shutdown_rx) = watch::channel(false); let (shutdown_tx, shutdown_rx) = watch::channel(false);
let handle = tokio::spawn(async move { let handle = tokio::spawn(async move {
let mut checker = HealthChecker::new(metadata, check_interval, shutdown_rx); let mut checker =
HealthChecker::new(metadata, check_interval, shutdown_rx).with_timeout(check_timeout);
checker.run().await; checker.run().await;
}); });
@ -321,7 +328,8 @@ mod tests {
#[tokio::test] #[tokio::test]
async fn test_spawn_health_checker() { async fn test_spawn_health_checker() {
let metadata = Arc::new(LbMetadataStore::new_in_memory()); let metadata = Arc::new(LbMetadataStore::new_in_memory());
let (handle, shutdown_tx) = spawn_health_checker(metadata, Duration::from_secs(60)); let (handle, shutdown_tx) =
spawn_health_checker(metadata, Duration::from_secs(60), Duration::from_secs(5));
// Verify it started // Verify it started
assert!(!handle.is_finished()); assert!(!handle.is_finished());

View file

@ -1,5 +1,6 @@
//! FiberLB server implementation //! FiberLB server implementation
pub mod bgp_client;
pub mod config; pub mod config;
pub mod dataplane; pub mod dataplane;
pub mod healthcheck; pub mod healthcheck;
@ -9,13 +10,16 @@ pub mod maglev;
pub mod metadata; pub mod metadata;
pub mod services; pub mod services;
pub mod tls; pub mod tls;
pub mod vip_manager;
pub use bgp_client::{create_bgp_client, BgpClient, BgpError, NativeBgpSpeaker};
pub use config::ServerConfig; pub use config::ServerConfig;
pub use dataplane::DataPlane; pub use dataplane::DataPlane;
pub use healthcheck::{HealthChecker, spawn_health_checker}; pub use healthcheck::{spawn_health_checker, HealthChecker};
pub use l7_dataplane::L7DataPlane; pub use l7_dataplane::L7DataPlane;
pub use l7_router::L7Router; pub use l7_router::L7Router;
pub use maglev::{MaglevTable, ConnectionTracker}; pub use maglev::{ConnectionTracker, MaglevTable};
pub use metadata::LbMetadataStore; pub use metadata::LbMetadataStore;
pub use services::*; pub use services::*;
pub use tls::{build_tls_config, CertificateStore, SniCertResolver}; pub use tls::{build_tls_config, CertificateStore, SniCertResolver};
pub use vip_manager::VipManager;

View file

@ -3,9 +3,9 @@
//! Implementation of Google's Maglev consistent hashing algorithm for L4 load balancing. //! Implementation of Google's Maglev consistent hashing algorithm for L4 load balancing.
//! Reference: https://research.google/pubs/pub44824/ //! Reference: https://research.google/pubs/pub44824/
use fiberlb_types::Backend;
use std::collections::hash_map::DefaultHasher; use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher}; use std::hash::{Hash, Hasher};
use fiberlb_types::Backend;
/// Default lookup table size (prime number for better distribution) /// Default lookup table size (prime number for better distribution)
/// Google's paper uses 65537, but we use a smaller prime for memory efficiency /// Google's paper uses 65537, but we use a smaller prime for memory efficiency
@ -116,9 +116,7 @@ impl MaglevTable {
let offset = Self::hash_offset(backend, size); let offset = Self::hash_offset(backend, size);
let skip = Self::hash_skip(backend, size); let skip = Self::hash_skip(backend, size);
(0..size) (0..size).map(|j| (offset + j * skip) % size).collect()
.map(|j| (offset + j * skip) % size)
.collect()
} }
/// Hash function for offset calculation /// Hash function for offset calculation
@ -134,10 +132,26 @@ impl MaglevTable {
let mut hasher = DefaultHasher::new(); let mut hasher = DefaultHasher::new();
backend.hash(&mut hasher); backend.hash(&mut hasher);
"skip".hash(&mut hasher); "skip".hash(&mut hasher);
let skip = (hasher.finish() as usize) % (size - 1) + 1; let mut skip = (hasher.finish() as usize) % (size - 1) + 1;
// For non-prime table sizes we still need a full permutation, so force
// the step to be coprime with the table size.
while Self::gcd(skip, size) != 1 {
skip = (skip % (size - 1)) + 1;
}
skip skip
} }
fn gcd(mut a: usize, mut b: usize) -> usize {
while b != 0 {
let remainder = a % b;
a = b;
b = remainder;
}
a
}
/// Hash a connection key (e.g., "192.168.1.1:54321") /// Hash a connection key (e.g., "192.168.1.1:54321")
fn hash_key(key: &str) -> u64 { fn hash_key(key: &str) -> u64 {
let mut hasher = DefaultHasher::new(); let mut hasher = DefaultHasher::new();
@ -291,11 +305,9 @@ mod tests {
// Count how many keys map to the same backend // Count how many keys map to the same backend
let mut unchanged = 0; let mut unchanged = 0;
let mut total = 0;
for (key, old_backend) in &mappings { for (key, old_backend) in &mappings {
if let Some(idx) = table2.lookup(key) { if let Some(idx) = table2.lookup(key) {
if let Some(new_backend) = table2.backend_id(idx) { if let Some(new_backend) = table2.backend_id(idx) {
total += 1;
// Only keys that were on removed backend should change // Only keys that were on removed backend should change
if old_backend != "10.0.0.2:8080" { if old_backend != "10.0.0.2:8080" {
if old_backend == new_backend { if old_backend == new_backend {

View file

@ -4,31 +4,30 @@ use std::sync::Arc;
use chainfire_client::Client as ChainFireClient; use chainfire_client::Client as ChainFireClient;
use clap::Parser; use clap::Parser;
use metrics_exporter_prometheus::PrometheusBuilder;
use fiberlb_api::{ use fiberlb_api::{
backend_service_server::BackendServiceServer,
certificate_service_server::CertificateServiceServer,
health_check_service_server::HealthCheckServiceServer,
l7_policy_service_server::L7PolicyServiceServer, l7_rule_service_server::L7RuleServiceServer,
listener_service_server::ListenerServiceServer,
load_balancer_service_server::LoadBalancerServiceServer, load_balancer_service_server::LoadBalancerServiceServer,
pool_service_server::PoolServiceServer, pool_service_server::PoolServiceServer,
backend_service_server::BackendServiceServer,
listener_service_server::ListenerServiceServer,
health_check_service_server::HealthCheckServiceServer,
l7_policy_service_server::L7PolicyServiceServer,
l7_rule_service_server::L7RuleServiceServer,
certificate_service_server::CertificateServiceServer,
}; };
use fiberlb_server::{ use fiberlb_server::{
config::MetadataBackend, config::MetadataBackend, create_bgp_client, spawn_health_checker, BackendServiceImpl,
LbMetadataStore, LoadBalancerServiceImpl, PoolServiceImpl, BackendServiceImpl, CertificateServiceImpl, DataPlane, HealthCheckServiceImpl, L7DataPlane, L7PolicyServiceImpl,
ListenerServiceImpl, HealthCheckServiceImpl, L7PolicyServiceImpl, L7RuleServiceImpl, L7RuleServiceImpl, LbMetadataStore, ListenerServiceImpl, LoadBalancerServiceImpl,
CertificateServiceImpl, DataPlane, L7DataPlane, ServerConfig, PoolServiceImpl, ServerConfig, VipManager,
}; };
use iam_service_auth::AuthService; use iam_service_auth::AuthService;
use metrics_exporter_prometheus::PrometheusBuilder;
use std::net::SocketAddr; use std::net::SocketAddr;
use std::path::PathBuf; use std::path::PathBuf;
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use tonic::transport::{Certificate, Identity, Server, ServerTlsConfig}; use tonic::transport::{Certificate, Identity, Server, ServerTlsConfig};
use tonic::{Request, Status}; use tonic::{Request, Status};
use tonic_health::server::health_reporter; use tonic_health::server::health_reporter;
use tracing_subscriber::EnvFilter; use tracing_subscriber::EnvFilter;
use std::time::{SystemTime, UNIX_EPOCH};
/// FiberLB load balancer server /// FiberLB load balancer server
#[derive(Parser, Debug)] #[derive(Parser, Debug)]
@ -113,8 +112,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
// Initialize tracing // Initialize tracing
tracing_subscriber::fmt() tracing_subscriber::fmt()
.with_env_filter( .with_env_filter(
EnvFilter::try_from_default_env() EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(&config.log_level)),
.unwrap_or_else(|_| EnvFilter::new(&config.log_level)),
) )
.init(); .init();
@ -158,8 +156,8 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
config.flaredb_endpoint.clone(), config.flaredb_endpoint.clone(),
config.chainfire_endpoint.clone(), config.chainfire_endpoint.clone(),
) )
.await .await
.map_err(|e| format!("Failed to initialize FlareDB metadata store: {}", e))?, .map_err(|e| format!("Failed to initialize FlareDB metadata store: {}", e))?,
) )
} }
MetadataBackend::Postgres | MetadataBackend::Sqlite => { MetadataBackend::Postgres | MetadataBackend::Sqlite => {
@ -187,7 +185,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
}; };
// Initialize IAM authentication service // Initialize IAM authentication service
tracing::info!("Connecting to IAM server at {}", config.auth.iam_server_addr); tracing::info!(
"Connecting to IAM server at {}",
config.auth.iam_server_addr
);
let auth_service = AuthService::new(&config.auth.iam_server_addr) let auth_service = AuthService::new(&config.auth.iam_server_addr)
.await .await
.map_err(|e| format!("Failed to connect to IAM server: {}", e))?; .map_err(|e| format!("Failed to connect to IAM server: {}", e))?;
@ -228,6 +229,34 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
restore_runtime_listeners(metadata.clone(), dataplane.clone(), l7_dataplane.clone()).await?; restore_runtime_listeners(metadata.clone(), dataplane.clone(), l7_dataplane.clone()).await?;
let (_health_task, health_shutdown_tx) = spawn_health_checker(
metadata.clone(),
Duration::from_secs(config.health.interval_secs.max(1)),
Duration::from_secs(config.health.timeout_secs.max(1)),
);
let vip_manager = if config.bgp.enabled {
let next_hop = config.bgp.next_hop_addr().map_err(|error| {
format!(
"failed to parse FiberLB BGP next hop '{}': {}",
config
.bgp
.next_hop
.as_deref()
.unwrap_or(&config.bgp.router_id),
error
)
})?;
let bgp = create_bgp_client(config.bgp.clone()).await?;
let manager = Arc::new(VipManager::new(bgp, metadata.clone(), next_hop));
let _vip_task = manager.clone().spawn(Duration::from_secs(
config.vip_advertisement.interval_secs.max(1),
));
Some(manager)
} else {
None
};
// Setup health service // Setup health service
let (mut health_reporter, health_service) = health_reporter(); let (mut health_reporter, health_service) = health_reporter();
health_reporter health_reporter
@ -289,7 +318,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
// Start gRPC server // Start gRPC server
tracing::info!("gRPC server listening on {}", grpc_addr); tracing::info!("gRPC server listening on {}", grpc_addr);
server let server_result = server
.add_service(health_service) .add_service(health_service)
.add_service(tonic::codegen::InterceptedService::new( .add_service(tonic::codegen::InterceptedService::new(
LoadBalancerServiceServer::new(lb_service), LoadBalancerServiceServer::new(lb_service),
@ -324,8 +353,17 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
make_interceptor(auth_service.clone()), make_interceptor(auth_service.clone()),
)) ))
.serve(grpc_addr) .serve(grpc_addr)
.await?; .await;
let _ = health_shutdown_tx.send(true);
if let Some(vip_manager) = vip_manager {
if let Err(error) = vip_manager.shutdown().await {
tracing::warn!(error = %error, "FiberLB VIP manager shutdown failed");
}
}
server_result?;
Ok(()) Ok(())
} }
@ -389,31 +427,29 @@ async fn register_chainfire_membership(
let value = format!(r#"{{"addr":"{}","ts":{}}}"#, addr, ts); let value = format!(r#"{{"addr":"{}","ts":{}}}"#, addr, ts);
let deadline = tokio::time::Instant::now() + std::time::Duration::from_secs(120); let deadline = tokio::time::Instant::now() + std::time::Duration::from_secs(120);
let mut attempt = 0usize; let mut attempt = 0usize;
let mut last_error = String::new(); let last_error = loop {
loop {
attempt += 1; attempt += 1;
match ChainFireClient::connect(endpoint).await { let current_error = match ChainFireClient::connect(endpoint).await {
Ok(mut client) => match client.put_str(&key, &value).await { Ok(mut client) => match client.put_str(&key, &value).await {
Ok(_) => return Ok(()), Ok(_) => return Ok(()),
Err(error) => last_error = format!("put failed: {}", error), Err(error) => format!("put failed: {}", error),
}, },
Err(error) => last_error = format!("connect failed: {}", error), Err(error) => format!("connect failed: {}", error),
} };
if tokio::time::Instant::now() >= deadline { if tokio::time::Instant::now() >= deadline {
break; break current_error;
} }
tracing::warn!( tracing::warn!(
attempt, attempt,
endpoint, endpoint,
service, service,
error = %last_error, error = %current_error,
"retrying ChainFire membership registration" "retrying ChainFire membership registration"
); );
tokio::time::sleep(std::time::Duration::from_secs(2)).await; tokio::time::sleep(std::time::Duration::from_secs(2)).await;
} };
Err(std::io::Error::other(format!( Err(std::io::Error::other(format!(
"failed to register ChainFire membership for {} via {} after {} attempts: {}", "failed to register ChainFire membership for {} via {} after {} attempts: {}",
@ -435,9 +471,15 @@ async fn restore_runtime_listeners(
} }
let result = if listener.is_l7() { let result = if listener.is_l7() {
l7_dataplane.start_listener(listener.id).await.map_err(|e| e.to_string()) l7_dataplane
.start_listener(listener.id)
.await
.map_err(|e| e.to_string())
} else { } else {
dataplane.start_listener(listener.id).await.map_err(|e| e.to_string()) dataplane
.start_listener(listener.id)
.await
.map_err(|e| e.to_string())
}; };
if let Err(err) = result { if let Err(err) = result {

View file

@ -12,7 +12,7 @@ use tokio::sync::RwLock;
use tokio::time::sleep; use tokio::time::sleep;
use tracing::{debug, error, info, warn}; use tracing::{debug, error, info, warn};
use crate::bgp_client::{BgpClient, BgpConfig}; use crate::bgp_client::BgpClient;
use crate::metadata::LbMetadataStore; use crate::metadata::LbMetadataStore;
use fiberlb_types::LoadBalancerId; use fiberlb_types::LoadBalancerId;
@ -43,11 +43,7 @@ pub struct VipManager {
impl VipManager { impl VipManager {
/// Create a new VIP manager /// Create a new VIP manager
pub fn new( pub fn new(bgp: Arc<dyn BgpClient>, metadata: Arc<LbMetadataStore>, next_hop: IpAddr) -> Self {
bgp: Arc<dyn BgpClient>,
metadata: Arc<LbMetadataStore>,
next_hop: IpAddr,
) -> Self {
Self { Self {
bgp, bgp,
metadata, metadata,
@ -119,7 +115,10 @@ impl VipManager {
} }
/// Check if a load balancer has any healthy backends /// Check if a load balancer has any healthy backends
async fn has_healthy_backends(&self, lb_id: &LoadBalancerId) -> Result<bool, Box<dyn std::error::Error>> { async fn has_healthy_backends(
&self,
lb_id: &LoadBalancerId,
) -> Result<bool, Box<dyn std::error::Error>> {
// Get all pools for this load balancer // Get all pools for this load balancer
let pools = self.metadata.list_pools(lb_id).await?; let pools = self.metadata.list_pools(lb_id).await?;
@ -129,8 +128,7 @@ impl VipManager {
// Check if any backend is healthy // Check if any backend is healthy
for backend in backends { for backend in backends {
use fiberlb_types::BackendStatus; if backend.is_available() {
if backend.status == BackendStatus::Online {
return Ok(true); return Ok(true);
} }
} }
@ -144,7 +142,10 @@ impl VipManager {
/// Compares current advertisements with desired active VIPs and: /// Compares current advertisements with desired active VIPs and:
/// - Announces new VIPs that should be active /// - Announces new VIPs that should be active
/// - Withdraws VIPs that should no longer be active /// - Withdraws VIPs that should no longer be active
async fn reconcile_advertisements(&self, active_vips: &HashSet<IpAddr>) -> Result<(), Box<dyn std::error::Error>> { async fn reconcile_advertisements(
&self,
active_vips: &HashSet<IpAddr>,
) -> Result<(), Box<dyn std::error::Error>> {
let mut state = self.vip_state.write().await; let mut state = self.vip_state.write().await;
// Find VIPs to announce (active but not yet advertised) // Find VIPs to announce (active but not yet advertised)

View file

@ -918,6 +918,15 @@
} }
); );
fiberlb-native-bgp-vm-smoke = pkgs.testers.runNixOSTest (
import ./nix/tests/fiberlb-native-bgp-vm-smoke.nix {
inherit pkgs;
photoncloudPackages = self.packages.${system};
photoncloudModule = self.nixosModules.default;
nixNosModule = nix-nos.nixosModules.default;
}
);
deployer-bootstrap-e2e = pkgs.runCommand "deployer-bootstrap-e2e" { deployer-bootstrap-e2e = pkgs.runCommand "deployer-bootstrap-e2e" {
nativeBuildInputs = with pkgs; [ nativeBuildInputs = with pkgs; [
bash bash

View file

@ -55,7 +55,7 @@ let
in { in {
config = mkIf (config.nix-nos.enable && cfg.enable && cfg.backend == "gobgp") { config = mkIf (config.nix-nos.enable && cfg.enable && cfg.backend == "gobgp") {
# Install GoBGP package # Install GoBGP package
environment.systemPackages = [ pkgs.gobgp ]; environment.systemPackages = [ pkgs.gobgp pkgs.gobgpd ];
# GoBGP systemd service # GoBGP systemd service
systemd.services.gobgpd = { systemd.services.gobgpd = {
@ -65,7 +65,7 @@ in {
serviceConfig = { serviceConfig = {
Type = "simple"; Type = "simple";
ExecStart = "${pkgs.gobgp}/bin/gobgpd -f ${gobgpConfig}"; ExecStart = "${pkgs.gobgpd}/bin/gobgpd -f ${gobgpConfig}";
Restart = "on-failure"; Restart = "on-failure";
RestartSec = "5s"; RestartSec = "5s";
}; };

View file

@ -3,7 +3,35 @@
let let
cfg = config.services.fiberlb; cfg = config.services.fiberlb;
tomlFormat = pkgs.formats.toml { }; tomlFormat = pkgs.formats.toml { };
fiberlbConfigFile = tomlFormat.generate "fiberlb.toml" { bgpPeerType = lib.types.submodule {
options = {
address = lib.mkOption {
type = lib.types.str;
description = "BGP peer IP address or hostname.";
example = "192.0.2.1";
};
port = lib.mkOption {
type = lib.types.port;
default = 179;
description = "BGP peer TCP port.";
};
asn = lib.mkOption {
type = lib.types.ints.positive;
description = "Peer AS number.";
example = 65020;
};
description = lib.mkOption {
type = lib.types.str;
default = "";
description = "Optional description used for logs and operators.";
};
};
};
fiberlbBaseConfig = {
grpc_addr = "0.0.0.0:${toString cfg.port}"; grpc_addr = "0.0.0.0:${toString cfg.port}";
log_level = "info"; log_level = "info";
auth = { auth = {
@ -12,7 +40,52 @@ let
then cfg.iamAddr then cfg.iamAddr
else "127.0.0.1:50080"; else "127.0.0.1:50080";
}; };
health = {
interval_secs = cfg.healthCheckIntervalSecs;
timeout_secs = cfg.healthCheckTimeoutSecs;
};
vip_advertisement = {
interval_secs = cfg.vipCheckIntervalSecs;
};
} // lib.optionalAttrs cfg.bgp.enable {
bgp =
{
enabled = true;
local_as = cfg.bgp.localAs;
router_id =
if cfg.bgp.routerId != null
then cfg.bgp.routerId
else "127.0.0.1";
hold_time_secs = cfg.bgp.holdTimeSecs;
keepalive_secs = cfg.bgp.keepaliveSecs;
connect_retry_secs = cfg.bgp.connectRetrySecs;
peers = map
(peer: {
inherit (peer) address port asn description;
})
cfg.bgp.peers;
}
// lib.optionalAttrs (cfg.bgp.nextHop != null) {
next_hop = cfg.bgp.nextHop;
};
}; };
fiberlbConfigFile = tomlFormat.generate "fiberlb.toml" (lib.recursiveUpdate fiberlbBaseConfig cfg.settings);
flaredbDependencies = lib.optional (cfg.metadataBackend == "flaredb") "flaredb.service";
normalizedDatabaseUrl =
let
sqliteUrl =
if cfg.databaseUrl != null
&& cfg.metadataBackend == "sqlite"
&& lib.hasPrefix "sqlite:/" cfg.databaseUrl
&& !(lib.hasPrefix "sqlite://" cfg.databaseUrl)
then "sqlite://${lib.removePrefix "sqlite:" cfg.databaseUrl}"
else cfg.databaseUrl;
in
if sqliteUrl != null
&& cfg.metadataBackend == "sqlite"
&& !(lib.hasInfix "?" sqliteUrl)
then "${sqliteUrl}?mode=rwc"
else sqliteUrl;
in in
{ {
options.services.fiberlb = { options.services.fiberlb = {
@ -64,6 +137,72 @@ in
description = "Enable single-node mode (required when metadata backend is SQLite)"; description = "Enable single-node mode (required when metadata backend is SQLite)";
}; };
healthCheckIntervalSecs = lib.mkOption {
type = lib.types.ints.positive;
default = 5;
description = "Interval between FiberLB backend health sweeps.";
};
healthCheckTimeoutSecs = lib.mkOption {
type = lib.types.ints.positive;
default = 5;
description = "Timeout for each FiberLB backend health probe.";
};
vipCheckIntervalSecs = lib.mkOption {
type = lib.types.ints.positive;
default = 3;
description = "Interval between FiberLB VIP-to-BGP reconciliation sweeps.";
};
bgp = {
enable = lib.mkEnableOption "FiberLB native BGP VIP advertisement";
localAs = lib.mkOption {
type = lib.types.ints.positive;
default = 65001;
description = "Local AS number used by FiberLB's native BGP speaker.";
};
routerId = lib.mkOption {
type = lib.types.nullOr lib.types.str;
default = null;
description = "IPv4 router ID used by FiberLB's native BGP speaker.";
example = "192.0.2.10";
};
nextHop = lib.mkOption {
type = lib.types.nullOr lib.types.str;
default = null;
description = "Explicit BGP NEXT_HOP address. Defaults to routerId when unset.";
example = "192.0.2.10";
};
holdTimeSecs = lib.mkOption {
type = lib.types.ints.positive;
default = 90;
description = "Requested BGP hold time in seconds.";
};
keepaliveSecs = lib.mkOption {
type = lib.types.ints.positive;
default = 30;
description = "BGP keepalive interval in seconds.";
};
connectRetrySecs = lib.mkOption {
type = lib.types.ints.positive;
default = 5;
description = "Delay before FiberLB reconnects to a failed BGP peer.";
};
peers = lib.mkOption {
type = lib.types.listOf bgpPeerType;
default = [ ];
description = "Static BGP peers for FiberLB's native speaker.";
};
};
dataDir = lib.mkOption { dataDir = lib.mkOption {
type = lib.types.path; type = lib.types.path;
default = "/var/lib/fiberlb"; default = "/var/lib/fiberlb";
@ -84,6 +223,25 @@ in
}; };
config = lib.mkIf cfg.enable { config = lib.mkIf cfg.enable {
assertions = [
{
assertion = cfg.metadataBackend != "sqlite" || cfg.singleNode;
message = "services.fiberlb.singleNode must be true when metadataBackend is sqlite";
}
{
assertion = cfg.metadataBackend == "flaredb" || cfg.databaseUrl != null;
message = "services.fiberlb.databaseUrl is required when metadataBackend is postgres or sqlite";
}
{
assertion = (!cfg.bgp.enable) || cfg.bgp.routerId != null;
message = "services.fiberlb.bgp.routerId must be set when native BGP is enabled";
}
{
assertion = (!cfg.bgp.enable) || ((builtins.length cfg.bgp.peers) > 0);
message = "services.fiberlb.bgp.peers must contain at least one peer when native BGP is enabled";
}
];
# Create system user # Create system user
users.users.fiberlb = { users.users.fiberlb = {
isSystemUser = true; isSystemUser = true;
@ -98,8 +256,8 @@ in
systemd.services.fiberlb = { systemd.services.fiberlb = {
description = "FiberLB Load Balancing Service"; description = "FiberLB Load Balancing Service";
wantedBy = [ "multi-user.target" ]; wantedBy = [ "multi-user.target" ];
after = [ "network.target" "iam.service" "flaredb.service" ]; after = [ "network.target" "iam.service" ] ++ flaredbDependencies;
requires = [ "iam.service" "flaredb.service" ]; requires = [ "iam.service" ] ++ flaredbDependencies;
serviceConfig = { serviceConfig = {
Type = "simple"; Type = "simple";
@ -124,8 +282,8 @@ in
"RUST_LOG=info" "RUST_LOG=info"
"FIBERLB_FLAREDB_ENDPOINT=${if cfg.flaredbAddr != null then cfg.flaredbAddr else "127.0.0.1:2479"}" "FIBERLB_FLAREDB_ENDPOINT=${if cfg.flaredbAddr != null then cfg.flaredbAddr else "127.0.0.1:2479"}"
"FIBERLB_METADATA_BACKEND=${cfg.metadataBackend}" "FIBERLB_METADATA_BACKEND=${cfg.metadataBackend}"
] ++ lib.optional (cfg.databaseUrl != null) "FIBERLB_METADATA_DATABASE_URL=${cfg.databaseUrl}" ] ++ lib.optional (normalizedDatabaseUrl != null) "FIBERLB_METADATA_DATABASE_URL=${normalizedDatabaseUrl}"
++ lib.optional cfg.singleNode "FIBERLB_SINGLE_NODE=1" ++ lib.optional cfg.singleNode "FIBERLB_SINGLE_NODE=true"
++ lib.optional (cfg.chainfireAddr != null) "FIBERLB_CHAINFIRE_ENDPOINT=http://${cfg.chainfireAddr}"; ++ lib.optional (cfg.chainfireAddr != null) "FIBERLB_CHAINFIRE_ENDPOINT=http://${cfg.chainfireAddr}";
# Start command # Start command

View file

@ -122,7 +122,10 @@ in
IAM_DATABASE_URL = cfg.databaseUrl; IAM_DATABASE_URL = cfg.databaseUrl;
}) })
(lib.mkIf cfg.singleNode { (lib.mkIf cfg.singleNode {
IAM_SINGLE_NODE = "1"; IAM_SINGLE_NODE = "true";
})
(lib.mkIf (cfg.storeBackend == "memory") {
IAM_ALLOW_MEMORY_BACKEND = "1";
}) })
]; ];

View file

@ -38,7 +38,7 @@ in {
vips = mkOption { vips = mkOption {
type = types.listOf types.str; type = types.listOf types.str;
default = []; default = [];
description = "VIPs to advertise via BGP (CIDR notation)"; description = "Legacy static VIP hints. FiberLB native BGP ignores this list and advertises active load balancer VIPs dynamically.";
example = [ "203.0.113.10/32" "203.0.113.11/32" ]; example = [ "203.0.113.10/32" "203.0.113.11/32" ];
}; };
@ -75,44 +75,30 @@ in {
assertion = clusterCfg.bgp.asn > 0; assertion = clusterCfg.bgp.asn > 0;
message = "plasmacloud.cluster.bgp.asn must be configured for FiberLB BGP"; message = "plasmacloud.cluster.bgp.asn must be configured for FiberLB BGP";
} }
{
assertion = (length cfg.fiberlbBgp.vips) > 0;
message = "plasmacloud.network.fiberlbBgp.vips must contain at least one VIP";
}
{ {
assertion = (length cfg.fiberlbBgp.peers) > 0; assertion = (length cfg.fiberlbBgp.peers) > 0;
message = "plasmacloud.network.fiberlbBgp.peers must contain at least one BGP peer"; message = "plasmacloud.network.fiberlbBgp.peers must contain at least one BGP peer";
} }
{
assertion = config.services.fiberlb.enable or false;
message = "plasmacloud.network.fiberlbBgp.enable requires services.fiberlb.enable";
}
]; ];
# Wire to nix-nos.bgp (Layer 1) services.fiberlb.bgp = {
nix-nos.enable = true;
nix-nos.bgp = {
enable = true; enable = true;
backend = "gobgp"; # FiberLB uses GoBGP localAs = clusterCfg.bgp.asn;
asn = clusterCfg.bgp.asn;
# Auto-detect router ID from primary IP or use configured value
routerId = routerId =
if cfg.fiberlbBgp.routerId != null if cfg.fiberlbBgp.routerId != null
then cfg.fiberlbBgp.routerId then cfg.fiberlbBgp.routerId
else else
# Fallback to a simple IP extraction from node config
let let
hostname = config.networking.hostName; hostname = config.networking.hostName;
node = clusterCfg.nodes.${hostname} or null; node = clusterCfg.nodes.${hostname} or null;
in in
if node != null then node.ip else "127.0.0.1"; if node != null then node.ip else "127.0.0.1";
peers = cfg.fiberlbBgp.peers; peers = cfg.fiberlbBgp.peers;
# Convert VIPs to BGP announcements
announcements = map (vip: { prefix = vip; }) cfg.fiberlbBgp.vips;
}; };
# FiberLB service configuration (if FiberLB is enabled)
# Note: This assumes fiberlb service is defined elsewhere
# services.fiberlb.bgp.gobgpAddress = mkIf (config.services.fiberlb.enable or false) "127.0.0.1:50051";
}) })
# PrismNET OVN integration # PrismNET OVN integration

View file

@ -0,0 +1,378 @@
# NixOS VM smoke test for FiberLB's native BGP speaker.
#
# Topology: two VMs on one VLAN.
#   router (192.168.100.1, AS 65020) — runs gobgpd as the upstream BGP peer.
#   lb     (192.168.100.2, AS 65010) — runs IAM + FiberLB with native BGP
#          enabled, plus a local HTTP server used as the health-checked backend.
# The test creates a load balancer via gRPC and asserts that its VIP is
# advertised to (and withdrawn from) the router's RIB as the backend's health
# flips.
{
  pkgs,
  photoncloudPackages,
  photoncloudModule,
  nixNosModule,
}:
let
  # gobgpd config for the router peer: local AS 65020, router-id 192.168.100.1,
  # single neighbor — the FiberLB node at 192.168.100.2 (AS 65010).
  gobgpdConfig = pkgs.writeText "fiberlb-native-bgp-peer.json" (builtins.toJSON {
    global = {
      config = {
        as = 65020;
        router-id = "192.168.100.1";
      };
    };
    neighbors = [
      {
        config = {
          neighbor-address = "192.168.100.2";
          peer-as = 65010;
          description = "fiberlb-under-test";
        };
      }
    ];
  });
  # Proto files grpcurl uses inside the VMs to call the IAM and FiberLB APIs.
  iamProtoDir = ../../iam/proto;
  iamProto = "iam.proto";
  fiberlbProtoDir = ../../fiberlb/crates/fiberlb-api/proto;
  fiberlbProto = "fiberlb.proto";
in
{
  name = "fiberlb-native-bgp-vm-smoke";
  nodes = {
    # Upstream BGP peer: accepts FiberLB's session and holds the RIB that the
    # test script inspects with the gobgp CLI.
    router =
      { ... }:
      {
        networking.hostName = "router";
        networking.useDHCP = false;
        # Firewall off so BGP (tcp/179) and the gobgpd gRPC API are reachable.
        networking.firewall.enable = false;
        virtualisation.vlans = [ 1 ];
        networking.interfaces.eth1.ipv4.addresses = [
          {
            address = "192.168.100.1";
            prefixLength = 24;
          }
        ];
        environment.systemPackages = with pkgs; [
          gobgp
          gobgpd
          jq
        ];
        systemd.services.gobgpd-peer = {
          description = "GoBGP test peer for FiberLB native BGP smoke";
          wantedBy = [ "multi-user.target" ];
          after = [ "network.target" ];
          serviceConfig = {
            Type = "simple";
            # -t json selects the config format; --api-hosts exposes the gRPC
            # API the gobgp CLI talks to on port 50051.
            # NOTE(review): confirm the trailing "-p" flag against the gobgpd
            # version packaged in nixpkgs — its meaning is not evident here.
            ExecStart = "${pkgs.gobgpd}/bin/gobgpd -t json -f ${gobgpdConfig} --api-hosts 127.0.0.1:50051 -p";
            Restart = "on-failure";
            RestartSec = "2s";
          };
        };
        system.stateVersion = "24.11";
      };
    # System under test: IAM (memory store) + FiberLB (sqlite, single node)
    # with the native BGP speaker peered to the router VM.
    lb =
      { ... }:
      {
        imports = [
          nixNosModule
          photoncloudModule
        ];
        networking.hostName = "lb";
        networking.useDHCP = false;
        networking.firewall.enable = false;
        virtualisation.vlans = [ 1 ];
        networking.interfaces.eth1.ipv4.addresses = [
          {
            address = "192.168.100.2";
            prefixLength = 24;
          }
        ];
        environment.systemPackages = with pkgs; [
          grpcurl
          jq
          python3
        ];
        services.iam = {
          enable = true;
          package = photoncloudPackages.iam-server;
          port = 50080;
          httpPort = 8083;
          storeBackend = "memory";
        };
        # Test-only: allow IAM to start with a generated signing key instead of
        # a provisioned one.
        systemd.services.iam.environment = {
          IAM_ALLOW_RANDOM_SIGNING_KEY = "1";
        };
        services.fiberlb = {
          enable = true;
          package = photoncloudPackages.fiberlb-server;
          port = 50085;
          iamAddr = "192.168.100.2:50080";
          metadataBackend = "sqlite";
          databaseUrl = "sqlite:/var/lib/fiberlb/metadata.db";
          singleNode = true;
          # 1-second health/VIP intervals so backend-state transitions (and the
          # resulting BGP announce/withdraw) happen quickly in the test.
          healthCheckIntervalSecs = 1;
          healthCheckTimeoutSecs = 1;
          vipCheckIntervalSecs = 1;
          bgp = {
            enable = true;
            localAs = 65010;
            routerId = "192.168.100.2";
            nextHop = "192.168.100.2";
            # Short hold/keepalive timers keep session behavior fast in the VM.
            holdTimeSecs = 9;
            keepaliveSecs = 3;
            peers = [
              {
                address = "192.168.100.1";
                port = 179;
                asn = 65020;
                description = "router-peer";
              }
            ];
          };
        };
        # Local HTTP server that FiberLB health-checks on 127.0.0.1:18081.
        # Stopping/starting this unit drives the VIP withdraw/advertise cycle
        # asserted in the test script.
        systemd.services.mock-backend = {
          description = "FiberLB health-check backend";
          wantedBy = [ "multi-user.target" ];
          after = [ "network.target" ];
          serviceConfig = {
            Type = "simple";
            ExecStart = "${pkgs.python3}/bin/python -m http.server 18081 --bind 127.0.0.1";
            Restart = "always";
            RestartSec = "1s";
          };
        };
        system.stateVersion = "24.11";
      };
  };
  testScript = ''
    import json
    import shlex
    import time

    # Paths are interpolated by Nix into the generated test script.
    IAM_PROTO_DIR = "${iamProtoDir}"
    IAM_PROTO = "${iamProto}"
    FIBERLB_PROTO_DIR = "${fiberlbProtoDir}"
    FIBERLB_PROTO = "${fiberlbProto}"

    def grpcurl_json(machine, endpoint, import_path, proto, service, payload, headers=None):
        # Invoke grpcurl on `machine` against `service`, sending `payload` as
        # JSON; returns the parsed JSON response. Raises AssertionError with
        # the full command output on any non-zero exit.
        header_args = ""
        for header in headers or []:
            header_args += f" -H {shlex.quote(header)}"
        command = (
            f"grpcurl -plaintext{header_args} "
            f"-import-path {shlex.quote(import_path)} "
            f"-proto {shlex.quote(proto)} "
            f"-d {shlex.quote(json.dumps(payload))} "
            f"{shlex.quote(endpoint)} {shlex.quote(service)}"
        )
        # 2>&1 folds stderr into the captured output so failures are debuggable.
        status, output = machine.execute(f"timeout 15 sh -lc {shlex.quote(command + ' 2>&1')}")
        if status != 0:
            raise AssertionError(
                "grpcurl failed"
                f" service={service}"
                f" status={status}"
                f" payload={json.dumps(payload, sort_keys=True)}"
                f" output={output}"
            )
        return json.loads(output)

    def issue_project_admin_token(machine, org_id, project_id):
        # Bootstrap IAM: create a service-account principal, bind it to
        # roles/ProjectAdmin on the project scope, and issue a bearer token.
        # Each step is retried for up to 120s while IAM becomes ready.
        principal_id = f"fiberlb-smoke-{int(time.time())}"
        deadline = time.time() + 120

        def retry(action):
            last_error = None
            while time.time() < deadline:
                try:
                    return action()
                except Exception as exc:
                    last_error = exc
                    time.sleep(2)
            raise AssertionError(f"IAM bootstrap timed out: {last_error}")

        retry(lambda: grpcurl_json(
            machine,
            "127.0.0.1:50080",
            IAM_PROTO_DIR,
            IAM_PROTO,
            "iam.v1.IamAdmin/CreatePrincipal",
            {
                "id": principal_id,
                "kind": "PRINCIPAL_KIND_SERVICE_ACCOUNT",
                "name": principal_id,
                "orgId": org_id,
                "projectId": project_id,
            },
        ))
        retry(lambda: grpcurl_json(
            machine,
            "127.0.0.1:50080",
            IAM_PROTO_DIR,
            IAM_PROTO,
            "iam.v1.IamAdmin/CreateBinding",
            {
                "principal": {
                    "kind": "PRINCIPAL_KIND_SERVICE_ACCOUNT",
                    "id": principal_id,
                },
                "role": "roles/ProjectAdmin",
                "scope": {
                    "project": {
                        "id": project_id,
                        "orgId": org_id,
                    }
                },
            },
        ))
        token_response = retry(lambda: grpcurl_json(
            machine,
            "127.0.0.1:50080",
            IAM_PROTO_DIR,
            IAM_PROTO,
            "iam.v1.IamToken/IssueToken",
            {
                "principalId": principal_id,
                "principalKind": "PRINCIPAL_KIND_SERVICE_ACCOUNT",
                "scope": {
                    "project": {
                        "id": project_id,
                        "orgId": org_id,
                    }
                },
                "ttlSeconds": 3600,
            },
        ))
        return token_response["token"]

    def wait_for_backend_status(status, backend_id, token):
        # Poll FiberLB's GetBackend until .backend.status equals `status`
        # (jq -e exits non-zero until the expression is true).
        lb.wait_until_succeeds(
            "grpcurl -plaintext "
            f"-H {shlex.quote('authorization: Bearer ' + token)} "
            f"-import-path {shlex.quote(FIBERLB_PROTO_DIR)} "
            f"-proto {shlex.quote(FIBERLB_PROTO)} "
            f"-d {shlex.quote(json.dumps({'id': backend_id}))} "
            "127.0.0.1:50085 fiberlb.v1.BackendService/GetBackend "
            f"| jq -e {shlex.quote(f'.backend.status == \"{status}\"')}"
        )

    def wait_for_route(prefix, present):
        # Assert the router's GoBGP global RIB contains (present=True) or
        # eventually drops (present=False, 60s deadline) the given prefix.
        if present:
            router.wait_until_succeeds(
                f"gobgp -u 127.0.0.1 -p 50051 global rib | grep -F {shlex.quote(prefix)}"
            )
        else:
            deadline = time.time() + 60
            while time.time() < deadline:
                # `|| true` keeps succeed() from failing while the RIB is empty.
                output = router.succeed("gobgp -u 127.0.0.1 -p 50051 global rib || true")
                if prefix not in output:
                    return
                time.sleep(1)
            raise AssertionError(f"route {prefix} still present in GoBGP RIB")

    start_all()
    serial_stdout_off()

    # Wait for the peer daemon and all lb-side services to be listening.
    router.wait_for_unit("gobgpd-peer.service")
    router.wait_until_succeeds("ss -ltnH '( sport = :179 )' | grep -q LISTEN")
    lb.wait_for_unit("iam.service")
    lb.wait_until_succeeds("ss -ltnH '( sport = :50080 )' | grep -q LISTEN")
    lb.wait_for_unit("mock-backend.service")
    lb.wait_for_unit("fiberlb.service")
    lb.wait_until_succeeds("ss -ltnH '( sport = :50085 )' | grep -q LISTEN")

    # FiberLB's native speaker should appear as a neighbor on the router.
    router.wait_until_succeeds("gobgp -u 127.0.0.1 -p 50051 neighbor | grep -F 192.168.100.2")

    token = issue_project_admin_token(lb, "bgp-smoke-org", "bgp-smoke-project")

    # Create LB -> pool -> backend -> health check via the FiberLB gRPC API.
    lb_response = grpcurl_json(
        lb,
        "127.0.0.1:50085",
        FIBERLB_PROTO_DIR,
        FIBERLB_PROTO,
        "fiberlb.v1.LoadBalancerService/CreateLoadBalancer",
        {
            "name": "bgp-smoke-lb",
            "orgId": "bgp-smoke-org",
            "projectId": "bgp-smoke-project",
            "description": "native bgp smoke",
        },
        headers=[f"authorization: Bearer {token}"],
    )
    loadbalancer = lb_response["loadbalancer"]
    lb_id = loadbalancer["id"]
    # The VIP is advertised as a /32 host route.
    vip_prefix = f"{loadbalancer['vipAddress']}/32"

    pool_id = grpcurl_json(
        lb,
        "127.0.0.1:50085",
        FIBERLB_PROTO_DIR,
        FIBERLB_PROTO,
        "fiberlb.v1.PoolService/CreatePool",
        {
            "name": "bgp-smoke-pool",
            "loadbalancerId": lb_id,
            "algorithm": "POOL_ALGORITHM_ROUND_ROBIN",
            "protocol": "POOL_PROTOCOL_TCP",
        },
        headers=[f"authorization: Bearer {token}"],
    )["pool"]["id"]

    backend_id = grpcurl_json(
        lb,
        "127.0.0.1:50085",
        FIBERLB_PROTO_DIR,
        FIBERLB_PROTO,
        "fiberlb.v1.BackendService/CreateBackend",
        {
            "name": "bgp-smoke-backend",
            "poolId": pool_id,
            "address": "127.0.0.1",
            "port": 18081,
            "weight": 1,
        },
        headers=[f"authorization: Bearer {token}"],
    )["backend"]["id"]

    # 1s interval / 1-probe thresholds make health flips near-instant.
    grpcurl_json(
        lb,
        "127.0.0.1:50085",
        FIBERLB_PROTO_DIR,
        FIBERLB_PROTO,
        "fiberlb.v1.HealthCheckService/CreateHealthCheck",
        {
            "name": "bgp-smoke-health",
            "poolId": pool_id,
            "type": "HEALTH_CHECK_TYPE_HTTP",
            "intervalSeconds": 1,
            "timeoutSeconds": 1,
            "healthyThreshold": 1,
            "unhealthyThreshold": 1,
            "httpConfig": {
                "method": "GET",
                "path": "/",
                "expectedCodes": [200],
            },
        },
        headers=[f"authorization: Bearer {token}"],
    )

    # Healthy backend -> VIP advertised.
    wait_for_backend_status("BACKEND_STATUS_ONLINE", backend_id, token)
    wait_for_route(vip_prefix, True)

    # Kill the backend -> backend goes offline -> VIP withdrawn.
    lb.succeed("systemctl stop mock-backend.service")
    wait_for_backend_status("BACKEND_STATUS_OFFLINE", backend_id, token)
    wait_for_route(vip_prefix, False)

    # Restore the backend -> VIP re-advertised.
    lb.succeed("systemctl start mock-backend.service")
    lb.wait_for_unit("mock-backend.service")
    wait_for_backend_status("BACKEND_STATUS_ONLINE", backend_id, token)
    wait_for_route(vip_prefix, True)
  '';
}