fiberlb: add native BGP speaker and VM smoke test
This commit is contained in:
parent
96d46a3603
commit
ce4bab07d6
14 changed files with 1919 additions and 275 deletions
|
|
@ -5,6 +5,6 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||
tonic_build::configure()
|
||||
.build_server(false)
|
||||
.build_client(true)
|
||||
.compile(&["proto/api/gobgp.proto"], &["proto"])?;
|
||||
.compile_protos(&["proto/api/gobgp.proto"], &["proto"])?;
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -1,7 +1,7 @@
|
|||
//! Server configuration
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::net::SocketAddr;
|
||||
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
|
||||
|
||||
/// TLS configuration
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
|
|
@ -70,6 +70,18 @@ pub struct ServerConfig {
|
|||
/// Authentication configuration
|
||||
#[serde(default)]
|
||||
pub auth: AuthConfig,
|
||||
|
||||
/// Backend health checker configuration
|
||||
#[serde(default)]
|
||||
pub health: HealthRuntimeConfig,
|
||||
|
||||
/// VIP advertisement reconciliation configuration
|
||||
#[serde(default)]
|
||||
pub vip_advertisement: VipAdvertisementConfig,
|
||||
|
||||
/// Native BGP speaker configuration
|
||||
#[serde(default)]
|
||||
pub bgp: BgpConfig,
|
||||
}
|
||||
|
||||
/// Authentication configuration
|
||||
|
|
@ -84,6 +96,160 @@ fn default_iam_server_addr() -> String {
|
|||
"127.0.0.1:50051".to_string()
|
||||
}
|
||||
|
||||
/// Backend health checker runtime configuration.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct HealthRuntimeConfig {
|
||||
/// Interval between backend health check sweeps.
|
||||
#[serde(default = "default_health_check_interval_secs")]
|
||||
pub interval_secs: u64,
|
||||
|
||||
/// Timeout for individual backend checks.
|
||||
#[serde(default = "default_health_check_timeout_secs")]
|
||||
pub timeout_secs: u64,
|
||||
}
|
||||
|
||||
/// Fallback for `HealthRuntimeConfig::interval_secs` when the field is absent.
fn default_health_check_interval_secs() -> u64 {
    const DEFAULT_INTERVAL_SECS: u64 = 5;
    DEFAULT_INTERVAL_SECS
}
|
||||
|
||||
/// Fallback for `HealthRuntimeConfig::timeout_secs` when the field is absent.
fn default_health_check_timeout_secs() -> u64 {
    const DEFAULT_TIMEOUT_SECS: u64 = 5;
    DEFAULT_TIMEOUT_SECS
}
|
||||
|
||||
impl Default for HealthRuntimeConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
interval_secs: default_health_check_interval_secs(),
|
||||
timeout_secs: default_health_check_timeout_secs(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// VIP advertisement reconciliation runtime configuration.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct VipAdvertisementConfig {
|
||||
/// Interval between BGP advertisement reconciliation sweeps.
|
||||
#[serde(default = "default_vip_check_interval_secs")]
|
||||
pub interval_secs: u64,
|
||||
}
|
||||
|
||||
/// Fallback for `VipAdvertisementConfig::interval_secs` when the field is absent.
fn default_vip_check_interval_secs() -> u64 {
    const DEFAULT_INTERVAL_SECS: u64 = 3;
    DEFAULT_INTERVAL_SECS
}
|
||||
|
||||
impl Default for VipAdvertisementConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
interval_secs: default_vip_check_interval_secs(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Static BGP peer configuration.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct BgpPeerConfig {
|
||||
/// Peer IP address or hostname.
|
||||
pub address: String,
|
||||
|
||||
/// Peer TCP port.
|
||||
#[serde(default = "default_bgp_peer_port")]
|
||||
pub port: u16,
|
||||
|
||||
/// Peer AS number.
|
||||
pub asn: u32,
|
||||
|
||||
/// Optional operator-visible description.
|
||||
#[serde(default)]
|
||||
pub description: String,
|
||||
}
|
||||
|
||||
/// Fallback for `BgpPeerConfig::port` when the field is absent.
fn default_bgp_peer_port() -> u16 {
    const DEFAULT_BGP_PORT: u16 = 179;
    DEFAULT_BGP_PORT
}
|
||||
|
||||
/// Native BGP speaker configuration.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct BgpConfig {
|
||||
/// Whether FiberLB should originate VIP routes itself.
|
||||
#[serde(default)]
|
||||
pub enabled: bool,
|
||||
|
||||
/// Local AS number.
|
||||
#[serde(default = "default_bgp_local_as")]
|
||||
pub local_as: u32,
|
||||
|
||||
/// BGP router ID. Must be IPv4.
|
||||
#[serde(default = "default_bgp_router_id")]
|
||||
pub router_id: String,
|
||||
|
||||
/// Optional explicit next-hop address. Falls back to router_id.
|
||||
#[serde(default)]
|
||||
pub next_hop: Option<String>,
|
||||
|
||||
/// Requested hold time in seconds.
|
||||
#[serde(default = "default_bgp_hold_time_secs")]
|
||||
pub hold_time_secs: u16,
|
||||
|
||||
/// Keepalive interval in seconds.
|
||||
#[serde(default = "default_bgp_keepalive_secs")]
|
||||
pub keepalive_secs: u16,
|
||||
|
||||
/// Delay before reconnecting to a failed peer.
|
||||
#[serde(default = "default_bgp_connect_retry_secs")]
|
||||
pub connect_retry_secs: u64,
|
||||
|
||||
/// Static peers for outbound eBGP sessions.
|
||||
#[serde(default)]
|
||||
pub peers: Vec<BgpPeerConfig>,
|
||||
}
|
||||
|
||||
/// Fallback for `BgpConfig::local_as` when the field is absent.
fn default_bgp_local_as() -> u32 {
    const DEFAULT_LOCAL_AS: u32 = 65001;
    DEFAULT_LOCAL_AS
}
|
||||
|
||||
/// Fallback for `BgpConfig::router_id` when the field is absent: the IPv4
/// loopback address rendered as text.
fn default_bgp_router_id() -> String {
    let loopback = Ipv4Addr::LOCALHOST;
    loopback.to_string()
}
|
||||
|
||||
/// Fallback for `BgpConfig::hold_time_secs` when the field is absent.
fn default_bgp_hold_time_secs() -> u16 {
    const DEFAULT_HOLD_TIME_SECS: u16 = 90;
    DEFAULT_HOLD_TIME_SECS
}
|
||||
|
||||
/// Fallback for `BgpConfig::keepalive_secs` when the field is absent.
fn default_bgp_keepalive_secs() -> u16 {
    const DEFAULT_KEEPALIVE_SECS: u16 = 30;
    DEFAULT_KEEPALIVE_SECS
}
|
||||
|
||||
/// Fallback for `BgpConfig::connect_retry_secs` when the field is absent.
fn default_bgp_connect_retry_secs() -> u64 {
    const DEFAULT_CONNECT_RETRY_SECS: u64 = 5;
    DEFAULT_CONNECT_RETRY_SECS
}
|
||||
|
||||
impl BgpConfig {
|
||||
/// Effective next hop advertised in UPDATE messages.
|
||||
pub fn next_hop_addr(&self) -> std::result::Result<IpAddr, std::net::AddrParseError> {
|
||||
self.next_hop.as_deref().unwrap_or(&self.router_id).parse()
|
||||
}
|
||||
|
||||
/// Parsed router ID as IPv4.
|
||||
pub fn router_id_addr(&self) -> std::result::Result<Ipv4Addr, std::net::AddrParseError> {
|
||||
self.router_id.parse()
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for BgpConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
enabled: false,
|
||||
local_as: default_bgp_local_as(),
|
||||
router_id: default_bgp_router_id(),
|
||||
next_hop: None,
|
||||
hold_time_secs: default_bgp_hold_time_secs(),
|
||||
keepalive_secs: default_bgp_keepalive_secs(),
|
||||
connect_retry_secs: default_bgp_connect_retry_secs(),
|
||||
peers: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for AuthConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
|
|
@ -104,6 +270,9 @@ impl Default for ServerConfig {
|
|||
log_level: "info".to_string(),
|
||||
tls: None,
|
||||
auth: AuthConfig::default(),
|
||||
health: HealthRuntimeConfig::default(),
|
||||
vip_advertisement: VipAdvertisementConfig::default(),
|
||||
bgp: BgpConfig::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -104,8 +104,8 @@ impl HealthChecker {
|
|||
.await
|
||||
.map_err(|e| HealthCheckError::MetadataError(e.to_string()))?;
|
||||
|
||||
// Use first health check config, or default TCP check
|
||||
let hc_config = health_checks.into_iter().next();
|
||||
// Use the first enabled health check config, or default TCP check.
|
||||
let hc_config = health_checks.into_iter().find(|check| check.enabled);
|
||||
|
||||
// Check all backends in the pool
|
||||
let backends = self
|
||||
|
|
@ -210,9 +210,10 @@ impl HealthChecker {
|
|||
);
|
||||
|
||||
// Write request
|
||||
stream.writable().await.map_err(|e| {
|
||||
HealthCheckError::HttpError(format!("stream not writable: {}", e))
|
||||
})?;
|
||||
stream
|
||||
.writable()
|
||||
.await
|
||||
.map_err(|e| HealthCheckError::HttpError(format!("stream not writable: {}", e)))?;
|
||||
|
||||
match stream.try_write(request.as_bytes()) {
|
||||
Ok(_) => {}
|
||||
|
|
@ -223,18 +224,19 @@ impl HealthChecker {
|
|||
|
||||
// Read response (just first line for status code)
|
||||
let mut buf = [0u8; 128];
|
||||
stream.readable().await.map_err(|e| {
|
||||
HealthCheckError::HttpError(format!("stream not readable: {}", e))
|
||||
})?;
|
||||
stream
|
||||
.readable()
|
||||
.await
|
||||
.map_err(|e| HealthCheckError::HttpError(format!("stream not readable: {}", e)))?;
|
||||
|
||||
let n = match stream.try_read(&mut buf) {
|
||||
Ok(n) => n,
|
||||
Err(ref e) if e.kind() == std::io::ErrorKind::WouldBlock => {
|
||||
// Wait a bit and try again
|
||||
tokio::time::sleep(Duration::from_millis(100)).await;
|
||||
stream.try_read(&mut buf).map_err(|e| {
|
||||
HealthCheckError::HttpError(format!("read failed: {}", e))
|
||||
})?
|
||||
stream
|
||||
.try_read(&mut buf)
|
||||
.map_err(|e| HealthCheckError::HttpError(format!("read failed: {}", e)))?
|
||||
}
|
||||
Err(e) => {
|
||||
return Err(HealthCheckError::HttpError(format!("read failed: {}", e)));
|
||||
|
|
@ -250,8 +252,11 @@ impl HealthChecker {
|
|||
let status_line = response.lines().next().unwrap_or("");
|
||||
|
||||
// Check for 2xx status code
|
||||
if status_line.contains(" 200 ") || status_line.contains(" 201 ") ||
|
||||
status_line.contains(" 202 ") || status_line.contains(" 204 ") {
|
||||
if status_line.contains(" 200 ")
|
||||
|| status_line.contains(" 201 ")
|
||||
|| status_line.contains(" 202 ")
|
||||
|| status_line.contains(" 204 ")
|
||||
{
|
||||
Ok(())
|
||||
} else {
|
||||
Err(HealthCheckError::HttpError(format!(
|
||||
|
|
@ -266,11 +271,13 @@ impl HealthChecker {
|
|||
pub fn spawn_health_checker(
|
||||
metadata: Arc<LbMetadataStore>,
|
||||
check_interval: Duration,
|
||||
check_timeout: Duration,
|
||||
) -> (tokio::task::JoinHandle<()>, watch::Sender<bool>) {
|
||||
let (shutdown_tx, shutdown_rx) = watch::channel(false);
|
||||
|
||||
let handle = tokio::spawn(async move {
|
||||
let mut checker = HealthChecker::new(metadata, check_interval, shutdown_rx);
|
||||
let mut checker =
|
||||
HealthChecker::new(metadata, check_interval, shutdown_rx).with_timeout(check_timeout);
|
||||
checker.run().await;
|
||||
});
|
||||
|
||||
|
|
@ -321,7 +328,8 @@ mod tests {
|
|||
#[tokio::test]
|
||||
async fn test_spawn_health_checker() {
|
||||
let metadata = Arc::new(LbMetadataStore::new_in_memory());
|
||||
let (handle, shutdown_tx) = spawn_health_checker(metadata, Duration::from_secs(60));
|
||||
let (handle, shutdown_tx) =
|
||||
spawn_health_checker(metadata, Duration::from_secs(60), Duration::from_secs(5));
|
||||
|
||||
// Verify it started
|
||||
assert!(!handle.is_finished());
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
//! FiberLB server implementation
|
||||
|
||||
pub mod bgp_client;
|
||||
pub mod config;
|
||||
pub mod dataplane;
|
||||
pub mod healthcheck;
|
||||
|
|
@ -9,13 +10,16 @@ pub mod maglev;
|
|||
pub mod metadata;
|
||||
pub mod services;
|
||||
pub mod tls;
|
||||
pub mod vip_manager;
|
||||
|
||||
pub use bgp_client::{create_bgp_client, BgpClient, BgpError, NativeBgpSpeaker};
|
||||
pub use config::ServerConfig;
|
||||
pub use dataplane::DataPlane;
|
||||
pub use healthcheck::{HealthChecker, spawn_health_checker};
|
||||
pub use healthcheck::{spawn_health_checker, HealthChecker};
|
||||
pub use l7_dataplane::L7DataPlane;
|
||||
pub use l7_router::L7Router;
|
||||
pub use maglev::{MaglevTable, ConnectionTracker};
|
||||
pub use maglev::{ConnectionTracker, MaglevTable};
|
||||
pub use metadata::LbMetadataStore;
|
||||
pub use services::*;
|
||||
pub use tls::{build_tls_config, CertificateStore, SniCertResolver};
|
||||
pub use vip_manager::VipManager;
|
||||
|
|
|
|||
|
|
@ -3,9 +3,9 @@
|
|||
//! Implementation of Google's Maglev consistent hashing algorithm for L4 load balancing.
|
||||
//! Reference: https://research.google/pubs/pub44824/
|
||||
|
||||
use fiberlb_types::Backend;
|
||||
use std::collections::hash_map::DefaultHasher;
|
||||
use std::hash::{Hash, Hasher};
|
||||
use fiberlb_types::Backend;
|
||||
|
||||
/// Default lookup table size (prime number for better distribution)
|
||||
/// Google's paper uses 65537, but we use a smaller prime for memory efficiency
|
||||
|
|
@ -116,9 +116,7 @@ impl MaglevTable {
|
|||
let offset = Self::hash_offset(backend, size);
|
||||
let skip = Self::hash_skip(backend, size);
|
||||
|
||||
(0..size)
|
||||
.map(|j| (offset + j * skip) % size)
|
||||
.collect()
|
||||
(0..size).map(|j| (offset + j * skip) % size).collect()
|
||||
}
|
||||
|
||||
/// Hash function for offset calculation
|
||||
|
|
@ -134,10 +132,26 @@ impl MaglevTable {
|
|||
let mut hasher = DefaultHasher::new();
|
||||
backend.hash(&mut hasher);
|
||||
"skip".hash(&mut hasher);
|
||||
let skip = (hasher.finish() as usize) % (size - 1) + 1;
|
||||
let mut skip = (hasher.finish() as usize) % (size - 1) + 1;
|
||||
|
||||
// For non-prime table sizes we still need a full permutation, so force
|
||||
// the step to be coprime with the table size.
|
||||
while Self::gcd(skip, size) != 1 {
|
||||
skip = (skip % (size - 1)) + 1;
|
||||
}
|
||||
|
||||
skip
|
||||
}
|
||||
|
||||
/// Greatest common divisor of `a` and `b` by the Euclidean algorithm.
///
/// Returns the other argument when one of them is zero, and zero when both
/// are zero.
fn gcd(mut a: usize, mut b: usize) -> usize {
    loop {
        if b == 0 {
            return a;
        }
        // Replace the pair (a, b) with (b, a mod b) until the remainder is zero.
        (a, b) = (b, a % b);
    }
}
|
||||
|
||||
/// Hash a connection key (e.g., "192.168.1.1:54321")
|
||||
fn hash_key(key: &str) -> u64 {
|
||||
let mut hasher = DefaultHasher::new();
|
||||
|
|
@ -291,11 +305,9 @@ mod tests {
|
|||
|
||||
// Count how many keys map to the same backend
|
||||
let mut unchanged = 0;
|
||||
let mut total = 0;
|
||||
for (key, old_backend) in &mappings {
|
||||
if let Some(idx) = table2.lookup(key) {
|
||||
if let Some(new_backend) = table2.backend_id(idx) {
|
||||
total += 1;
|
||||
// Only keys that were on removed backend should change
|
||||
if old_backend != "10.0.0.2:8080" {
|
||||
if old_backend == new_backend {
|
||||
|
|
|
|||
|
|
@ -4,31 +4,30 @@ use std::sync::Arc;
|
|||
|
||||
use chainfire_client::Client as ChainFireClient;
|
||||
use clap::Parser;
|
||||
use metrics_exporter_prometheus::PrometheusBuilder;
|
||||
use fiberlb_api::{
|
||||
backend_service_server::BackendServiceServer,
|
||||
certificate_service_server::CertificateServiceServer,
|
||||
health_check_service_server::HealthCheckServiceServer,
|
||||
l7_policy_service_server::L7PolicyServiceServer, l7_rule_service_server::L7RuleServiceServer,
|
||||
listener_service_server::ListenerServiceServer,
|
||||
load_balancer_service_server::LoadBalancerServiceServer,
|
||||
pool_service_server::PoolServiceServer,
|
||||
backend_service_server::BackendServiceServer,
|
||||
listener_service_server::ListenerServiceServer,
|
||||
health_check_service_server::HealthCheckServiceServer,
|
||||
l7_policy_service_server::L7PolicyServiceServer,
|
||||
l7_rule_service_server::L7RuleServiceServer,
|
||||
certificate_service_server::CertificateServiceServer,
|
||||
};
|
||||
use fiberlb_server::{
|
||||
config::MetadataBackend,
|
||||
LbMetadataStore, LoadBalancerServiceImpl, PoolServiceImpl, BackendServiceImpl,
|
||||
ListenerServiceImpl, HealthCheckServiceImpl, L7PolicyServiceImpl, L7RuleServiceImpl,
|
||||
CertificateServiceImpl, DataPlane, L7DataPlane, ServerConfig,
|
||||
config::MetadataBackend, create_bgp_client, spawn_health_checker, BackendServiceImpl,
|
||||
CertificateServiceImpl, DataPlane, HealthCheckServiceImpl, L7DataPlane, L7PolicyServiceImpl,
|
||||
L7RuleServiceImpl, LbMetadataStore, ListenerServiceImpl, LoadBalancerServiceImpl,
|
||||
PoolServiceImpl, ServerConfig, VipManager,
|
||||
};
|
||||
use iam_service_auth::AuthService;
|
||||
use metrics_exporter_prometheus::PrometheusBuilder;
|
||||
use std::net::SocketAddr;
|
||||
use std::path::PathBuf;
|
||||
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
||||
use tonic::transport::{Certificate, Identity, Server, ServerTlsConfig};
|
||||
use tonic::{Request, Status};
|
||||
use tonic_health::server::health_reporter;
|
||||
use tracing_subscriber::EnvFilter;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
/// FiberLB load balancer server
|
||||
#[derive(Parser, Debug)]
|
||||
|
|
@ -113,8 +112,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||
// Initialize tracing
|
||||
tracing_subscriber::fmt()
|
||||
.with_env_filter(
|
||||
EnvFilter::try_from_default_env()
|
||||
.unwrap_or_else(|_| EnvFilter::new(&config.log_level)),
|
||||
EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(&config.log_level)),
|
||||
)
|
||||
.init();
|
||||
|
||||
|
|
@ -158,8 +156,8 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||
config.flaredb_endpoint.clone(),
|
||||
config.chainfire_endpoint.clone(),
|
||||
)
|
||||
.await
|
||||
.map_err(|e| format!("Failed to initialize FlareDB metadata store: {}", e))?,
|
||||
.await
|
||||
.map_err(|e| format!("Failed to initialize FlareDB metadata store: {}", e))?,
|
||||
)
|
||||
}
|
||||
MetadataBackend::Postgres | MetadataBackend::Sqlite => {
|
||||
|
|
@ -187,7 +185,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||
};
|
||||
|
||||
// Initialize IAM authentication service
|
||||
tracing::info!("Connecting to IAM server at {}", config.auth.iam_server_addr);
|
||||
tracing::info!(
|
||||
"Connecting to IAM server at {}",
|
||||
config.auth.iam_server_addr
|
||||
);
|
||||
let auth_service = AuthService::new(&config.auth.iam_server_addr)
|
||||
.await
|
||||
.map_err(|e| format!("Failed to connect to IAM server: {}", e))?;
|
||||
|
|
@ -228,6 +229,34 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||
|
||||
restore_runtime_listeners(metadata.clone(), dataplane.clone(), l7_dataplane.clone()).await?;
|
||||
|
||||
let (_health_task, health_shutdown_tx) = spawn_health_checker(
|
||||
metadata.clone(),
|
||||
Duration::from_secs(config.health.interval_secs.max(1)),
|
||||
Duration::from_secs(config.health.timeout_secs.max(1)),
|
||||
);
|
||||
|
||||
let vip_manager = if config.bgp.enabled {
|
||||
let next_hop = config.bgp.next_hop_addr().map_err(|error| {
|
||||
format!(
|
||||
"failed to parse FiberLB BGP next hop '{}': {}",
|
||||
config
|
||||
.bgp
|
||||
.next_hop
|
||||
.as_deref()
|
||||
.unwrap_or(&config.bgp.router_id),
|
||||
error
|
||||
)
|
||||
})?;
|
||||
let bgp = create_bgp_client(config.bgp.clone()).await?;
|
||||
let manager = Arc::new(VipManager::new(bgp, metadata.clone(), next_hop));
|
||||
let _vip_task = manager.clone().spawn(Duration::from_secs(
|
||||
config.vip_advertisement.interval_secs.max(1),
|
||||
));
|
||||
Some(manager)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Setup health service
|
||||
let (mut health_reporter, health_service) = health_reporter();
|
||||
health_reporter
|
||||
|
|
@ -289,7 +318,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||
|
||||
// Start gRPC server
|
||||
tracing::info!("gRPC server listening on {}", grpc_addr);
|
||||
server
|
||||
let server_result = server
|
||||
.add_service(health_service)
|
||||
.add_service(tonic::codegen::InterceptedService::new(
|
||||
LoadBalancerServiceServer::new(lb_service),
|
||||
|
|
@ -324,8 +353,17 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||
make_interceptor(auth_service.clone()),
|
||||
))
|
||||
.serve(grpc_addr)
|
||||
.await?;
|
||||
.await;
|
||||
|
||||
let _ = health_shutdown_tx.send(true);
|
||||
|
||||
if let Some(vip_manager) = vip_manager {
|
||||
if let Err(error) = vip_manager.shutdown().await {
|
||||
tracing::warn!(error = %error, "FiberLB VIP manager shutdown failed");
|
||||
}
|
||||
}
|
||||
|
||||
server_result?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
@ -389,31 +427,29 @@ async fn register_chainfire_membership(
|
|||
let value = format!(r#"{{"addr":"{}","ts":{}}}"#, addr, ts);
|
||||
let deadline = tokio::time::Instant::now() + std::time::Duration::from_secs(120);
|
||||
let mut attempt = 0usize;
|
||||
let mut last_error = String::new();
|
||||
|
||||
loop {
|
||||
let last_error = loop {
|
||||
attempt += 1;
|
||||
match ChainFireClient::connect(endpoint).await {
|
||||
let current_error = match ChainFireClient::connect(endpoint).await {
|
||||
Ok(mut client) => match client.put_str(&key, &value).await {
|
||||
Ok(_) => return Ok(()),
|
||||
Err(error) => last_error = format!("put failed: {}", error),
|
||||
Err(error) => format!("put failed: {}", error),
|
||||
},
|
||||
Err(error) => last_error = format!("connect failed: {}", error),
|
||||
}
|
||||
Err(error) => format!("connect failed: {}", error),
|
||||
};
|
||||
|
||||
if tokio::time::Instant::now() >= deadline {
|
||||
break;
|
||||
break current_error;
|
||||
}
|
||||
|
||||
tracing::warn!(
|
||||
attempt,
|
||||
endpoint,
|
||||
service,
|
||||
error = %last_error,
|
||||
error = %current_error,
|
||||
"retrying ChainFire membership registration"
|
||||
);
|
||||
tokio::time::sleep(std::time::Duration::from_secs(2)).await;
|
||||
}
|
||||
};
|
||||
|
||||
Err(std::io::Error::other(format!(
|
||||
"failed to register ChainFire membership for {} via {} after {} attempts: {}",
|
||||
|
|
@ -435,9 +471,15 @@ async fn restore_runtime_listeners(
|
|||
}
|
||||
|
||||
let result = if listener.is_l7() {
|
||||
l7_dataplane.start_listener(listener.id).await.map_err(|e| e.to_string())
|
||||
l7_dataplane
|
||||
.start_listener(listener.id)
|
||||
.await
|
||||
.map_err(|e| e.to_string())
|
||||
} else {
|
||||
dataplane.start_listener(listener.id).await.map_err(|e| e.to_string())
|
||||
dataplane
|
||||
.start_listener(listener.id)
|
||||
.await
|
||||
.map_err(|e| e.to_string())
|
||||
};
|
||||
|
||||
if let Err(err) = result {
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ use tokio::sync::RwLock;
|
|||
use tokio::time::sleep;
|
||||
use tracing::{debug, error, info, warn};
|
||||
|
||||
use crate::bgp_client::{BgpClient, BgpConfig};
|
||||
use crate::bgp_client::BgpClient;
|
||||
use crate::metadata::LbMetadataStore;
|
||||
use fiberlb_types::LoadBalancerId;
|
||||
|
||||
|
|
@ -43,11 +43,7 @@ pub struct VipManager {
|
|||
|
||||
impl VipManager {
|
||||
/// Create a new VIP manager
|
||||
pub fn new(
|
||||
bgp: Arc<dyn BgpClient>,
|
||||
metadata: Arc<LbMetadataStore>,
|
||||
next_hop: IpAddr,
|
||||
) -> Self {
|
||||
pub fn new(bgp: Arc<dyn BgpClient>, metadata: Arc<LbMetadataStore>, next_hop: IpAddr) -> Self {
|
||||
Self {
|
||||
bgp,
|
||||
metadata,
|
||||
|
|
@ -119,7 +115,10 @@ impl VipManager {
|
|||
}
|
||||
|
||||
/// Check if a load balancer has any healthy backends
|
||||
async fn has_healthy_backends(&self, lb_id: &LoadBalancerId) -> Result<bool, Box<dyn std::error::Error>> {
|
||||
async fn has_healthy_backends(
|
||||
&self,
|
||||
lb_id: &LoadBalancerId,
|
||||
) -> Result<bool, Box<dyn std::error::Error>> {
|
||||
// Get all pools for this load balancer
|
||||
let pools = self.metadata.list_pools(lb_id).await?;
|
||||
|
||||
|
|
@ -129,8 +128,7 @@ impl VipManager {
|
|||
|
||||
// Check if any backend is healthy
|
||||
for backend in backends {
|
||||
use fiberlb_types::BackendStatus;
|
||||
if backend.status == BackendStatus::Online {
|
||||
if backend.is_available() {
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
|
|
@ -144,7 +142,10 @@ impl VipManager {
|
|||
/// Compares current advertisements with desired active VIPs and:
|
||||
/// - Announces new VIPs that should be active
|
||||
/// - Withdraws VIPs that should no longer be active
|
||||
async fn reconcile_advertisements(&self, active_vips: &HashSet<IpAddr>) -> Result<(), Box<dyn std::error::Error>> {
|
||||
async fn reconcile_advertisements(
|
||||
&self,
|
||||
active_vips: &HashSet<IpAddr>,
|
||||
) -> Result<(), Box<dyn std::error::Error>> {
|
||||
let mut state = self.vip_state.write().await;
|
||||
|
||||
// Find VIPs to announce (active but not yet advertised)
|
||||
|
|
|
|||
|
|
@ -918,6 +918,15 @@
|
|||
}
|
||||
);
|
||||
|
||||
fiberlb-native-bgp-vm-smoke = pkgs.testers.runNixOSTest (
|
||||
import ./nix/tests/fiberlb-native-bgp-vm-smoke.nix {
|
||||
inherit pkgs;
|
||||
photoncloudPackages = self.packages.${system};
|
||||
photoncloudModule = self.nixosModules.default;
|
||||
nixNosModule = nix-nos.nixosModules.default;
|
||||
}
|
||||
);
|
||||
|
||||
deployer-bootstrap-e2e = pkgs.runCommand "deployer-bootstrap-e2e" {
|
||||
nativeBuildInputs = with pkgs; [
|
||||
bash
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ let
|
|||
in {
|
||||
config = mkIf (config.nix-nos.enable && cfg.enable && cfg.backend == "gobgp") {
|
||||
# Install GoBGP package
|
||||
environment.systemPackages = [ pkgs.gobgp ];
|
||||
environment.systemPackages = [ pkgs.gobgp pkgs.gobgpd ];
|
||||
|
||||
# GoBGP systemd service
|
||||
systemd.services.gobgpd = {
|
||||
|
|
@ -65,7 +65,7 @@ in {
|
|||
|
||||
serviceConfig = {
|
||||
Type = "simple";
|
||||
ExecStart = "${pkgs.gobgp}/bin/gobgpd -f ${gobgpConfig}";
|
||||
ExecStart = "${pkgs.gobgpd}/bin/gobgpd -f ${gobgpConfig}";
|
||||
Restart = "on-failure";
|
||||
RestartSec = "5s";
|
||||
};
|
||||
|
|
|
|||
|
|
@ -3,7 +3,35 @@
|
|||
let
|
||||
cfg = config.services.fiberlb;
|
||||
tomlFormat = pkgs.formats.toml { };
|
||||
fiberlbConfigFile = tomlFormat.generate "fiberlb.toml" {
|
||||
# Submodule type describing one static BGP peer. Each attribute is copied
# verbatim into a `[[bgp.peers]]` entry of the generated fiberlb.toml.
bgpPeerType = lib.types.submodule {
  options = {
    address = lib.mkOption {
      type = lib.types.str;
      description = "BGP peer IP address or hostname.";
      example = "192.0.2.1";
    };

    port = lib.mkOption {
      type = lib.types.port;
      default = 179;
      description = "BGP peer TCP port.";
    };

    # The server deserializes `asn` into an unsigned 32-bit integer, so bound
    # the option to that range. A larger value would otherwise pass evaluation
    # and only fail when the service parses its configuration.
    asn = lib.mkOption {
      type = lib.types.ints.between 1 4294967295;
      description = "Peer AS number.";
      example = 65020;
    };

    description = lib.mkOption {
      type = lib.types.str;
      default = "";
      description = "Optional description used for logs and operators.";
    };
  };
};
|
||||
|
||||
fiberlbBaseConfig = {
|
||||
grpc_addr = "0.0.0.0:${toString cfg.port}";
|
||||
log_level = "info";
|
||||
auth = {
|
||||
|
|
@ -12,7 +40,52 @@ let
|
|||
then cfg.iamAddr
|
||||
else "127.0.0.1:50080";
|
||||
};
|
||||
health = {
|
||||
interval_secs = cfg.healthCheckIntervalSecs;
|
||||
timeout_secs = cfg.healthCheckTimeoutSecs;
|
||||
};
|
||||
vip_advertisement = {
|
||||
interval_secs = cfg.vipCheckIntervalSecs;
|
||||
};
|
||||
} // lib.optionalAttrs cfg.bgp.enable {
|
||||
bgp =
|
||||
{
|
||||
enabled = true;
|
||||
local_as = cfg.bgp.localAs;
|
||||
router_id =
|
||||
if cfg.bgp.routerId != null
|
||||
then cfg.bgp.routerId
|
||||
else "127.0.0.1";
|
||||
hold_time_secs = cfg.bgp.holdTimeSecs;
|
||||
keepalive_secs = cfg.bgp.keepaliveSecs;
|
||||
connect_retry_secs = cfg.bgp.connectRetrySecs;
|
||||
peers = map
|
||||
(peer: {
|
||||
inherit (peer) address port asn description;
|
||||
})
|
||||
cfg.bgp.peers;
|
||||
}
|
||||
// lib.optionalAttrs (cfg.bgp.nextHop != null) {
|
||||
next_hop = cfg.bgp.nextHop;
|
||||
};
|
||||
};
|
||||
fiberlbConfigFile = tomlFormat.generate "fiberlb.toml" (lib.recursiveUpdate fiberlbBaseConfig cfg.settings);
|
||||
flaredbDependencies = lib.optional (cfg.metadataBackend == "flaredb") "flaredb.service";
|
||||
# Database URL handed to the service. For the sqlite backend the URL is
# normalized in two steps; for every other backend (or a null URL) it is
# passed through unchanged.
#   1. "sqlite:/path" (single slash) is rewritten to "sqlite:///path".
#   2. "?mode=rwc" is appended when the URL has no query string yet.
normalizedDatabaseUrl =
  let
    rawUrl = cfg.databaseUrl;
    isSqlite = cfg.metadataBackend == "sqlite";
    lacksAuthority =
      lib.hasPrefix "sqlite:/" rawUrl
      && !(lib.hasPrefix "sqlite://" rawUrl);
    withAuthority =
      if lacksAuthority
      then "sqlite://${lib.removePrefix "sqlite:" rawUrl}"
      else rawUrl;
  in
  if rawUrl == null || !isSqlite
  then rawUrl
  else if lib.hasInfix "?" withAuthority
  then withAuthority
  else "${withAuthority}?mode=rwc";
|
||||
in
|
||||
{
|
||||
options.services.fiberlb = {
|
||||
|
|
@ -64,6 +137,72 @@ in
|
|||
description = "Enable single-node mode (required when metadata backend is SQLite)";
|
||||
};
|
||||
|
||||
healthCheckIntervalSecs = lib.mkOption {
|
||||
type = lib.types.ints.positive;
|
||||
default = 5;
|
||||
description = "Interval between FiberLB backend health sweeps.";
|
||||
};
|
||||
|
||||
healthCheckTimeoutSecs = lib.mkOption {
|
||||
type = lib.types.ints.positive;
|
||||
default = 5;
|
||||
description = "Timeout for each FiberLB backend health probe.";
|
||||
};
|
||||
|
||||
vipCheckIntervalSecs = lib.mkOption {
|
||||
type = lib.types.ints.positive;
|
||||
default = 3;
|
||||
description = "Interval between FiberLB VIP-to-BGP reconciliation sweeps.";
|
||||
};
|
||||
|
||||
# Options for FiberLB's native BGP speaker. They are rendered into the `bgp`
# table of the generated fiberlb.toml when `enable` is set.
bgp = {
  enable = lib.mkEnableOption "FiberLB native BGP VIP advertisement";

  # Parsed by the server as an unsigned 32-bit integer; bound the option to
  # that range so an out-of-range value fails at evaluation, not at startup.
  localAs = lib.mkOption {
    type = lib.types.ints.between 1 4294967295;
    default = 65001;
    description = "Local AS number used by FiberLB's native BGP speaker.";
  };

  routerId = lib.mkOption {
    type = lib.types.nullOr lib.types.str;
    default = null;
    description = "IPv4 router ID used by FiberLB's native BGP speaker.";
    example = "192.0.2.10";
  };

  nextHop = lib.mkOption {
    type = lib.types.nullOr lib.types.str;
    default = null;
    description = "Explicit BGP NEXT_HOP address. Defaults to routerId when unset.";
    example = "192.0.2.10";
  };

  # Parsed by the server as an unsigned 16-bit integer.
  holdTimeSecs = lib.mkOption {
    type = lib.types.ints.between 1 65535;
    default = 90;
    description = "Requested BGP hold time in seconds.";
  };

  # Parsed by the server as an unsigned 16-bit integer.
  keepaliveSecs = lib.mkOption {
    type = lib.types.ints.between 1 65535;
    default = 30;
    description = "BGP keepalive interval in seconds.";
  };

  connectRetrySecs = lib.mkOption {
    type = lib.types.ints.positive;
    default = 5;
    description = "Delay before FiberLB reconnects to a failed BGP peer.";
  };

  peers = lib.mkOption {
    type = lib.types.listOf bgpPeerType;
    default = [ ];
    description = "Static BGP peers for FiberLB's native speaker.";
  };
};
|
||||
|
||||
dataDir = lib.mkOption {
|
||||
type = lib.types.path;
|
||||
default = "/var/lib/fiberlb";
|
||||
|
|
@ -84,6 +223,25 @@ in
|
|||
};
|
||||
|
||||
config = lib.mkIf cfg.enable {
|
||||
assertions = [
|
||||
{
|
||||
assertion = cfg.metadataBackend != "sqlite" || cfg.singleNode;
|
||||
message = "services.fiberlb.singleNode must be true when metadataBackend is sqlite";
|
||||
}
|
||||
{
|
||||
assertion = cfg.metadataBackend == "flaredb" || cfg.databaseUrl != null;
|
||||
message = "services.fiberlb.databaseUrl is required when metadataBackend is postgres or sqlite";
|
||||
}
|
||||
{
|
||||
assertion = (!cfg.bgp.enable) || cfg.bgp.routerId != null;
|
||||
message = "services.fiberlb.bgp.routerId must be set when native BGP is enabled";
|
||||
}
|
||||
{
|
||||
assertion = (!cfg.bgp.enable) || ((builtins.length cfg.bgp.peers) > 0);
|
||||
message = "services.fiberlb.bgp.peers must contain at least one peer when native BGP is enabled";
|
||||
}
|
||||
];
|
||||
|
||||
# Create system user
|
||||
users.users.fiberlb = {
|
||||
isSystemUser = true;
|
||||
|
|
@ -98,8 +256,8 @@ in
|
|||
systemd.services.fiberlb = {
|
||||
description = "FiberLB Load Balancing Service";
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
after = [ "network.target" "iam.service" "flaredb.service" ];
|
||||
requires = [ "iam.service" "flaredb.service" ];
|
||||
after = [ "network.target" "iam.service" ] ++ flaredbDependencies;
|
||||
requires = [ "iam.service" ] ++ flaredbDependencies;
|
||||
|
||||
serviceConfig = {
|
||||
Type = "simple";
|
||||
|
|
@ -124,8 +282,8 @@ in
|
|||
"RUST_LOG=info"
|
||||
"FIBERLB_FLAREDB_ENDPOINT=${if cfg.flaredbAddr != null then cfg.flaredbAddr else "127.0.0.1:2479"}"
|
||||
"FIBERLB_METADATA_BACKEND=${cfg.metadataBackend}"
|
||||
] ++ lib.optional (cfg.databaseUrl != null) "FIBERLB_METADATA_DATABASE_URL=${cfg.databaseUrl}"
|
||||
++ lib.optional cfg.singleNode "FIBERLB_SINGLE_NODE=1"
|
||||
] ++ lib.optional (normalizedDatabaseUrl != null) "FIBERLB_METADATA_DATABASE_URL=${normalizedDatabaseUrl}"
|
||||
++ lib.optional cfg.singleNode "FIBERLB_SINGLE_NODE=true"
|
||||
++ lib.optional (cfg.chainfireAddr != null) "FIBERLB_CHAINFIRE_ENDPOINT=http://${cfg.chainfireAddr}";
|
||||
|
||||
# Start command
|
||||
|
|
|
|||
|
|
@ -122,7 +122,10 @@ in
|
|||
IAM_DATABASE_URL = cfg.databaseUrl;
|
||||
})
|
||||
(lib.mkIf cfg.singleNode {
|
||||
IAM_SINGLE_NODE = "1";
|
||||
IAM_SINGLE_NODE = "true";
|
||||
})
|
||||
(lib.mkIf (cfg.storeBackend == "memory") {
|
||||
IAM_ALLOW_MEMORY_BACKEND = "1";
|
||||
})
|
||||
];
|
||||
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ in {
|
|||
vips = mkOption {
|
||||
type = types.listOf types.str;
|
||||
default = [];
|
||||
description = "VIPs to advertise via BGP (CIDR notation)";
|
||||
description = "Legacy static VIP hints. FiberLB native BGP ignores this list and advertises active load balancer VIPs dynamically.";
|
||||
example = [ "203.0.113.10/32" "203.0.113.11/32" ];
|
||||
};
|
||||
|
||||
|
|
@ -75,44 +75,30 @@ in {
|
|||
assertion = clusterCfg.bgp.asn > 0;
|
||||
message = "plasmacloud.cluster.bgp.asn must be configured for FiberLB BGP";
|
||||
}
|
||||
{
|
||||
assertion = (length cfg.fiberlbBgp.vips) > 0;
|
||||
message = "plasmacloud.network.fiberlbBgp.vips must contain at least one VIP";
|
||||
}
|
||||
{
|
||||
assertion = (length cfg.fiberlbBgp.peers) > 0;
|
||||
message = "plasmacloud.network.fiberlbBgp.peers must contain at least one BGP peer";
|
||||
}
|
||||
{
|
||||
assertion = config.services.fiberlb.enable or false;
|
||||
message = "plasmacloud.network.fiberlbBgp.enable requires services.fiberlb.enable";
|
||||
}
|
||||
];
|
||||
|
||||
# Wire to nix-nos.bgp (Layer 1)
|
||||
nix-nos.enable = true;
|
||||
nix-nos.bgp = {
|
||||
services.fiberlb.bgp = {
|
||||
enable = true;
|
||||
backend = "gobgp"; # FiberLB uses GoBGP
|
||||
asn = clusterCfg.bgp.asn;
|
||||
|
||||
# Auto-detect router ID from primary IP or use configured value
|
||||
localAs = clusterCfg.bgp.asn;
|
||||
routerId =
|
||||
if cfg.fiberlbBgp.routerId != null
|
||||
then cfg.fiberlbBgp.routerId
|
||||
else
|
||||
# Fallback to a simple IP extraction from node config
|
||||
let
|
||||
hostname = config.networking.hostName;
|
||||
node = clusterCfg.nodes.${hostname} or null;
|
||||
in
|
||||
if node != null then node.ip else "127.0.0.1";
|
||||
|
||||
peers = cfg.fiberlbBgp.peers;
|
||||
|
||||
# Convert VIPs to BGP announcements
|
||||
announcements = map (vip: { prefix = vip; }) cfg.fiberlbBgp.vips;
|
||||
};
|
||||
|
||||
# FiberLB service configuration (if FiberLB is enabled)
|
||||
# Note: This assumes fiberlb service is defined elsewhere
|
||||
# services.fiberlb.bgp.gobgpAddress = mkIf (config.services.fiberlb.enable or false) "127.0.0.1:50051";
|
||||
})
|
||||
|
||||
# PrismNET OVN integration
|
||||
|
|
|
|||
378
nix/tests/fiberlb-native-bgp-vm-smoke.nix
Normal file
378
nix/tests/fiberlb-native-bgp-vm-smoke.nix
Normal file
|
|
@ -0,0 +1,378 @@
|
|||
{
|
||||
pkgs,
|
||||
photoncloudPackages,
|
||||
photoncloudModule,
|
||||
nixNosModule,
|
||||
}:
|
||||
|
||||
let
|
||||
gobgpdConfig = pkgs.writeText "fiberlb-native-bgp-peer.json" (builtins.toJSON {
|
||||
global = {
|
||||
config = {
|
||||
as = 65020;
|
||||
router-id = "192.168.100.1";
|
||||
};
|
||||
};
|
||||
|
||||
neighbors = [
|
||||
{
|
||||
config = {
|
||||
neighbor-address = "192.168.100.2";
|
||||
peer-as = 65010;
|
||||
description = "fiberlb-under-test";
|
||||
};
|
||||
}
|
||||
];
|
||||
});
|
||||
|
||||
iamProtoDir = ../../iam/proto;
|
||||
iamProto = "iam.proto";
|
||||
fiberlbProtoDir = ../../fiberlb/crates/fiberlb-api/proto;
|
||||
fiberlbProto = "fiberlb.proto";
|
||||
in
|
||||
{
|
||||
name = "fiberlb-native-bgp-vm-smoke";
|
||||
|
||||
nodes = {
|
||||
router =
|
||||
{ ... }:
|
||||
{
|
||||
networking.hostName = "router";
|
||||
networking.useDHCP = false;
|
||||
networking.firewall.enable = false;
|
||||
virtualisation.vlans = [ 1 ];
|
||||
networking.interfaces.eth1.ipv4.addresses = [
|
||||
{
|
||||
address = "192.168.100.1";
|
||||
prefixLength = 24;
|
||||
}
|
||||
];
|
||||
|
||||
environment.systemPackages = with pkgs; [
|
||||
gobgp
|
||||
gobgpd
|
||||
jq
|
||||
];
|
||||
|
||||
systemd.services.gobgpd-peer = {
|
||||
description = "GoBGP test peer for FiberLB native BGP smoke";
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
after = [ "network.target" ];
|
||||
serviceConfig = {
|
||||
Type = "simple";
|
||||
ExecStart = "${pkgs.gobgpd}/bin/gobgpd -t json -f ${gobgpdConfig} --api-hosts 127.0.0.1:50051 -p";
|
||||
Restart = "on-failure";
|
||||
RestartSec = "2s";
|
||||
};
|
||||
};
|
||||
|
||||
system.stateVersion = "24.11";
|
||||
};
|
||||
|
||||
lb =
|
||||
{ ... }:
|
||||
{
|
||||
imports = [
|
||||
nixNosModule
|
||||
photoncloudModule
|
||||
];
|
||||
|
||||
networking.hostName = "lb";
|
||||
networking.useDHCP = false;
|
||||
networking.firewall.enable = false;
|
||||
virtualisation.vlans = [ 1 ];
|
||||
networking.interfaces.eth1.ipv4.addresses = [
|
||||
{
|
||||
address = "192.168.100.2";
|
||||
prefixLength = 24;
|
||||
}
|
||||
];
|
||||
|
||||
environment.systemPackages = with pkgs; [
|
||||
grpcurl
|
||||
jq
|
||||
python3
|
||||
];
|
||||
|
||||
services.iam = {
|
||||
enable = true;
|
||||
package = photoncloudPackages.iam-server;
|
||||
port = 50080;
|
||||
httpPort = 8083;
|
||||
storeBackend = "memory";
|
||||
};
|
||||
|
||||
systemd.services.iam.environment = {
|
||||
IAM_ALLOW_RANDOM_SIGNING_KEY = "1";
|
||||
};
|
||||
|
||||
services.fiberlb = {
|
||||
enable = true;
|
||||
package = photoncloudPackages.fiberlb-server;
|
||||
port = 50085;
|
||||
iamAddr = "192.168.100.2:50080";
|
||||
metadataBackend = "sqlite";
|
||||
databaseUrl = "sqlite:/var/lib/fiberlb/metadata.db";
|
||||
singleNode = true;
|
||||
healthCheckIntervalSecs = 1;
|
||||
healthCheckTimeoutSecs = 1;
|
||||
vipCheckIntervalSecs = 1;
|
||||
bgp = {
|
||||
enable = true;
|
||||
localAs = 65010;
|
||||
routerId = "192.168.100.2";
|
||||
nextHop = "192.168.100.2";
|
||||
holdTimeSecs = 9;
|
||||
keepaliveSecs = 3;
|
||||
peers = [
|
||||
{
|
||||
address = "192.168.100.1";
|
||||
port = 179;
|
||||
asn = 65020;
|
||||
description = "router-peer";
|
||||
}
|
||||
];
|
||||
};
|
||||
};
|
||||
|
||||
systemd.services.mock-backend = {
|
||||
description = "FiberLB health-check backend";
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
after = [ "network.target" ];
|
||||
serviceConfig = {
|
||||
Type = "simple";
|
||||
ExecStart = "${pkgs.python3}/bin/python -m http.server 18081 --bind 127.0.0.1";
|
||||
Restart = "always";
|
||||
RestartSec = "1s";
|
||||
};
|
||||
};
|
||||
|
||||
system.stateVersion = "24.11";
|
||||
};
|
||||
};
|
||||
|
||||
testScript = ''
|
||||
import json
|
||||
import shlex
|
||||
import time
|
||||
|
||||
IAM_PROTO_DIR = "${iamProtoDir}"
|
||||
IAM_PROTO = "${iamProto}"
|
||||
FIBERLB_PROTO_DIR = "${fiberlbProtoDir}"
|
||||
FIBERLB_PROTO = "${fiberlbProto}"
|
||||
|
||||
def grpcurl_json(machine, endpoint, import_path, proto, service, payload, headers=None):
|
||||
header_args = ""
|
||||
for header in headers or []:
|
||||
header_args += f" -H {shlex.quote(header)}"
|
||||
command = (
|
||||
f"grpcurl -plaintext{header_args} "
|
||||
f"-import-path {shlex.quote(import_path)} "
|
||||
f"-proto {shlex.quote(proto)} "
|
||||
f"-d {shlex.quote(json.dumps(payload))} "
|
||||
f"{shlex.quote(endpoint)} {shlex.quote(service)}"
|
||||
)
|
||||
status, output = machine.execute(f"timeout 15 sh -lc {shlex.quote(command + ' 2>&1')}")
|
||||
if status != 0:
|
||||
raise AssertionError(
|
||||
"grpcurl failed"
|
||||
f" service={service}"
|
||||
f" status={status}"
|
||||
f" payload={json.dumps(payload, sort_keys=True)}"
|
||||
f" output={output}"
|
||||
)
|
||||
return json.loads(output)
|
||||
|
||||
def issue_project_admin_token(machine, org_id, project_id):
|
||||
principal_id = f"fiberlb-smoke-{int(time.time())}"
|
||||
deadline = time.time() + 120
|
||||
|
||||
def retry(action):
|
||||
last_error = None
|
||||
while time.time() < deadline:
|
||||
try:
|
||||
return action()
|
||||
except Exception as exc:
|
||||
last_error = exc
|
||||
time.sleep(2)
|
||||
raise AssertionError(f"IAM bootstrap timed out: {last_error}")
|
||||
|
||||
retry(lambda: grpcurl_json(
|
||||
machine,
|
||||
"127.0.0.1:50080",
|
||||
IAM_PROTO_DIR,
|
||||
IAM_PROTO,
|
||||
"iam.v1.IamAdmin/CreatePrincipal",
|
||||
{
|
||||
"id": principal_id,
|
||||
"kind": "PRINCIPAL_KIND_SERVICE_ACCOUNT",
|
||||
"name": principal_id,
|
||||
"orgId": org_id,
|
||||
"projectId": project_id,
|
||||
},
|
||||
))
|
||||
retry(lambda: grpcurl_json(
|
||||
machine,
|
||||
"127.0.0.1:50080",
|
||||
IAM_PROTO_DIR,
|
||||
IAM_PROTO,
|
||||
"iam.v1.IamAdmin/CreateBinding",
|
||||
{
|
||||
"principal": {
|
||||
"kind": "PRINCIPAL_KIND_SERVICE_ACCOUNT",
|
||||
"id": principal_id,
|
||||
},
|
||||
"role": "roles/ProjectAdmin",
|
||||
"scope": {
|
||||
"project": {
|
||||
"id": project_id,
|
||||
"orgId": org_id,
|
||||
}
|
||||
},
|
||||
},
|
||||
))
|
||||
token_response = retry(lambda: grpcurl_json(
|
||||
machine,
|
||||
"127.0.0.1:50080",
|
||||
IAM_PROTO_DIR,
|
||||
IAM_PROTO,
|
||||
"iam.v1.IamToken/IssueToken",
|
||||
{
|
||||
"principalId": principal_id,
|
||||
"principalKind": "PRINCIPAL_KIND_SERVICE_ACCOUNT",
|
||||
"scope": {
|
||||
"project": {
|
||||
"id": project_id,
|
||||
"orgId": org_id,
|
||||
}
|
||||
},
|
||||
"ttlSeconds": 3600,
|
||||
},
|
||||
))
|
||||
return token_response["token"]
|
||||
|
||||
def wait_for_backend_status(status, backend_id, token):
|
||||
lb.wait_until_succeeds(
|
||||
"grpcurl -plaintext "
|
||||
f"-H {shlex.quote('authorization: Bearer ' + token)} "
|
||||
f"-import-path {shlex.quote(FIBERLB_PROTO_DIR)} "
|
||||
f"-proto {shlex.quote(FIBERLB_PROTO)} "
|
||||
f"-d {shlex.quote(json.dumps({'id': backend_id}))} "
|
||||
"127.0.0.1:50085 fiberlb.v1.BackendService/GetBackend "
|
||||
f"| jq -e {shlex.quote(f'.backend.status == \"{status}\"')}"
|
||||
)
|
||||
|
||||
def wait_for_route(prefix, present):
|
||||
if present:
|
||||
router.wait_until_succeeds(
|
||||
f"gobgp -u 127.0.0.1 -p 50051 global rib | grep -F {shlex.quote(prefix)}"
|
||||
)
|
||||
else:
|
||||
deadline = time.time() + 60
|
||||
while time.time() < deadline:
|
||||
output = router.succeed("gobgp -u 127.0.0.1 -p 50051 global rib || true")
|
||||
if prefix not in output:
|
||||
return
|
||||
time.sleep(1)
|
||||
raise AssertionError(f"route {prefix} still present in GoBGP RIB")
|
||||
|
||||
start_all()
|
||||
serial_stdout_off()
|
||||
|
||||
router.wait_for_unit("gobgpd-peer.service")
|
||||
router.wait_until_succeeds("ss -ltnH '( sport = :179 )' | grep -q LISTEN")
|
||||
lb.wait_for_unit("iam.service")
|
||||
lb.wait_until_succeeds("ss -ltnH '( sport = :50080 )' | grep -q LISTEN")
|
||||
lb.wait_for_unit("mock-backend.service")
|
||||
lb.wait_for_unit("fiberlb.service")
|
||||
lb.wait_until_succeeds("ss -ltnH '( sport = :50085 )' | grep -q LISTEN")
|
||||
|
||||
router.wait_until_succeeds("gobgp -u 127.0.0.1 -p 50051 neighbor | grep -F 192.168.100.2")
|
||||
|
||||
token = issue_project_admin_token(lb, "bgp-smoke-org", "bgp-smoke-project")
|
||||
|
||||
lb_response = grpcurl_json(
|
||||
lb,
|
||||
"127.0.0.1:50085",
|
||||
FIBERLB_PROTO_DIR,
|
||||
FIBERLB_PROTO,
|
||||
"fiberlb.v1.LoadBalancerService/CreateLoadBalancer",
|
||||
{
|
||||
"name": "bgp-smoke-lb",
|
||||
"orgId": "bgp-smoke-org",
|
||||
"projectId": "bgp-smoke-project",
|
||||
"description": "native bgp smoke",
|
||||
},
|
||||
headers=[f"authorization: Bearer {token}"],
|
||||
)
|
||||
loadbalancer = lb_response["loadbalancer"]
|
||||
lb_id = loadbalancer["id"]
|
||||
vip_prefix = f"{loadbalancer['vipAddress']}/32"
|
||||
|
||||
pool_id = grpcurl_json(
|
||||
lb,
|
||||
"127.0.0.1:50085",
|
||||
FIBERLB_PROTO_DIR,
|
||||
FIBERLB_PROTO,
|
||||
"fiberlb.v1.PoolService/CreatePool",
|
||||
{
|
||||
"name": "bgp-smoke-pool",
|
||||
"loadbalancerId": lb_id,
|
||||
"algorithm": "POOL_ALGORITHM_ROUND_ROBIN",
|
||||
"protocol": "POOL_PROTOCOL_TCP",
|
||||
},
|
||||
headers=[f"authorization: Bearer {token}"],
|
||||
)["pool"]["id"]
|
||||
|
||||
backend_id = grpcurl_json(
|
||||
lb,
|
||||
"127.0.0.1:50085",
|
||||
FIBERLB_PROTO_DIR,
|
||||
FIBERLB_PROTO,
|
||||
"fiberlb.v1.BackendService/CreateBackend",
|
||||
{
|
||||
"name": "bgp-smoke-backend",
|
||||
"poolId": pool_id,
|
||||
"address": "127.0.0.1",
|
||||
"port": 18081,
|
||||
"weight": 1,
|
||||
},
|
||||
headers=[f"authorization: Bearer {token}"],
|
||||
)["backend"]["id"]
|
||||
|
||||
grpcurl_json(
|
||||
lb,
|
||||
"127.0.0.1:50085",
|
||||
FIBERLB_PROTO_DIR,
|
||||
FIBERLB_PROTO,
|
||||
"fiberlb.v1.HealthCheckService/CreateHealthCheck",
|
||||
{
|
||||
"name": "bgp-smoke-health",
|
||||
"poolId": pool_id,
|
||||
"type": "HEALTH_CHECK_TYPE_HTTP",
|
||||
"intervalSeconds": 1,
|
||||
"timeoutSeconds": 1,
|
||||
"healthyThreshold": 1,
|
||||
"unhealthyThreshold": 1,
|
||||
"httpConfig": {
|
||||
"method": "GET",
|
||||
"path": "/",
|
||||
"expectedCodes": [200],
|
||||
},
|
||||
},
|
||||
headers=[f"authorization: Bearer {token}"],
|
||||
)
|
||||
|
||||
wait_for_backend_status("BACKEND_STATUS_ONLINE", backend_id, token)
|
||||
wait_for_route(vip_prefix, True)
|
||||
|
||||
lb.succeed("systemctl stop mock-backend.service")
|
||||
wait_for_backend_status("BACKEND_STATUS_OFFLINE", backend_id, token)
|
||||
wait_for_route(vip_prefix, False)
|
||||
|
||||
lb.succeed("systemctl start mock-backend.service")
|
||||
lb.wait_for_unit("mock-backend.service")
|
||||
wait_for_backend_status("BACKEND_STATUS_ONLINE", backend_id, token)
|
||||
wait_for_route(vip_prefix, True)
|
||||
'';
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue