fiberlb: add BGP interop, drain, and policy validation
This commit is contained in:
parent
67d4523adf
commit
ce979d8f26
10 changed files with 2667 additions and 55 deletions
|
|
@ -48,6 +48,7 @@ message CreateLoadBalancerRequest {
|
||||||
string org_id = 2;
|
string org_id = 2;
|
||||||
string project_id = 3;
|
string project_id = 3;
|
||||||
string description = 4;
|
string description = 4;
|
||||||
|
string vip_address = 5;
|
||||||
}
|
}
|
||||||
|
|
||||||
message CreateLoadBalancerResponse {
|
message CreateLoadBalancerResponse {
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -135,16 +135,34 @@ pub struct VipAdvertisementConfig {
|
||||||
/// Interval between BGP advertisement reconciliation sweeps.
|
/// Interval between BGP advertisement reconciliation sweeps.
|
||||||
#[serde(default = "default_vip_check_interval_secs")]
|
#[serde(default = "default_vip_check_interval_secs")]
|
||||||
pub interval_secs: u64,
|
pub interval_secs: u64,
|
||||||
|
|
||||||
|
/// Presence of this file puts the node into control-plane drain mode.
|
||||||
|
#[serde(default = "default_vip_drain_file")]
|
||||||
|
pub drain_file: String,
|
||||||
|
|
||||||
|
/// Time to keep a locally owned VIP after withdrawing it for drain.
|
||||||
|
#[serde(default = "default_vip_drain_hold_time_secs")]
|
||||||
|
pub drain_hold_time_secs: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn default_vip_check_interval_secs() -> u64 {
|
fn default_vip_check_interval_secs() -> u64 {
|
||||||
3
|
3
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn default_vip_drain_file() -> String {
|
||||||
|
"/var/lib/fiberlb/drain".to_string()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn default_vip_drain_hold_time_secs() -> u64 {
|
||||||
|
5
|
||||||
|
}
|
||||||
|
|
||||||
impl Default for VipAdvertisementConfig {
|
impl Default for VipAdvertisementConfig {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Self {
|
Self {
|
||||||
interval_secs: default_vip_check_interval_secs(),
|
interval_secs: default_vip_check_interval_secs(),
|
||||||
|
drain_file: default_vip_drain_file(),
|
||||||
|
drain_hold_time_secs: default_vip_drain_hold_time_secs(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -190,12 +208,84 @@ pub struct BgpPeerConfig {
|
||||||
/// Optional operator-visible description.
|
/// Optional operator-visible description.
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub description: String,
|
pub description: String,
|
||||||
|
|
||||||
|
/// Optional export policy applied to announcements sent to this peer.
|
||||||
|
#[serde(default)]
|
||||||
|
pub export_policy: BgpExportPolicyConfig,
|
||||||
|
|
||||||
|
/// Optional single-hop BFD session parameters for this peer.
|
||||||
|
#[serde(default)]
|
||||||
|
pub bfd: BfdConfig,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn default_bgp_peer_port() -> u16 {
|
fn default_bgp_peer_port() -> u16 {
|
||||||
179
|
179
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Peer-scoped BGP export policy.
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
|
||||||
|
pub struct BgpExportPolicyConfig {
|
||||||
|
/// Optional MED attached to announced VIP routes.
|
||||||
|
#[serde(default)]
|
||||||
|
pub med: Option<u32>,
|
||||||
|
|
||||||
|
/// Optional standard communities attached to announced VIP routes.
|
||||||
|
#[serde(default)]
|
||||||
|
pub communities: Vec<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Single-hop BFD configuration for a BGP peer.
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||||
|
pub struct BfdConfig {
|
||||||
|
/// Whether BFD should gate route advertisement for this peer.
|
||||||
|
#[serde(default)]
|
||||||
|
pub enabled: bool,
|
||||||
|
|
||||||
|
/// Desired transmit interval in milliseconds.
|
||||||
|
#[serde(default = "default_bfd_desired_min_tx_millis")]
|
||||||
|
pub desired_min_tx_millis: u64,
|
||||||
|
|
||||||
|
/// Required receive interval in milliseconds.
|
||||||
|
#[serde(default = "default_bfd_required_min_rx_millis")]
|
||||||
|
pub required_min_rx_millis: u64,
|
||||||
|
|
||||||
|
/// Detection multiplier.
|
||||||
|
#[serde(default = "default_bfd_detect_multiplier")]
|
||||||
|
pub detect_multiplier: u8,
|
||||||
|
|
||||||
|
/// Maximum time to wait for the session to reach Up after BGP establishment.
|
||||||
|
#[serde(default = "default_bfd_bootstrap_timeout_secs")]
|
||||||
|
pub bootstrap_timeout_secs: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn default_bfd_desired_min_tx_millis() -> u64 {
|
||||||
|
300
|
||||||
|
}
|
||||||
|
|
||||||
|
fn default_bfd_required_min_rx_millis() -> u64 {
|
||||||
|
300
|
||||||
|
}
|
||||||
|
|
||||||
|
fn default_bfd_detect_multiplier() -> u8 {
|
||||||
|
3
|
||||||
|
}
|
||||||
|
|
||||||
|
fn default_bfd_bootstrap_timeout_secs() -> u64 {
|
||||||
|
10
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for BfdConfig {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
enabled: false,
|
||||||
|
desired_min_tx_millis: default_bfd_desired_min_tx_millis(),
|
||||||
|
required_min_rx_millis: default_bfd_required_min_rx_millis(),
|
||||||
|
detect_multiplier: default_bfd_detect_multiplier(),
|
||||||
|
bootstrap_timeout_secs: default_bfd_bootstrap_timeout_secs(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Native BGP speaker configuration.
|
/// Native BGP speaker configuration.
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
pub struct BgpConfig {
|
pub struct BgpConfig {
|
||||||
|
|
|
||||||
|
|
@ -148,6 +148,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
"fiberlb_bgp_peer_session_up",
|
"fiberlb_bgp_peer_session_up",
|
||||||
"Per-peer BGP session state (1=established, 0=down)"
|
"Per-peer BGP session state (1=established, 0=down)"
|
||||||
);
|
);
|
||||||
|
metrics::describe_gauge!(
|
||||||
|
"fiberlb_bgp_peer_bfd_up",
|
||||||
|
"Per-peer BFD session state for FiberLB native BGP peers (1=up, 0=down)"
|
||||||
|
);
|
||||||
metrics::describe_counter!(
|
metrics::describe_counter!(
|
||||||
"fiberlb_bgp_session_established_total",
|
"fiberlb_bgp_session_established_total",
|
||||||
"Total number of BGP peer sessions established"
|
"Total number of BGP peer sessions established"
|
||||||
|
|
@ -156,6 +160,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
"fiberlb_bgp_session_ends_total",
|
"fiberlb_bgp_session_ends_total",
|
||||||
"Total number of BGP peer session terminations by peer and result"
|
"Total number of BGP peer session terminations by peer and result"
|
||||||
);
|
);
|
||||||
|
metrics::describe_gauge!(
|
||||||
|
"fiberlb_vip_drain_active",
|
||||||
|
"Whether FiberLB node drain mode is active (1=drain, 0=normal)"
|
||||||
|
);
|
||||||
|
|
||||||
if let Some(endpoint) = &config.chainfire_endpoint {
|
if let Some(endpoint) = &config.chainfire_endpoint {
|
||||||
tracing::info!(" Cluster coordination: ChainFire @ {}", endpoint);
|
tracing::info!(" Cluster coordination: ChainFire @ {}", endpoint);
|
||||||
|
|
@ -280,7 +288,14 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
};
|
};
|
||||||
let manager = Arc::new(VipManager::new(bgp, metadata.clone(), next_hop, vip_owner));
|
let manager = Arc::new(VipManager::new(
|
||||||
|
bgp,
|
||||||
|
metadata.clone(),
|
||||||
|
next_hop,
|
||||||
|
vip_owner,
|
||||||
|
config.vip_advertisement.drain_file.clone(),
|
||||||
|
Duration::from_secs(config.vip_advertisement.drain_hold_time_secs),
|
||||||
|
));
|
||||||
let _vip_task = manager.clone().spawn(Duration::from_secs(
|
let _vip_task = manager.clone().spawn(Duration::from_secs(
|
||||||
config.vip_advertisement.interval_secs.max(1),
|
config.vip_advertisement.interval_secs.max(1),
|
||||||
));
|
));
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
//! LoadBalancer service implementation
|
//! LoadBalancer service implementation
|
||||||
|
|
||||||
|
use std::net::IpAddr;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use base64::Engine as _;
|
use base64::Engine as _;
|
||||||
|
|
@ -31,6 +32,44 @@ impl LoadBalancerServiceImpl {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn normalize_requested_vip(vip_address: &str) -> Result<Option<String>, Status> {
|
||||||
|
let trimmed = vip_address.trim();
|
||||||
|
if trimmed.is_empty() {
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
let vip: IpAddr = trimmed
|
||||||
|
.parse()
|
||||||
|
.map_err(|_| Status::invalid_argument("vip_address must be a valid IP address"))?;
|
||||||
|
|
||||||
|
if vip.is_unspecified() || vip.is_multicast() {
|
||||||
|
return Err(Status::invalid_argument(
|
||||||
|
"vip_address must be a usable unicast address",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Some(vip.to_string()))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn ensure_vip_available(metadata: &LbMetadataStore, vip: &str) -> Result<(), Status> {
|
||||||
|
let lbs = metadata
|
||||||
|
.list_all_lbs()
|
||||||
|
.await
|
||||||
|
.map_err(|e| Status::internal(format!("metadata error: {}", e)))?;
|
||||||
|
|
||||||
|
if lbs
|
||||||
|
.iter()
|
||||||
|
.any(|lb| lb.vip_address.as_deref() == Some(vip))
|
||||||
|
{
|
||||||
|
return Err(Status::already_exists(format!(
|
||||||
|
"vip_address {} is already in use",
|
||||||
|
vip
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
const ACTION_LB_CREATE: &str = "network:loadbalancers:create";
|
const ACTION_LB_CREATE: &str = "network:loadbalancers:create";
|
||||||
const ACTION_LB_READ: &str = "network:loadbalancers:read";
|
const ACTION_LB_READ: &str = "network:loadbalancers:read";
|
||||||
const ACTION_LB_LIST: &str = "network:loadbalancers:list";
|
const ACTION_LB_LIST: &str = "network:loadbalancers:list";
|
||||||
|
|
@ -98,11 +137,16 @@ impl LoadBalancerService for LoadBalancerServiceImpl {
|
||||||
lb.description = Some(req.description);
|
lb.description = Some(req.description);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Allocate VIP from pool
|
let requested_vip = normalize_requested_vip(&req.vip_address)?;
|
||||||
let vip = self.metadata
|
let vip = if let Some(vip) = requested_vip {
|
||||||
|
ensure_vip_available(&self.metadata, &vip).await?;
|
||||||
|
vip
|
||||||
|
} else {
|
||||||
|
self.metadata
|
||||||
.allocate_vip()
|
.allocate_vip()
|
||||||
.await
|
.await
|
||||||
.map_err(|e| Status::resource_exhausted(format!("failed to allocate VIP: {}", e)))?;
|
.map_err(|e| Status::resource_exhausted(format!("failed to allocate VIP: {}", e)))?
|
||||||
|
};
|
||||||
lb.vip_address = Some(vip);
|
lb.vip_address = Some(vip);
|
||||||
|
|
||||||
// Save load balancer
|
// Save load balancer
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@
|
||||||
use std::collections::{HashMap, HashSet};
|
use std::collections::{HashMap, HashSet};
|
||||||
use std::net::IpAddr;
|
use std::net::IpAddr;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::time::Duration;
|
use std::time::{Duration, Instant};
|
||||||
|
|
||||||
use tokio::sync::{watch, RwLock};
|
use tokio::sync::{watch, RwLock};
|
||||||
use tokio::time::sleep;
|
use tokio::time::sleep;
|
||||||
|
|
@ -17,6 +17,8 @@ use crate::metadata::LbMetadataStore;
|
||||||
use crate::vip_owner::VipAddressOwner;
|
use crate::vip_owner::VipAddressOwner;
|
||||||
use fiberlb_types::LoadBalancerId;
|
use fiberlb_types::LoadBalancerId;
|
||||||
|
|
||||||
|
const METRIC_VIP_DRAIN_ACTIVE: &str = "fiberlb_vip_drain_active";
|
||||||
|
|
||||||
/// Current local control-plane state for a VIP.
|
/// Current local control-plane state for a VIP.
|
||||||
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
|
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
|
||||||
struct VipState {
|
struct VipState {
|
||||||
|
|
@ -24,6 +26,8 @@ struct VipState {
|
||||||
owned: bool,
|
owned: bool,
|
||||||
/// The VIP is advertised to BGP peers.
|
/// The VIP is advertised to BGP peers.
|
||||||
advertised: bool,
|
advertised: bool,
|
||||||
|
/// When the node entered drain while this VIP was active.
|
||||||
|
drain_started_at: Option<Instant>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl VipState {
|
impl VipState {
|
||||||
|
|
@ -48,6 +52,10 @@ pub struct VipManager {
|
||||||
vip_owner: Option<Arc<dyn VipAddressOwner>>,
|
vip_owner: Option<Arc<dyn VipAddressOwner>>,
|
||||||
/// Router's own IP address (used as BGP next hop)
|
/// Router's own IP address (used as BGP next hop)
|
||||||
next_hop: IpAddr,
|
next_hop: IpAddr,
|
||||||
|
/// Presence of this file activates drain mode.
|
||||||
|
drain_file: String,
|
||||||
|
/// How long to keep a locally-owned VIP after withdrawing it for drain.
|
||||||
|
drain_hold_time: Duration,
|
||||||
/// Shutdown signal for the background reconciliation task.
|
/// Shutdown signal for the background reconciliation task.
|
||||||
shutdown: watch::Sender<bool>,
|
shutdown: watch::Sender<bool>,
|
||||||
}
|
}
|
||||||
|
|
@ -59,6 +67,8 @@ impl VipManager {
|
||||||
metadata: Arc<LbMetadataStore>,
|
metadata: Arc<LbMetadataStore>,
|
||||||
next_hop: IpAddr,
|
next_hop: IpAddr,
|
||||||
vip_owner: Option<Arc<dyn VipAddressOwner>>,
|
vip_owner: Option<Arc<dyn VipAddressOwner>>,
|
||||||
|
drain_file: String,
|
||||||
|
drain_hold_time: Duration,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
let (shutdown, _shutdown_rx) = watch::channel(false);
|
let (shutdown, _shutdown_rx) = watch::channel(false);
|
||||||
Self {
|
Self {
|
||||||
|
|
@ -67,6 +77,8 @@ impl VipManager {
|
||||||
vip_state: Arc::new(RwLock::new(HashMap::new())),
|
vip_state: Arc::new(RwLock::new(HashMap::new())),
|
||||||
vip_owner,
|
vip_owner,
|
||||||
next_hop,
|
next_hop,
|
||||||
|
drain_file,
|
||||||
|
drain_hold_time,
|
||||||
shutdown,
|
shutdown,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -141,12 +153,24 @@ impl VipManager {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let drain_active = self.is_drain_active().await;
|
||||||
|
metrics::gauge!(METRIC_VIP_DRAIN_ACTIVE).set(if drain_active { 1.0 } else { 0.0 });
|
||||||
|
|
||||||
// Update BGP advertisements
|
// Update BGP advertisements
|
||||||
self.reconcile_advertisements(&active_vips).await?;
|
self.reconcile_advertisements(&active_vips, drain_active).await?;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn is_drain_active(&self) -> bool {
|
||||||
|
let path = self.drain_file.trim();
|
||||||
|
if path.is_empty() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
tokio::fs::metadata(path).await.is_ok()
|
||||||
|
}
|
||||||
|
|
||||||
/// Check if a load balancer has any healthy backends
|
/// Check if a load balancer has any healthy backends
|
||||||
async fn has_healthy_backends(
|
async fn has_healthy_backends(
|
||||||
&self,
|
&self,
|
||||||
|
|
@ -178,13 +202,58 @@ impl VipManager {
|
||||||
async fn reconcile_advertisements(
|
async fn reconcile_advertisements(
|
||||||
&self,
|
&self,
|
||||||
active_vips: &HashSet<IpAddr>,
|
active_vips: &HashSet<IpAddr>,
|
||||||
|
drain_active: bool,
|
||||||
) -> Result<(), Box<dyn std::error::Error>> {
|
) -> Result<(), Box<dyn std::error::Error>> {
|
||||||
let mut state = self.vip_state.write().await;
|
let mut state = self.vip_state.write().await;
|
||||||
|
let now = Instant::now();
|
||||||
|
|
||||||
for vip in active_vips {
|
for vip in active_vips {
|
||||||
let mut vip_state = state.get(vip).copied().unwrap_or_default();
|
let mut vip_state = state.get(vip).copied().unwrap_or_default();
|
||||||
let mut changed = false;
|
let mut changed = false;
|
||||||
|
|
||||||
|
if drain_active {
|
||||||
|
if vip_state.advertised {
|
||||||
|
info!("Withdrawing VIP {} for node drain", vip);
|
||||||
|
if let Err(error) = self.bgp.withdraw_route(*vip).await {
|
||||||
|
error!("Failed to withdraw VIP {} for drain: {}", vip, error);
|
||||||
|
} else {
|
||||||
|
vip_state.advertised = false;
|
||||||
|
vip_state.drain_started_at = Some(now);
|
||||||
|
changed = true;
|
||||||
|
}
|
||||||
|
} else if vip_state.owned && vip_state.drain_started_at.is_none() {
|
||||||
|
vip_state.drain_started_at = Some(now);
|
||||||
|
changed = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if vip_state.owned {
|
||||||
|
let drain_started_at = vip_state.drain_started_at.unwrap_or(now);
|
||||||
|
if now.duration_since(drain_started_at) >= self.drain_hold_time {
|
||||||
|
if let Some(vip_owner) = &self.vip_owner {
|
||||||
|
info!("Releasing local VIP {} after drain hold", vip);
|
||||||
|
if let Err(error) = vip_owner.ensure_absent(*vip).await {
|
||||||
|
error!("Failed to release local VIP {} after drain: {}", vip, error);
|
||||||
|
} else {
|
||||||
|
vip_state.owned = false;
|
||||||
|
changed = true;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
vip_state.owned = false;
|
||||||
|
changed = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if vip_state.is_idle() {
|
||||||
|
state.remove(vip);
|
||||||
|
} else if changed {
|
||||||
|
state.insert(*vip, vip_state);
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
vip_state.drain_started_at = None;
|
||||||
|
|
||||||
if !vip_state.owned {
|
if !vip_state.owned {
|
||||||
if let Some(vip_owner) = &self.vip_owner {
|
if let Some(vip_owner) = &self.vip_owner {
|
||||||
info!("Claiming local VIP {} on this node", vip);
|
info!("Claiming local VIP {} on this node", vip);
|
||||||
|
|
@ -220,6 +289,7 @@ impl VipManager {
|
||||||
for vip in managed_vips {
|
for vip in managed_vips {
|
||||||
if !active_vips.contains(&vip) {
|
if !active_vips.contains(&vip) {
|
||||||
let mut vip_state = state.get(&vip).copied().unwrap_or_default();
|
let mut vip_state = state.get(&vip).copied().unwrap_or_default();
|
||||||
|
vip_state.drain_started_at = None;
|
||||||
|
|
||||||
if vip_state.owned {
|
if vip_state.owned {
|
||||||
if let Some(vip_owner) = &self.vip_owner {
|
if let Some(vip_owner) = &self.vip_owner {
|
||||||
|
|
@ -266,6 +336,7 @@ impl VipManager {
|
||||||
}
|
}
|
||||||
vip_state.owned = true;
|
vip_state.owned = true;
|
||||||
}
|
}
|
||||||
|
vip_state.drain_started_at = None;
|
||||||
|
|
||||||
if !vip_state.advertised {
|
if !vip_state.advertised {
|
||||||
info!("Manually advertising VIP {}", vip);
|
info!("Manually advertising VIP {}", vip);
|
||||||
|
|
@ -290,6 +361,7 @@ impl VipManager {
|
||||||
}
|
}
|
||||||
vip_state.owned = false;
|
vip_state.owned = false;
|
||||||
}
|
}
|
||||||
|
vip_state.drain_started_at = None;
|
||||||
|
|
||||||
if vip_state.advertised {
|
if vip_state.advertised {
|
||||||
info!("Manually withdrawing VIP {}", vip);
|
info!("Manually withdrawing VIP {}", vip);
|
||||||
|
|
@ -318,6 +390,7 @@ impl VipManager {
|
||||||
|
|
||||||
for vip in managed_vips {
|
for vip in managed_vips {
|
||||||
let mut vip_state = state.get(&vip).copied().unwrap_or_default();
|
let mut vip_state = state.get(&vip).copied().unwrap_or_default();
|
||||||
|
vip_state.drain_started_at = None;
|
||||||
|
|
||||||
if vip_state.owned {
|
if vip_state.owned {
|
||||||
info!("Releasing local VIP {} for shutdown", vip);
|
info!("Releasing local VIP {} for shutdown", vip);
|
||||||
|
|
@ -372,6 +445,7 @@ mod tests {
|
||||||
use crate::bgp_client::{BgpClient, Result};
|
use crate::bgp_client::{BgpClient, Result};
|
||||||
use crate::vip_owner::VipOwnershipError;
|
use crate::vip_owner::VipOwnershipError;
|
||||||
use std::sync::Mutex;
|
use std::sync::Mutex;
|
||||||
|
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
||||||
|
|
||||||
/// Mock BGP client for testing
|
/// Mock BGP client for testing
|
||||||
struct MockBgpClient {
|
struct MockBgpClient {
|
||||||
|
|
@ -459,6 +533,8 @@ mod tests {
|
||||||
metadata,
|
metadata,
|
||||||
next_hop,
|
next_hop,
|
||||||
Some(mock_owner.clone()),
|
Some(mock_owner.clone()),
|
||||||
|
String::new(),
|
||||||
|
Duration::from_secs(0),
|
||||||
);
|
);
|
||||||
|
|
||||||
let vip: IpAddr = "10.0.1.100".parse().unwrap();
|
let vip: IpAddr = "10.0.1.100".parse().unwrap();
|
||||||
|
|
@ -484,4 +560,61 @@ mod tests {
|
||||||
]
|
]
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_vip_drain_withdraws_before_releasing_vip() {
|
||||||
|
let events = Arc::new(Mutex::new(Vec::new()));
|
||||||
|
let mock_bgp = Arc::new(MockBgpClient::new(events.clone()));
|
||||||
|
let mock_owner = Arc::new(MockVipOwner::new(events.clone()));
|
||||||
|
let metadata = Arc::new(LbMetadataStore::new_in_memory());
|
||||||
|
let next_hop = "10.0.0.1".parse().unwrap();
|
||||||
|
let drain_path = std::env::temp_dir().join(format!(
|
||||||
|
"fiberlb-drain-{}",
|
||||||
|
SystemTime::now()
|
||||||
|
.duration_since(UNIX_EPOCH)
|
||||||
|
.unwrap()
|
||||||
|
.as_nanos()
|
||||||
|
));
|
||||||
|
|
||||||
|
let manager = VipManager::new(
|
||||||
|
mock_bgp,
|
||||||
|
metadata,
|
||||||
|
next_hop,
|
||||||
|
Some(mock_owner.clone()),
|
||||||
|
drain_path.display().to_string(),
|
||||||
|
Duration::from_millis(200),
|
||||||
|
);
|
||||||
|
|
||||||
|
let vip: IpAddr = "10.0.1.100".parse().unwrap();
|
||||||
|
manager.advertise_vip(vip).await.unwrap();
|
||||||
|
|
||||||
|
tokio::fs::write(&drain_path, b"1").await.unwrap();
|
||||||
|
let mut active_vips = HashSet::new();
|
||||||
|
active_vips.insert(vip);
|
||||||
|
|
||||||
|
manager
|
||||||
|
.reconcile_advertisements(&active_vips, true)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert!(mock_owner.released.lock().unwrap().is_empty());
|
||||||
|
assert_eq!(manager.get_advertised_vips().await, Vec::<IpAddr>::new());
|
||||||
|
|
||||||
|
sleep(Duration::from_millis(250)).await;
|
||||||
|
manager
|
||||||
|
.reconcile_advertisements(&active_vips, true)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
assert!(mock_owner.released.lock().unwrap().contains(&vip));
|
||||||
|
|
||||||
|
tokio::fs::remove_file(&drain_path).await.unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
events.lock().unwrap().clone(),
|
||||||
|
vec![
|
||||||
|
format!("own:{vip}"),
|
||||||
|
format!("announce:{vip}"),
|
||||||
|
format!("withdraw:{vip}"),
|
||||||
|
format!("unown:{vip}"),
|
||||||
|
]
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
18
flake.nix
18
flake.nix
|
|
@ -936,6 +936,24 @@
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
|
fiberlb-native-bgp-interop-vm-smoke = pkgs.testers.runNixOSTest (
|
||||||
|
import ./nix/tests/fiberlb-native-bgp-interop-vm-smoke.nix {
|
||||||
|
inherit pkgs;
|
||||||
|
photoncloudPackages = self.packages.${system};
|
||||||
|
photoncloudModule = self.nixosModules.default;
|
||||||
|
nixNosModule = nix-nos.nixosModules.default;
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
fiberlb-native-bgp-ecmp-drain-vm-smoke = pkgs.testers.runNixOSTest (
|
||||||
|
import ./nix/tests/fiberlb-native-bgp-ecmp-drain-vm-smoke.nix {
|
||||||
|
inherit pkgs;
|
||||||
|
photoncloudPackages = self.packages.${system};
|
||||||
|
photoncloudModule = self.nixosModules.default;
|
||||||
|
nixNosModule = nix-nos.nixosModules.default;
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
deployer-bootstrap-e2e = pkgs.runCommand "deployer-bootstrap-e2e" {
|
deployer-bootstrap-e2e = pkgs.runCommand "deployer-bootstrap-e2e" {
|
||||||
nativeBuildInputs = with pkgs; [
|
nativeBuildInputs = with pkgs; [
|
||||||
bash
|
bash
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,47 @@ let
|
||||||
default = "";
|
default = "";
|
||||||
description = "Optional description used for logs and operators.";
|
description = "Optional description used for logs and operators.";
|
||||||
};
|
};
|
||||||
|
|
||||||
|
med = lib.mkOption {
|
||||||
|
type = lib.types.nullOr lib.types.ints.unsigned;
|
||||||
|
default = null;
|
||||||
|
description = "Optional MED to attach to VIP announcements sent to this peer.";
|
||||||
|
};
|
||||||
|
|
||||||
|
communities = lib.mkOption {
|
||||||
|
type = lib.types.listOf lib.types.str;
|
||||||
|
default = [ ];
|
||||||
|
description = "Optional standard BGP communities to attach to VIP announcements sent to this peer.";
|
||||||
|
example = [ "65001:100" "65001:200" ];
|
||||||
|
};
|
||||||
|
|
||||||
|
bfd = {
|
||||||
|
enable = lib.mkEnableOption "single-hop BFD for this BGP peer";
|
||||||
|
|
||||||
|
desiredMinTxMillis = lib.mkOption {
|
||||||
|
type = lib.types.ints.positive;
|
||||||
|
default = 300;
|
||||||
|
description = "Desired BFD transmit interval in milliseconds.";
|
||||||
|
};
|
||||||
|
|
||||||
|
requiredMinRxMillis = lib.mkOption {
|
||||||
|
type = lib.types.ints.positive;
|
||||||
|
default = 300;
|
||||||
|
description = "Required BFD receive interval in milliseconds.";
|
||||||
|
};
|
||||||
|
|
||||||
|
detectMultiplier = lib.mkOption {
|
||||||
|
type = lib.types.ints.positive;
|
||||||
|
default = 3;
|
||||||
|
description = "BFD detection multiplier.";
|
||||||
|
};
|
||||||
|
|
||||||
|
bootstrapTimeoutSecs = lib.mkOption {
|
||||||
|
type = lib.types.ints.positive;
|
||||||
|
default = 10;
|
||||||
|
description = "How long FiberLB waits for the BFD session to reach Up after BGP establishment.";
|
||||||
|
};
|
||||||
|
};
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -46,6 +87,8 @@ let
|
||||||
};
|
};
|
||||||
vip_advertisement = {
|
vip_advertisement = {
|
||||||
interval_secs = cfg.vipCheckIntervalSecs;
|
interval_secs = cfg.vipCheckIntervalSecs;
|
||||||
|
drain_file = cfg.vipDrain.filePath;
|
||||||
|
drain_hold_time_secs = cfg.vipDrain.holdTimeSecs;
|
||||||
};
|
};
|
||||||
vip_ownership = {
|
vip_ownership = {
|
||||||
enabled = cfg.vipOwnership.enable;
|
enabled = cfg.vipOwnership.enable;
|
||||||
|
|
@ -66,6 +109,18 @@ let
|
||||||
peers = map
|
peers = map
|
||||||
(peer: {
|
(peer: {
|
||||||
inherit (peer) address port asn description;
|
inherit (peer) address port asn description;
|
||||||
|
export_policy = {
|
||||||
|
inherit (peer) communities;
|
||||||
|
} // lib.optionalAttrs (peer.med != null) {
|
||||||
|
med = peer.med;
|
||||||
|
};
|
||||||
|
bfd = {
|
||||||
|
enabled = peer.bfd.enable;
|
||||||
|
desired_min_tx_millis = peer.bfd.desiredMinTxMillis;
|
||||||
|
required_min_rx_millis = peer.bfd.requiredMinRxMillis;
|
||||||
|
detect_multiplier = peer.bfd.detectMultiplier;
|
||||||
|
bootstrap_timeout_secs = peer.bfd.bootstrapTimeoutSecs;
|
||||||
|
};
|
||||||
})
|
})
|
||||||
cfg.bgp.peers;
|
cfg.bgp.peers;
|
||||||
}
|
}
|
||||||
|
|
@ -169,6 +224,20 @@ in
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
vipDrain = {
|
||||||
|
filePath = lib.mkOption {
|
||||||
|
type = lib.types.str;
|
||||||
|
default = "/var/lib/fiberlb/drain";
|
||||||
|
description = "Presence of this file puts FiberLB into node drain mode.";
|
||||||
|
};
|
||||||
|
|
||||||
|
holdTimeSecs = lib.mkOption {
|
||||||
|
type = lib.types.ints.unsigned;
|
||||||
|
default = 5;
|
||||||
|
description = "How long FiberLB keeps a locally owned VIP after withdrawing it for drain.";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
bgp = {
|
bgp = {
|
||||||
enable = lib.mkEnableOption "FiberLB native BGP VIP advertisement";
|
enable = lib.mkEnableOption "FiberLB native BGP VIP advertisement";
|
||||||
|
|
||||||
|
|
|
||||||
745
nix/tests/fiberlb-native-bgp-ecmp-drain-vm-smoke.nix
Normal file
745
nix/tests/fiberlb-native-bgp-ecmp-drain-vm-smoke.nix
Normal file
|
|
@ -0,0 +1,745 @@
|
||||||
|
{
|
||||||
|
pkgs,
|
||||||
|
photoncloudPackages,
|
||||||
|
photoncloudModule,
|
||||||
|
nixNosModule,
|
||||||
|
}:
|
||||||
|
|
||||||
|
let
|
||||||
|
edgeZebraConfig = pkgs.writeText "fiberlb-ecmp-edge-zebra.conf" ''
|
||||||
|
hostname edge-zebra
|
||||||
|
log stdout debugging
|
||||||
|
'';
|
||||||
|
edgeBgpdConfig = pkgs.writeText "fiberlb-ecmp-edge-bgpd.conf" ''
|
||||||
|
hostname edge-frr
|
||||||
|
log stdout debugging
|
||||||
|
|
||||||
|
router bgp 65020
|
||||||
|
bgp router-id 192.168.100.1
|
||||||
|
no bgp ebgp-requires-policy
|
||||||
|
bgp bestpath as-path multipath-relax
|
||||||
|
neighbor 192.168.100.2 remote-as 65010
|
||||||
|
neighbor 192.168.100.2 description fiberlb-a
|
||||||
|
neighbor 192.168.100.3 remote-as 65010
|
||||||
|
neighbor 192.168.100.3 description fiberlb-b
|
||||||
|
!
|
||||||
|
address-family ipv4 unicast
|
||||||
|
maximum-paths 8
|
||||||
|
neighbor 192.168.100.2 activate
|
||||||
|
neighbor 192.168.100.3 activate
|
||||||
|
exit-address-family
|
||||||
|
!
|
||||||
|
'';
|
||||||
|
iamProtoDir = ../../iam/proto;
|
||||||
|
iamProto = "iam.proto";
|
||||||
|
fiberlbProtoDir = ../../fiberlb/crates/fiberlb-api/proto;
|
||||||
|
fiberlbProto = "fiberlb.proto";
|
||||||
|
backendScriptA = pkgs.writeText "fiberlb-ecmp-backend-a.py" ''
|
||||||
|
from http.server import BaseHTTPRequestHandler, HTTPServer
|
||||||
|
|
||||||
|
|
||||||
|
class Handler(BaseHTTPRequestHandler):
|
||||||
|
def do_GET(self):
|
||||||
|
body = b"fiberlb ecmp backend a\n"
|
||||||
|
self.send_response(200)
|
||||||
|
self.send_header("Content-Type", "text/plain; charset=utf-8")
|
||||||
|
self.send_header("Content-Length", str(len(body)))
|
||||||
|
self.end_headers()
|
||||||
|
self.wfile.write(body)
|
||||||
|
|
||||||
|
def log_message(self, format, *args):
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
|
HTTPServer(("127.0.0.1", 18081), Handler).serve_forever()
|
||||||
|
'';
|
||||||
|
backendScriptB = pkgs.writeText "fiberlb-ecmp-backend-b.py" ''
|
||||||
|
from http.server import BaseHTTPRequestHandler, HTTPServer
|
||||||
|
|
||||||
|
|
||||||
|
class Handler(BaseHTTPRequestHandler):
|
||||||
|
def do_GET(self):
|
||||||
|
body = b"fiberlb ecmp backend b\n"
|
||||||
|
self.send_response(200)
|
||||||
|
self.send_header("Content-Type", "text/plain; charset=utf-8")
|
||||||
|
self.send_header("Content-Length", str(len(body)))
|
||||||
|
self.end_headers()
|
||||||
|
self.wfile.write(body)
|
||||||
|
|
||||||
|
def log_message(self, format, *args):
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
|
HTTPServer(("127.0.0.1", 18081), Handler).serve_forever()
|
||||||
|
'';
|
||||||
|
in
|
||||||
|
{
|
||||||
|
name = "fiberlb-native-bgp-ecmp-drain-vm-smoke";
|
||||||
|
|
||||||
|
nodes = {
|
||||||
|
edge =
|
||||||
|
{ ... }:
|
||||||
|
{
|
||||||
|
networking.hostName = "edge";
|
||||||
|
networking.useDHCP = false;
|
||||||
|
networking.firewall.enable = false;
|
||||||
|
virtualisation.vlans = [ 1 ];
|
||||||
|
networking.interfaces.eth1.ipv4.addresses = [
|
||||||
|
{
|
||||||
|
address = "192.168.100.1";
|
||||||
|
prefixLength = 24;
|
||||||
|
}
|
||||||
|
];
|
||||||
|
|
||||||
|
environment.systemPackages = with pkgs; [
|
||||||
|
curl
|
||||||
|
frr
|
||||||
|
iproute2
|
||||||
|
jq
|
||||||
|
];
|
||||||
|
|
||||||
|
users.groups.frr = { };
|
||||||
|
users.groups.frrvty = { };
|
||||||
|
users.users.frr = {
|
||||||
|
isSystemUser = true;
|
||||||
|
group = "frr";
|
||||||
|
extraGroups = [ "frrvty" ];
|
||||||
|
};
|
||||||
|
users.users.root.extraGroups = [ "frrvty" ];
|
||||||
|
|
||||||
|
systemd.services.frr-zebra = {
|
||||||
|
description = "FRR zebra for FiberLB ECMP smoke";
|
||||||
|
wantedBy = [ "multi-user.target" ];
|
||||||
|
after = [ "network.target" ];
|
||||||
|
serviceConfig = {
|
||||||
|
Type = "simple";
|
||||||
|
RuntimeDirectory = "frr";
|
||||||
|
RuntimeDirectoryMode = "0755";
|
||||||
|
ExecStartPre = "${pkgs.runtimeShell} -lc '${pkgs.coreutils}/bin/install -d -o root -g root /run/frr /var/run/frr && ${pkgs.coreutils}/bin/rm -f /run/frr/zebra.pid /var/run/frr/zebra.pid'";
|
||||||
|
ExecStart = "${pkgs.frr}/libexec/frr/zebra -f ${edgeZebraConfig} -A 127.0.0.1 -P 2601 -i /run/frr/zebra.pid -z /run/frr/zserv.api -u root -g root --log stdout";
|
||||||
|
Restart = "on-failure";
|
||||||
|
RestartSec = "2s";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
systemd.services.frr-bgpd = {
|
||||||
|
description = "FRR bgpd for FiberLB ECMP smoke";
|
||||||
|
wantedBy = [ "multi-user.target" ];
|
||||||
|
after = [ "network.target" "frr-zebra.service" ];
|
||||||
|
requires = [ "frr-zebra.service" ];
|
||||||
|
serviceConfig = {
|
||||||
|
Type = "simple";
|
||||||
|
RuntimeDirectory = "frr";
|
||||||
|
RuntimeDirectoryMode = "0755";
|
||||||
|
ExecStartPre = "${pkgs.runtimeShell} -lc '${pkgs.coreutils}/bin/install -d -o root -g root /run/frr /var/run/frr && ${pkgs.coreutils}/bin/rm -f /run/frr/bgpd.pid /var/run/frr/bgpd.pid && for _ in $(seq 1 30); do [ -S /run/frr/zserv.api ] && exit 0; sleep 1; done; echo zserv socket did not appear >&2; exit 1'";
|
||||||
|
ExecStart = "${pkgs.frr}/libexec/frr/bgpd -f ${edgeBgpdConfig} -A 127.0.0.1 -P 2605 -p 179 -i /run/frr/bgpd.pid -z /run/frr/zserv.api -S --log stdout";
|
||||||
|
Restart = "on-failure";
|
||||||
|
RestartSec = "2s";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
system.stateVersion = "24.11";
|
||||||
|
};
|
||||||
|
|
||||||
|
lb_a =
|
||||||
|
{ ... }:
|
||||||
|
{
|
||||||
|
imports = [
|
||||||
|
nixNosModule
|
||||||
|
photoncloudModule
|
||||||
|
];
|
||||||
|
|
||||||
|
networking.hostName = "lb-a";
|
||||||
|
networking.useDHCP = false;
|
||||||
|
networking.firewall.enable = false;
|
||||||
|
virtualisation.vlans = [ 1 ];
|
||||||
|
networking.interfaces.eth1.ipv4.addresses = [
|
||||||
|
{
|
||||||
|
address = "192.168.100.2";
|
||||||
|
prefixLength = 24;
|
||||||
|
}
|
||||||
|
];
|
||||||
|
|
||||||
|
environment.systemPackages = with pkgs; [
|
||||||
|
curl
|
||||||
|
grpcurl
|
||||||
|
jq
|
||||||
|
python3
|
||||||
|
];
|
||||||
|
|
||||||
|
services.iam = {
|
||||||
|
enable = true;
|
||||||
|
package = photoncloudPackages.iam-server;
|
||||||
|
port = 50080;
|
||||||
|
httpPort = 8083;
|
||||||
|
storeBackend = "memory";
|
||||||
|
};
|
||||||
|
|
||||||
|
systemd.services.iam.environment = {
|
||||||
|
IAM_ALLOW_RANDOM_SIGNING_KEY = "1";
|
||||||
|
};
|
||||||
|
|
||||||
|
services.fiberlb = {
|
||||||
|
enable = true;
|
||||||
|
package = photoncloudPackages.fiberlb-server;
|
||||||
|
port = 50085;
|
||||||
|
iamAddr = "192.168.100.2:50080";
|
||||||
|
metadataBackend = "sqlite";
|
||||||
|
databaseUrl = "sqlite:/var/lib/fiberlb/metadata.db";
|
||||||
|
singleNode = true;
|
||||||
|
healthCheckIntervalSecs = 1;
|
||||||
|
healthCheckTimeoutSecs = 1;
|
||||||
|
vipCheckIntervalSecs = 1;
|
||||||
|
vipDrain.holdTimeSecs = 3;
|
||||||
|
vipOwnership = {
|
||||||
|
enable = true;
|
||||||
|
interface = "lo";
|
||||||
|
};
|
||||||
|
bgp = {
|
||||||
|
enable = true;
|
||||||
|
localAs = 65010;
|
||||||
|
routerId = "192.168.100.2";
|
||||||
|
nextHop = "192.168.100.2";
|
||||||
|
holdTimeSecs = 30;
|
||||||
|
keepaliveSecs = 10;
|
||||||
|
peers = [
|
||||||
|
{
|
||||||
|
address = "192.168.100.1";
|
||||||
|
port = 179;
|
||||||
|
asn = 65020;
|
||||||
|
description = "edge";
|
||||||
|
}
|
||||||
|
];
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
systemd.services.mock-backend = {
|
||||||
|
description = "FiberLB ECMP backend A";
|
||||||
|
wantedBy = [ "multi-user.target" ];
|
||||||
|
after = [ "network.target" ];
|
||||||
|
serviceConfig = {
|
||||||
|
Type = "simple";
|
||||||
|
ExecStart = "${pkgs.python3}/bin/python ${backendScriptA}";
|
||||||
|
Restart = "always";
|
||||||
|
RestartSec = "1s";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
system.stateVersion = "24.11";
|
||||||
|
};
|
||||||
|
|
||||||
|
lb_b =
|
||||||
|
{ ... }:
|
||||||
|
{
|
||||||
|
imports = [
|
||||||
|
nixNosModule
|
||||||
|
photoncloudModule
|
||||||
|
];
|
||||||
|
|
||||||
|
networking.hostName = "lb-b";
|
||||||
|
networking.useDHCP = false;
|
||||||
|
networking.firewall.enable = false;
|
||||||
|
virtualisation.vlans = [ 1 ];
|
||||||
|
networking.interfaces.eth1.ipv4.addresses = [
|
||||||
|
{
|
||||||
|
address = "192.168.100.3";
|
||||||
|
prefixLength = 24;
|
||||||
|
}
|
||||||
|
];
|
||||||
|
|
||||||
|
environment.systemPackages = with pkgs; [
|
||||||
|
curl
|
||||||
|
grpcurl
|
||||||
|
jq
|
||||||
|
python3
|
||||||
|
];
|
||||||
|
|
||||||
|
services.iam = {
|
||||||
|
enable = true;
|
||||||
|
package = photoncloudPackages.iam-server;
|
||||||
|
port = 50080;
|
||||||
|
httpPort = 8083;
|
||||||
|
storeBackend = "memory";
|
||||||
|
};
|
||||||
|
|
||||||
|
systemd.services.iam.environment = {
|
||||||
|
IAM_ALLOW_RANDOM_SIGNING_KEY = "1";
|
||||||
|
};
|
||||||
|
|
||||||
|
services.fiberlb = {
|
||||||
|
enable = true;
|
||||||
|
package = photoncloudPackages.fiberlb-server;
|
||||||
|
port = 50085;
|
||||||
|
iamAddr = "192.168.100.3:50080";
|
||||||
|
metadataBackend = "sqlite";
|
||||||
|
databaseUrl = "sqlite:/var/lib/fiberlb/metadata.db";
|
||||||
|
singleNode = true;
|
||||||
|
healthCheckIntervalSecs = 1;
|
||||||
|
healthCheckTimeoutSecs = 1;
|
||||||
|
vipCheckIntervalSecs = 1;
|
||||||
|
vipDrain.holdTimeSecs = 3;
|
||||||
|
vipOwnership = {
|
||||||
|
enable = true;
|
||||||
|
interface = "lo";
|
||||||
|
};
|
||||||
|
bgp = {
|
||||||
|
enable = true;
|
||||||
|
localAs = 65010;
|
||||||
|
routerId = "192.168.100.3";
|
||||||
|
nextHop = "192.168.100.3";
|
||||||
|
holdTimeSecs = 30;
|
||||||
|
keepaliveSecs = 10;
|
||||||
|
peers = [
|
||||||
|
{
|
||||||
|
address = "192.168.100.1";
|
||||||
|
port = 179;
|
||||||
|
asn = 65020;
|
||||||
|
description = "edge";
|
||||||
|
}
|
||||||
|
];
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
systemd.services.mock-backend = {
|
||||||
|
description = "FiberLB ECMP backend B";
|
||||||
|
wantedBy = [ "multi-user.target" ];
|
||||||
|
after = [ "network.target" ];
|
||||||
|
serviceConfig = {
|
||||||
|
Type = "simple";
|
||||||
|
ExecStart = "${pkgs.python3}/bin/python ${backendScriptB}";
|
||||||
|
Restart = "always";
|
||||||
|
RestartSec = "1s";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
system.stateVersion = "24.11";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
testScript = ''
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import shlex
|
||||||
|
import time
|
||||||
|
|
||||||
|
IAM_PROTO_DIR = "${iamProtoDir}"
|
||||||
|
IAM_PROTO = "${iamProto}"
|
||||||
|
FIBERLB_PROTO_DIR = "${fiberlbProtoDir}"
|
||||||
|
FIBERLB_PROTO = "${fiberlbProto}"
|
||||||
|
METRIC_RE = re.compile(r"^([a-zA-Z_:][a-zA-Z0-9_:]*)(?:\{([^}]*)\})?\s+([-+0-9.eE]+)$")
|
||||||
|
VIP = "203.0.113.77"
|
||||||
|
VIP_PREFIX = f"{VIP}/32"
|
||||||
|
LISTENER_URL = f"http://{VIP}:18080/"
|
||||||
|
|
||||||
|
def grpcurl_json(machine, endpoint, import_path, proto, service, payload, headers=None):
|
||||||
|
header_args = ""
|
||||||
|
for header in headers or []:
|
||||||
|
header_args += f" -H {shlex.quote(header)}"
|
||||||
|
command = (
|
||||||
|
f"grpcurl -plaintext{header_args} "
|
||||||
|
f"-import-path {shlex.quote(import_path)} "
|
||||||
|
f"-proto {shlex.quote(proto)} "
|
||||||
|
f"-d {shlex.quote(json.dumps(payload))} "
|
||||||
|
f"{shlex.quote(endpoint)} {shlex.quote(service)}"
|
||||||
|
)
|
||||||
|
status, output = machine.execute(f"timeout 15 sh -lc {shlex.quote(command + ' 2>&1')}")
|
||||||
|
if status != 0:
|
||||||
|
raise AssertionError(
|
||||||
|
"grpcurl failed"
|
||||||
|
f" service={service}"
|
||||||
|
f" status={status}"
|
||||||
|
f" payload={json.dumps(payload, sort_keys=True)}"
|
||||||
|
f" output={output}"
|
||||||
|
)
|
||||||
|
return json.loads(output)
|
||||||
|
|
||||||
|
def issue_project_admin_token(machine, org_id, project_id):
|
||||||
|
principal_id = f"fiberlb-ecmp-{machine.name}-{int(time.time())}"
|
||||||
|
deadline = time.time() + 120
|
||||||
|
|
||||||
|
def retry(action):
|
||||||
|
last_error = None
|
||||||
|
while time.time() < deadline:
|
||||||
|
try:
|
||||||
|
return action()
|
||||||
|
except Exception as exc:
|
||||||
|
last_error = exc
|
||||||
|
time.sleep(2)
|
||||||
|
raise AssertionError(f"IAM bootstrap timed out: {last_error}")
|
||||||
|
|
||||||
|
retry(lambda: grpcurl_json(
|
||||||
|
machine,
|
||||||
|
"127.0.0.1:50080",
|
||||||
|
IAM_PROTO_DIR,
|
||||||
|
IAM_PROTO,
|
||||||
|
"iam.v1.IamAdmin/CreatePrincipal",
|
||||||
|
{
|
||||||
|
"id": principal_id,
|
||||||
|
"kind": "PRINCIPAL_KIND_SERVICE_ACCOUNT",
|
||||||
|
"name": principal_id,
|
||||||
|
"orgId": org_id,
|
||||||
|
"projectId": project_id,
|
||||||
|
},
|
||||||
|
))
|
||||||
|
retry(lambda: grpcurl_json(
|
||||||
|
machine,
|
||||||
|
"127.0.0.1:50080",
|
||||||
|
IAM_PROTO_DIR,
|
||||||
|
IAM_PROTO,
|
||||||
|
"iam.v1.IamAdmin/CreateBinding",
|
||||||
|
{
|
||||||
|
"principal": {
|
||||||
|
"kind": "PRINCIPAL_KIND_SERVICE_ACCOUNT",
|
||||||
|
"id": principal_id,
|
||||||
|
},
|
||||||
|
"role": "roles/ProjectAdmin",
|
||||||
|
"scope": {
|
||||||
|
"project": {
|
||||||
|
"id": project_id,
|
||||||
|
"orgId": org_id,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
},
|
||||||
|
))
|
||||||
|
token_response = retry(lambda: grpcurl_json(
|
||||||
|
machine,
|
||||||
|
"127.0.0.1:50080",
|
||||||
|
IAM_PROTO_DIR,
|
||||||
|
IAM_PROTO,
|
||||||
|
"iam.v1.IamToken/IssueToken",
|
||||||
|
{
|
||||||
|
"principalId": principal_id,
|
||||||
|
"principalKind": "PRINCIPAL_KIND_SERVICE_ACCOUNT",
|
||||||
|
"scope": {
|
||||||
|
"project": {
|
||||||
|
"id": project_id,
|
||||||
|
"orgId": org_id,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"ttlSeconds": 3600,
|
||||||
|
},
|
||||||
|
))
|
||||||
|
return token_response["token"]
|
||||||
|
|
||||||
|
def create_load_balancer(machine, token, name_suffix):
|
||||||
|
response = grpcurl_json(
|
||||||
|
machine,
|
||||||
|
"127.0.0.1:50085",
|
||||||
|
FIBERLB_PROTO_DIR,
|
||||||
|
FIBERLB_PROTO,
|
||||||
|
"fiberlb.v1.LoadBalancerService/CreateLoadBalancer",
|
||||||
|
{
|
||||||
|
"name": f"bgp-ecmp-{name_suffix}",
|
||||||
|
"orgId": "bgp-ecmp-org",
|
||||||
|
"projectId": "bgp-ecmp-project",
|
||||||
|
"description": f"native bgp ecmp {name_suffix}",
|
||||||
|
"vipAddress": VIP,
|
||||||
|
},
|
||||||
|
headers=[f"authorization: Bearer {token}"],
|
||||||
|
)
|
||||||
|
lb_id = response["loadbalancer"]["id"]
|
||||||
|
pool_id = grpcurl_json(
|
||||||
|
machine,
|
||||||
|
"127.0.0.1:50085",
|
||||||
|
FIBERLB_PROTO_DIR,
|
||||||
|
FIBERLB_PROTO,
|
||||||
|
"fiberlb.v1.PoolService/CreatePool",
|
||||||
|
{
|
||||||
|
"name": f"bgp-ecmp-pool-{name_suffix}",
|
||||||
|
"loadbalancerId": lb_id,
|
||||||
|
"algorithm": "POOL_ALGORITHM_ROUND_ROBIN",
|
||||||
|
"protocol": "POOL_PROTOCOL_TCP",
|
||||||
|
},
|
||||||
|
headers=[f"authorization: Bearer {token}"],
|
||||||
|
)["pool"]["id"]
|
||||||
|
backend_id = grpcurl_json(
|
||||||
|
machine,
|
||||||
|
"127.0.0.1:50085",
|
||||||
|
FIBERLB_PROTO_DIR,
|
||||||
|
FIBERLB_PROTO,
|
||||||
|
"fiberlb.v1.BackendService/CreateBackend",
|
||||||
|
{
|
||||||
|
"name": f"bgp-ecmp-backend-{name_suffix}",
|
||||||
|
"poolId": pool_id,
|
||||||
|
"address": "127.0.0.1",
|
||||||
|
"port": 18081,
|
||||||
|
"weight": 1,
|
||||||
|
},
|
||||||
|
headers=[f"authorization: Bearer {token}"],
|
||||||
|
)["backend"]["id"]
|
||||||
|
grpcurl_json(
|
||||||
|
machine,
|
||||||
|
"127.0.0.1:50085",
|
||||||
|
FIBERLB_PROTO_DIR,
|
||||||
|
FIBERLB_PROTO,
|
||||||
|
"fiberlb.v1.HealthCheckService/CreateHealthCheck",
|
||||||
|
{
|
||||||
|
"name": f"bgp-ecmp-health-{name_suffix}",
|
||||||
|
"poolId": pool_id,
|
||||||
|
"type": "HEALTH_CHECK_TYPE_HTTP",
|
||||||
|
"intervalSeconds": 1,
|
||||||
|
"timeoutSeconds": 1,
|
||||||
|
"healthyThreshold": 1,
|
||||||
|
"unhealthyThreshold": 1,
|
||||||
|
"httpConfig": {
|
||||||
|
"method": "GET",
|
||||||
|
"path": "/",
|
||||||
|
"expectedCodes": [200],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
headers=[f"authorization: Bearer {token}"],
|
||||||
|
)
|
||||||
|
grpcurl_json(
|
||||||
|
machine,
|
||||||
|
"127.0.0.1:50085",
|
||||||
|
FIBERLB_PROTO_DIR,
|
||||||
|
FIBERLB_PROTO,
|
||||||
|
"fiberlb.v1.ListenerService/CreateListener",
|
||||||
|
{
|
||||||
|
"name": f"bgp-ecmp-listener-{name_suffix}",
|
||||||
|
"loadbalancerId": lb_id,
|
||||||
|
"protocol": "LISTENER_PROTOCOL_TCP",
|
||||||
|
"port": 18080,
|
||||||
|
"defaultPoolId": pool_id,
|
||||||
|
},
|
||||||
|
headers=[f"authorization: Bearer {token}"],
|
||||||
|
)
|
||||||
|
return backend_id
|
||||||
|
|
||||||
|
def wait_for_backend_status(machine, status, backend_id, token):
|
||||||
|
machine.wait_until_succeeds(
|
||||||
|
"grpcurl -plaintext "
|
||||||
|
f"-H {shlex.quote('authorization: Bearer ' + token)} "
|
||||||
|
f"-import-path {shlex.quote(FIBERLB_PROTO_DIR)} "
|
||||||
|
f"-proto {shlex.quote(FIBERLB_PROTO)} "
|
||||||
|
f"-d {shlex.quote(json.dumps({'id': backend_id}))} "
|
||||||
|
"127.0.0.1:50085 fiberlb.v1.BackendService/GetBackend "
|
||||||
|
f"| jq -e {shlex.quote(f'.backend.status == \"{status}\"')}"
|
||||||
|
)
|
||||||
|
|
||||||
|
def machine_diagnostics(machine, unit):
|
||||||
|
metrics = machine.succeed("curl -fsS http://127.0.0.1:9098/metrics || true")
|
||||||
|
service_status = machine.succeed(
|
||||||
|
f"systemctl status {shlex.quote(unit)} --no-pager || true"
|
||||||
|
)
|
||||||
|
journal = machine.succeed(
|
||||||
|
f"journalctl -u {shlex.quote(unit)} -n 200 --no-pager || true"
|
||||||
|
)
|
||||||
|
return (
|
||||||
|
f"metrics:\n{metrics}\n"
|
||||||
|
f"systemctl status:\n{service_status}\n"
|
||||||
|
f"journal:\n{journal}"
|
||||||
|
)
|
||||||
|
|
||||||
|
def edge_bgp_diagnostics():
|
||||||
|
bgpd_status = edge.succeed("systemctl status frr-bgpd.service --no-pager || true")
|
||||||
|
bgpd_journal = edge.succeed("journalctl -u frr-bgpd.service -n 200 --no-pager || true")
|
||||||
|
bgp_summary = edge.succeed("vtysh -c 'show ip bgp summary' || true")
|
||||||
|
bgp_route = edge.succeed(f"vtysh -c 'show ip bgp {VIP_PREFIX}' || true")
|
||||||
|
zebra_route = edge.succeed(f"vtysh -c 'show ip route {VIP_PREFIX}' || true")
|
||||||
|
kernel_route = edge.succeed(f"ip route show {VIP_PREFIX} || true")
|
||||||
|
return (
|
||||||
|
"edge frr-bgpd status:\n"
|
||||||
|
f"{bgpd_status}\n"
|
||||||
|
"edge frr-bgpd journal:\n"
|
||||||
|
f"{bgpd_journal}\n"
|
||||||
|
"edge BGP summary:\n"
|
||||||
|
f"{bgp_summary}\n"
|
||||||
|
f"edge BGP route {VIP_PREFIX}:\n"
|
||||||
|
f"{bgp_route}\n"
|
||||||
|
f"edge zebra route {VIP_PREFIX}:\n"
|
||||||
|
f"{zebra_route}\n"
|
||||||
|
f"edge kernel route {VIP_PREFIX}:\n"
|
||||||
|
f"{kernel_route}\n"
|
||||||
|
)
|
||||||
|
|
||||||
|
def wait_for_unit_or_dump(machine, unit):
|
||||||
|
deadline = time.time() + 120
|
||||||
|
while time.time() < deadline:
|
||||||
|
status, output = machine.execute(f"systemctl is-active {shlex.quote(unit)}")
|
||||||
|
state = output.strip()
|
||||||
|
if status == 0 and state == "active":
|
||||||
|
return
|
||||||
|
if state == "failed":
|
||||||
|
raise AssertionError(
|
||||||
|
f"unit {unit} failed to start\n{machine_diagnostics(machine, unit)}"
|
||||||
|
)
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
|
raise AssertionError(
|
||||||
|
f"unit {unit} did not become active before timeout\n{machine_diagnostics(machine, unit)}"
|
||||||
|
)
|
||||||
|
|
||||||
|
def wait_for_command_or_dump(machine, command, unit=None, timeout=120):
|
||||||
|
deadline = time.time() + timeout
|
||||||
|
last_output = ""
|
||||||
|
while time.time() < deadline:
|
||||||
|
status, output = machine.execute(f"sh -lc {shlex.quote(command + ' 2>&1')}")
|
||||||
|
last_output = output
|
||||||
|
if status == 0:
|
||||||
|
return
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
|
diagnostics = f"last command output:\n{last_output}\n"
|
||||||
|
if unit is not None:
|
||||||
|
diagnostics += machine_diagnostics(machine, unit)
|
||||||
|
diagnostics += f"socket state:\n{machine.succeed('ss -ltnp || true')}\n"
|
||||||
|
raise AssertionError(
|
||||||
|
f"command did not succeed before timeout: {command}\n{diagnostics}"
|
||||||
|
)
|
||||||
|
|
||||||
|
def parse_labels(label_blob):
|
||||||
|
if not label_blob:
|
||||||
|
return {}
|
||||||
|
labels = {}
|
||||||
|
for part in label_blob.split(","):
|
||||||
|
key, value = part.split("=", 1)
|
||||||
|
labels[key] = value.strip().strip('"')
|
||||||
|
return labels
|
||||||
|
|
||||||
|
def wait_for_metric(machine, metric_name, expected_value, labels=None):
|
||||||
|
expected_labels = labels or {}
|
||||||
|
deadline = time.time() + 60
|
||||||
|
last_exposition = ""
|
||||||
|
|
||||||
|
while time.time() < deadline:
|
||||||
|
exposition = machine.succeed("curl -fsS http://127.0.0.1:9098/metrics")
|
||||||
|
last_exposition = exposition
|
||||||
|
for line in exposition.splitlines():
|
||||||
|
line = line.strip()
|
||||||
|
if not line or line.startswith("#"):
|
||||||
|
continue
|
||||||
|
match = METRIC_RE.match(line)
|
||||||
|
if not match:
|
||||||
|
continue
|
||||||
|
name, label_blob, value = match.groups()
|
||||||
|
if name != metric_name:
|
||||||
|
continue
|
||||||
|
if parse_labels(label_blob) != expected_labels:
|
||||||
|
continue
|
||||||
|
if abs(float(value) - float(expected_value)) < 0.0001:
|
||||||
|
return
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
|
raise AssertionError(
|
||||||
|
f"metric {metric_name} with labels={expected_labels} did not reach {expected_value}\n"
|
||||||
|
f"last metrics scrape:\n{last_exposition}\n"
|
||||||
|
f"{machine_diagnostics(machine, 'fiberlb.service')}\n"
|
||||||
|
f"{edge_bgp_diagnostics()}"
|
||||||
|
)
|
||||||
|
|
||||||
|
def wait_for_local_vip(machine, present):
|
||||||
|
pattern = f"inet {VIP}/32"
|
||||||
|
if present:
|
||||||
|
machine.wait_until_succeeds(
|
||||||
|
f"ip -4 addr show dev lo | grep -F {shlex.quote(pattern)}"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
deadline = time.time() + 60
|
||||||
|
while time.time() < deadline:
|
||||||
|
output = machine.succeed("ip -4 addr show dev lo || true")
|
||||||
|
if pattern not in output:
|
||||||
|
return
|
||||||
|
time.sleep(1)
|
||||||
|
raise AssertionError(f"VIP {VIP} still present on loopback")
|
||||||
|
|
||||||
|
def wait_for_edge_route(next_hops):
|
||||||
|
deadline = time.time() + 60
|
||||||
|
last_output = ""
|
||||||
|
while time.time() < deadline:
|
||||||
|
output = edge.succeed(f"ip route show {shlex.quote(VIP_PREFIX)} || true")
|
||||||
|
last_output = output
|
||||||
|
if all(next_hop in output for next_hop in next_hops):
|
||||||
|
return
|
||||||
|
time.sleep(1)
|
||||||
|
raise AssertionError(
|
||||||
|
f"edge route for {VIP_PREFIX} did not contain nexthops {next_hops}\n"
|
||||||
|
f"last kernel route output:\n{last_output}\n"
|
||||||
|
f"{edge_bgp_diagnostics()}"
|
||||||
|
)
|
||||||
|
|
||||||
|
def wait_for_edge_route_absent(needle):
|
||||||
|
deadline = time.time() + 60
|
||||||
|
last_output = ""
|
||||||
|
while time.time() < deadline:
|
||||||
|
output = edge.succeed(f"ip route show {shlex.quote(VIP_PREFIX)} || true")
|
||||||
|
last_output = output
|
||||||
|
if needle not in output:
|
||||||
|
return
|
||||||
|
time.sleep(1)
|
||||||
|
raise AssertionError(
|
||||||
|
f"edge route for {VIP_PREFIX} still contained {needle}\n"
|
||||||
|
f"last kernel route output:\n{last_output}\n"
|
||||||
|
f"{edge_bgp_diagnostics()}"
|
||||||
|
)
|
||||||
|
|
||||||
|
def wait_for_http_any():
|
||||||
|
edge.wait_until_succeeds(
|
||||||
|
f"curl -fsS --max-time 5 {shlex.quote(LISTENER_URL)} | grep -E 'fiberlb ecmp backend (a|b)'"
|
||||||
|
)
|
||||||
|
|
||||||
|
start_all()
|
||||||
|
serial_stdout_off()
|
||||||
|
|
||||||
|
wait_for_unit_or_dump(edge, "frr-zebra.service")
|
||||||
|
wait_for_command_or_dump(edge, "test -S /run/frr/zserv.api", "frr-zebra.service")
|
||||||
|
wait_for_unit_or_dump(edge, "frr-bgpd.service")
|
||||||
|
wait_for_command_or_dump(
|
||||||
|
edge,
|
||||||
|
"ss -ltnH '( sport = :179 )' | grep -q LISTEN",
|
||||||
|
"frr-bgpd.service",
|
||||||
|
)
|
||||||
|
|
||||||
|
for machine in [lb_a, lb_b]:
|
||||||
|
wait_for_unit_or_dump(machine, "iam.service")
|
||||||
|
wait_for_command_or_dump(machine, "ss -ltnH '( sport = :50080 )' | grep -q LISTEN", "iam.service")
|
||||||
|
wait_for_unit_or_dump(machine, "mock-backend.service")
|
||||||
|
wait_for_unit_or_dump(machine, "fiberlb.service")
|
||||||
|
wait_for_command_or_dump(machine, "ss -ltnH '( sport = :50085 )' | grep -q LISTEN", "fiberlb.service")
|
||||||
|
wait_for_command_or_dump(machine, "ss -ltnH '( sport = :9098 )' | grep -q LISTEN", "fiberlb.service")
|
||||||
|
|
||||||
|
wait_for_command_or_dump(
|
||||||
|
edge,
|
||||||
|
"vtysh -c 'show ip bgp neighbor 192.168.100.2' | grep -F 'BGP state = Established'",
|
||||||
|
"frr-bgpd.service",
|
||||||
|
)
|
||||||
|
wait_for_command_or_dump(
|
||||||
|
edge,
|
||||||
|
"vtysh -c 'show ip bgp neighbor 192.168.100.3' | grep -F 'BGP state = Established'",
|
||||||
|
"frr-bgpd.service",
|
||||||
|
)
|
||||||
|
|
||||||
|
token_a = issue_project_admin_token(lb_a, "bgp-ecmp-org", "bgp-ecmp-project")
|
||||||
|
token_b = issue_project_admin_token(lb_b, "bgp-ecmp-org", "bgp-ecmp-project")
|
||||||
|
backend_a = create_load_balancer(lb_a, token_a, "a")
|
||||||
|
backend_b = create_load_balancer(lb_b, token_b, "b")
|
||||||
|
|
||||||
|
wait_for_backend_status(lb_a, "BACKEND_STATUS_ONLINE", backend_a, token_a)
|
||||||
|
wait_for_backend_status(lb_b, "BACKEND_STATUS_ONLINE", backend_b, token_b)
|
||||||
|
wait_for_metric(lb_a, "fiberlb_bgp_connected_peers", 1)
|
||||||
|
wait_for_metric(lb_b, "fiberlb_bgp_connected_peers", 1)
|
||||||
|
wait_for_local_vip(lb_a, True)
|
||||||
|
wait_for_local_vip(lb_b, True)
|
||||||
|
|
||||||
|
wait_for_edge_route(["via 192.168.100.2", "via 192.168.100.3"])
|
||||||
|
wait_for_http_any()
|
||||||
|
|
||||||
|
lb_a.succeed("touch /var/lib/fiberlb/drain")
|
||||||
|
wait_for_metric(lb_a, "fiberlb_vip_drain_active", 1)
|
||||||
|
wait_for_edge_route(["via 192.168.100.3"])
|
||||||
|
wait_for_edge_route_absent("via 192.168.100.2")
|
||||||
|
wait_for_local_vip(lb_a, True)
|
||||||
|
edge.wait_until_succeeds(
|
||||||
|
f"curl -fsS --max-time 5 {shlex.quote(LISTENER_URL)} | grep -F 'fiberlb ecmp backend b'"
|
||||||
|
)
|
||||||
|
|
||||||
|
time.sleep(4)
|
||||||
|
wait_for_local_vip(lb_a, False)
|
||||||
|
|
||||||
|
lb_a.succeed("rm -f /var/lib/fiberlb/drain")
|
||||||
|
wait_for_metric(lb_a, "fiberlb_vip_drain_active", 0)
|
||||||
|
wait_for_local_vip(lb_a, True)
|
||||||
|
wait_for_edge_route(["via 192.168.100.2", "via 192.168.100.3"])
|
||||||
|
wait_for_http_any()
|
||||||
|
'';
|
||||||
|
}
|
||||||
737
nix/tests/fiberlb-native-bgp-interop-vm-smoke.nix
Normal file
737
nix/tests/fiberlb-native-bgp-interop-vm-smoke.nix
Normal file
|
|
@ -0,0 +1,737 @@
|
||||||
|
{
|
||||||
|
pkgs,
|
||||||
|
photoncloudPackages,
|
||||||
|
photoncloudModule,
|
||||||
|
nixNosModule,
|
||||||
|
}:
|
||||||
|
|
||||||
|
let
|
||||||
|
frrZebraConfig = pkgs.writeText "fiberlb-interop-frr-zebra.conf" ''
|
||||||
|
hostname interop-zebra
|
||||||
|
log stdout debugging
|
||||||
|
'';
|
||||||
|
frrBgpdConfig = pkgs.writeText "fiberlb-interop-frr-bgpd.conf" ''
|
||||||
|
hostname interop-frr
|
||||||
|
log stdout debugging
|
||||||
|
|
||||||
|
router bgp 65020
|
||||||
|
bgp router-id 192.168.100.1
|
||||||
|
no bgp ebgp-requires-policy
|
||||||
|
neighbor 192.168.100.2 remote-as 65010
|
||||||
|
neighbor 192.168.100.2 description fiberlb-frr
|
||||||
|
!
|
||||||
|
address-family ipv4 unicast
|
||||||
|
neighbor 192.168.100.2 activate
|
||||||
|
exit-address-family
|
||||||
|
!
|
||||||
|
'';
|
||||||
|
birdConfig = pkgs.writeText "fiberlb-interop-bird.conf" ''
|
||||||
|
router id 192.168.100.3;
|
||||||
|
|
||||||
|
protocol device {}
|
||||||
|
|
||||||
|
protocol kernel {
|
||||||
|
ipv4 {
|
||||||
|
import none;
|
||||||
|
export none;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
protocol bgp fiberlb_peer {
|
||||||
|
local 192.168.100.3 as 65030;
|
||||||
|
neighbor 192.168.100.2 as 65010;
|
||||||
|
|
||||||
|
ipv4 {
|
||||||
|
import all;
|
||||||
|
export none;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
'';
|
||||||
|
|
||||||
|
gobgpdConfig = pkgs.writeText "fiberlb-interop-gobgpd.json" (builtins.toJSON {
|
||||||
|
global = {
|
||||||
|
config = {
|
||||||
|
as = 65040;
|
||||||
|
router-id = "192.168.100.4";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
neighbors = [
|
||||||
|
{
|
||||||
|
config = {
|
||||||
|
neighbor-address = "192.168.100.2";
|
||||||
|
peer-as = 65010;
|
||||||
|
description = "fiberlb-gobgp";
|
||||||
|
};
|
||||||
|
}
|
||||||
|
];
|
||||||
|
});
|
||||||
|
|
||||||
|
iamProtoDir = ../../iam/proto;
|
||||||
|
iamProto = "iam.proto";
|
||||||
|
fiberlbProtoDir = ../../fiberlb/crates/fiberlb-api/proto;
|
||||||
|
fiberlbProto = "fiberlb.proto";
|
||||||
|
backendScript = pkgs.writeText "fiberlb-interop-backend.py" ''
|
||||||
|
from http.server import BaseHTTPRequestHandler, HTTPServer
|
||||||
|
|
||||||
|
|
||||||
|
class Handler(BaseHTTPRequestHandler):
|
||||||
|
def do_GET(self):
|
||||||
|
body = b"fiberlb interop backend\n"
|
||||||
|
self.send_response(200)
|
||||||
|
self.send_header("Content-Type", "text/plain; charset=utf-8")
|
||||||
|
self.send_header("Content-Length", str(len(body)))
|
||||||
|
self.end_headers()
|
||||||
|
self.wfile.write(body)
|
||||||
|
|
||||||
|
def log_message(self, format, *args):
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
|
HTTPServer(("127.0.0.1", 18081), Handler).serve_forever()
|
||||||
|
'';
|
||||||
|
in
|
||||||
|
{
|
||||||
|
name = "fiberlb-native-bgp-interop-vm-smoke";
|
||||||
|
|
||||||
|
nodes = {
|
||||||
|
frr =
|
||||||
|
{ ... }:
|
||||||
|
{
|
||||||
|
networking.hostName = "frr";
|
||||||
|
networking.useDHCP = false;
|
||||||
|
networking.firewall.enable = false;
|
||||||
|
virtualisation.vlans = [ 1 ];
|
||||||
|
networking.interfaces.eth1.ipv4.addresses = [
|
||||||
|
{
|
||||||
|
address = "192.168.100.1";
|
||||||
|
prefixLength = 24;
|
||||||
|
}
|
||||||
|
];
|
||||||
|
|
||||||
|
environment.systemPackages = with pkgs; [
|
||||||
|
curl
|
||||||
|
frr
|
||||||
|
jq
|
||||||
|
iproute2
|
||||||
|
];
|
||||||
|
|
||||||
|
users.groups.frr = { };
|
||||||
|
users.groups.frrvty = { };
|
||||||
|
users.users.frr = {
|
||||||
|
isSystemUser = true;
|
||||||
|
group = "frr";
|
||||||
|
extraGroups = [ "frrvty" ];
|
||||||
|
};
|
||||||
|
users.users.root.extraGroups = [ "frrvty" ];
|
||||||
|
|
||||||
|
systemd.services.frr-zebra = {
|
||||||
|
description = "FRR zebra for FiberLB interop smoke";
|
||||||
|
wantedBy = [ "multi-user.target" ];
|
||||||
|
after = [ "network.target" ];
|
||||||
|
serviceConfig = {
|
||||||
|
Type = "simple";
|
||||||
|
RuntimeDirectory = "frr";
|
||||||
|
RuntimeDirectoryMode = "0755";
|
||||||
|
ExecStartPre = "${pkgs.runtimeShell} -lc '${pkgs.coreutils}/bin/install -d -o root -g root /run/frr /var/run/frr && ${pkgs.coreutils}/bin/rm -f /run/frr/zebra.pid /var/run/frr/zebra.pid'";
|
||||||
|
ExecStart = "${pkgs.frr}/libexec/frr/zebra -f ${frrZebraConfig} -A 127.0.0.1 -P 2601 -i /run/frr/zebra.pid -z /run/frr/zserv.api -u root -g root --log stdout";
|
||||||
|
Restart = "on-failure";
|
||||||
|
RestartSec = "2s";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
systemd.services.frr-bgpd = {
|
||||||
|
description = "FRR bgpd for FiberLB interop smoke";
|
||||||
|
wantedBy = [ "multi-user.target" ];
|
||||||
|
after = [ "network.target" "frr-zebra.service" ];
|
||||||
|
requires = [ "frr-zebra.service" ];
|
||||||
|
serviceConfig = {
|
||||||
|
Type = "simple";
|
||||||
|
RuntimeDirectory = "frr";
|
||||||
|
RuntimeDirectoryMode = "0755";
|
||||||
|
ExecStartPre = "${pkgs.runtimeShell} -lc '${pkgs.coreutils}/bin/install -d -o root -g root /run/frr /var/run/frr && ${pkgs.coreutils}/bin/rm -f /run/frr/bgpd.pid /var/run/frr/bgpd.pid && for _ in $(seq 1 30); do [ -S /run/frr/zserv.api ] && exit 0; sleep 1; done; echo zserv socket did not appear >&2; exit 1'";
|
||||||
|
ExecStart = "${pkgs.frr}/libexec/frr/bgpd -f ${frrBgpdConfig} -A 127.0.0.1 -P 2605 -p 179 -i /run/frr/bgpd.pid -z /run/frr/zserv.api -S --log stdout";
|
||||||
|
Restart = "on-failure";
|
||||||
|
RestartSec = "2s";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
system.stateVersion = "24.11";
|
||||||
|
};
|
||||||
|
|
||||||
|
bird =
|
||||||
|
{ ... }:
|
||||||
|
{
|
||||||
|
networking.hostName = "bird";
|
||||||
|
networking.useDHCP = false;
|
||||||
|
networking.firewall.enable = false;
|
||||||
|
virtualisation.vlans = [ 1 ];
|
||||||
|
networking.interfaces.eth1.ipv4.addresses = [
|
||||||
|
{
|
||||||
|
address = "192.168.100.3";
|
||||||
|
prefixLength = 24;
|
||||||
|
}
|
||||||
|
];
|
||||||
|
|
||||||
|
environment.systemPackages = with pkgs; [
|
||||||
|
bird2
|
||||||
|
jq
|
||||||
|
];
|
||||||
|
|
||||||
|
systemd.services.bird-peer = {
|
||||||
|
description = "BIRD peer for FiberLB interop smoke";
|
||||||
|
wantedBy = [ "multi-user.target" ];
|
||||||
|
after = [ "network.target" ];
|
||||||
|
serviceConfig = {
|
||||||
|
Type = "simple";
|
||||||
|
ExecStart = "${pkgs.bird2}/bin/bird -f -c ${birdConfig} -s /run/bird.ctl";
|
||||||
|
Restart = "on-failure";
|
||||||
|
RestartSec = "2s";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
system.stateVersion = "24.11";
|
||||||
|
};
|
||||||
|
|
||||||
|
gobgp =
|
||||||
|
{ ... }:
|
||||||
|
{
|
||||||
|
networking.hostName = "gobgp";
|
||||||
|
networking.useDHCP = false;
|
||||||
|
networking.firewall.enable = false;
|
||||||
|
virtualisation.vlans = [ 1 ];
|
||||||
|
networking.interfaces.eth1.ipv4.addresses = [
|
||||||
|
{
|
||||||
|
address = "192.168.100.4";
|
||||||
|
prefixLength = 24;
|
||||||
|
}
|
||||||
|
];
|
||||||
|
|
||||||
|
environment.systemPackages = with pkgs; [
|
||||||
|
gobgp
|
||||||
|
gobgpd
|
||||||
|
jq
|
||||||
|
];
|
||||||
|
|
||||||
|
systemd.services.gobgpd-peer = {
|
||||||
|
description = "GoBGP peer for FiberLB interop smoke";
|
||||||
|
wantedBy = [ "multi-user.target" ];
|
||||||
|
after = [ "network.target" ];
|
||||||
|
serviceConfig = {
|
||||||
|
Type = "simple";
|
||||||
|
ExecStart = "${pkgs.gobgpd}/bin/gobgpd -t json -f ${gobgpdConfig} --api-hosts 127.0.0.1:50051 -p";
|
||||||
|
Restart = "on-failure";
|
||||||
|
RestartSec = "2s";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
system.stateVersion = "24.11";
|
||||||
|
};
|
||||||
|
|
||||||
|
lb =
|
||||||
|
{ ... }:
|
||||||
|
{
|
||||||
|
imports = [
|
||||||
|
nixNosModule
|
||||||
|
photoncloudModule
|
||||||
|
];
|
||||||
|
|
||||||
|
networking.hostName = "lb";
|
||||||
|
networking.useDHCP = false;
|
||||||
|
networking.firewall.enable = false;
|
||||||
|
virtualisation.vlans = [ 1 ];
|
||||||
|
networking.interfaces.eth1.ipv4.addresses = [
|
||||||
|
{
|
||||||
|
address = "192.168.100.2";
|
||||||
|
prefixLength = 24;
|
||||||
|
}
|
||||||
|
];
|
||||||
|
|
||||||
|
environment.systemPackages = with pkgs; [
|
||||||
|
curl
|
||||||
|
grpcurl
|
||||||
|
jq
|
||||||
|
python3
|
||||||
|
];
|
||||||
|
|
||||||
|
services.iam = {
|
||||||
|
enable = true;
|
||||||
|
package = photoncloudPackages.iam-server;
|
||||||
|
port = 50080;
|
||||||
|
httpPort = 8083;
|
||||||
|
storeBackend = "memory";
|
||||||
|
};
|
||||||
|
|
||||||
|
systemd.services.iam.environment = {
|
||||||
|
IAM_ALLOW_RANDOM_SIGNING_KEY = "1";
|
||||||
|
};
|
||||||
|
|
||||||
|
services.fiberlb = {
|
||||||
|
enable = true;
|
||||||
|
package = photoncloudPackages.fiberlb-server;
|
||||||
|
port = 50085;
|
||||||
|
iamAddr = "192.168.100.2:50080";
|
||||||
|
metadataBackend = "sqlite";
|
||||||
|
databaseUrl = "sqlite:/var/lib/fiberlb/metadata.db";
|
||||||
|
singleNode = true;
|
||||||
|
healthCheckIntervalSecs = 1;
|
||||||
|
healthCheckTimeoutSecs = 1;
|
||||||
|
vipCheckIntervalSecs = 1;
|
||||||
|
vipOwnership = {
|
||||||
|
enable = true;
|
||||||
|
interface = "lo";
|
||||||
|
};
|
||||||
|
bgp = {
|
||||||
|
enable = true;
|
||||||
|
localAs = 65010;
|
||||||
|
routerId = "192.168.100.2";
|
||||||
|
nextHop = "192.168.100.2";
|
||||||
|
holdTimeSecs = 9;
|
||||||
|
keepaliveSecs = 3;
|
||||||
|
peers = [
|
||||||
|
{
|
||||||
|
address = "192.168.100.1";
|
||||||
|
port = 179;
|
||||||
|
asn = 65020;
|
||||||
|
description = "frr-peer";
|
||||||
|
med = 10;
|
||||||
|
communities = [ "65010:101" ];
|
||||||
|
}
|
||||||
|
{
|
||||||
|
address = "192.168.100.3";
|
||||||
|
port = 179;
|
||||||
|
asn = 65030;
|
||||||
|
description = "bird-peer";
|
||||||
|
med = 20;
|
||||||
|
communities = [ "65010:202" ];
|
||||||
|
}
|
||||||
|
{
|
||||||
|
address = "192.168.100.4";
|
||||||
|
port = 179;
|
||||||
|
asn = 65040;
|
||||||
|
description = "gobgp-peer";
|
||||||
|
med = 30;
|
||||||
|
communities = [ "65010:303" ];
|
||||||
|
}
|
||||||
|
];
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
systemd.services.mock-backend = {
|
||||||
|
description = "FiberLB interop backend";
|
||||||
|
wantedBy = [ "multi-user.target" ];
|
||||||
|
after = [ "network.target" ];
|
||||||
|
serviceConfig = {
|
||||||
|
Type = "simple";
|
||||||
|
ExecStart = "${pkgs.python3}/bin/python ${backendScript}";
|
||||||
|
Restart = "always";
|
||||||
|
RestartSec = "1s";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
system.stateVersion = "24.11";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
testScript = ''
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import shlex
|
||||||
|
import time
|
||||||
|
|
||||||
|
IAM_PROTO_DIR = "${iamProtoDir}"
|
||||||
|
IAM_PROTO = "${iamProto}"
|
||||||
|
FIBERLB_PROTO_DIR = "${fiberlbProtoDir}"
|
||||||
|
FIBERLB_PROTO = "${fiberlbProto}"
|
||||||
|
METRIC_RE = re.compile(r"^([a-zA-Z_:][a-zA-Z0-9_:]*)(?:\{([^}]*)\})?\s+([-+0-9.eE]+)$")
|
||||||
|
|
||||||
|
def grpcurl_json(machine, endpoint, import_path, proto, service, payload, headers=None):
    """Run grpcurl on `machine` against `service` and decode its JSON reply.

    `payload` is serialized to JSON for -d; each entry of `headers` becomes a
    -H flag. Raises AssertionError (with full context) on a non-zero exit.
    """
    header_args = "".join(f" -H {shlex.quote(h)}" for h in (headers or []))
    command = " ".join(
        [
            f"grpcurl -plaintext{header_args}",
            f"-import-path {shlex.quote(import_path)}",
            f"-proto {shlex.quote(proto)}",
            f"-d {shlex.quote(json.dumps(payload))}",
            f"{shlex.quote(endpoint)} {shlex.quote(service)}",
        ]
    )
    # Bound the call at 15s and fold stderr into the captured output.
    status, output = machine.execute(f"timeout 15 sh -lc {shlex.quote(command + ' 2>&1')}")
    if status == 0:
        return json.loads(output)
    raise AssertionError(
        "grpcurl failed"
        f" service={service}"
        f" status={status}"
        f" payload={json.dumps(payload, sort_keys=True)}"
        f" output={output}"
    )
|
||||||
|
|
||||||
|
def issue_project_admin_token(machine, org_id, project_id):
    """Bootstrap a throwaway service account on the IAM service and return a
    ProjectAdmin bearer token scoped to (org_id, project_id).

    The principal id includes a timestamp so repeated runs don't collide.
    All three IAM calls share one 120s deadline: IAM may still be starting,
    so each call is retried (2s apart) until it succeeds or the shared
    budget is exhausted.
    """
    principal_id = f"fiberlb-interop-{int(time.time())}"
    deadline = time.time() + 120

    def retry(action):
        # Retry `action` until it returns or the shared deadline passes.
        # Broad `except Exception` is deliberate: any transport or decode
        # failure during IAM startup is treated as "not ready yet".
        last_error = None
        while time.time() < deadline:
            try:
                return action()
            except Exception as exc:
                last_error = exc
                time.sleep(2)
        raise AssertionError(f"IAM bootstrap timed out: {last_error}")

    # 1. Create the service-account principal.
    retry(lambda: grpcurl_json(
        machine,
        "127.0.0.1:50080",
        IAM_PROTO_DIR,
        IAM_PROTO,
        "iam.v1.IamAdmin/CreatePrincipal",
        {
            "id": principal_id,
            "kind": "PRINCIPAL_KIND_SERVICE_ACCOUNT",
            "name": principal_id,
            "orgId": org_id,
            "projectId": project_id,
        },
    ))
    # 2. Bind it to roles/ProjectAdmin on the target project.
    retry(lambda: grpcurl_json(
        machine,
        "127.0.0.1:50080",
        IAM_PROTO_DIR,
        IAM_PROTO,
        "iam.v1.IamAdmin/CreateBinding",
        {
            "principal": {
                "kind": "PRINCIPAL_KIND_SERVICE_ACCOUNT",
                "id": principal_id,
            },
            "role": "roles/ProjectAdmin",
            "scope": {
                "project": {
                    "id": project_id,
                    "orgId": org_id,
                }
            },
        },
    ))
    # 3. Issue a 1-hour token for the bound principal.
    token_response = retry(lambda: grpcurl_json(
        machine,
        "127.0.0.1:50080",
        IAM_PROTO_DIR,
        IAM_PROTO,
        "iam.v1.IamToken/IssueToken",
        {
            "principalId": principal_id,
            "principalKind": "PRINCIPAL_KIND_SERVICE_ACCOUNT",
            "scope": {
                "project": {
                    "id": project_id,
                    "orgId": org_id,
                }
            },
            "ttlSeconds": 3600,
        },
    ))
    return token_response["token"]
|
||||||
|
|
||||||
|
def wait_for_backend_status(status, backend_id, token):
    """Poll BackendService/GetBackend on the `lb` machine until the backend
    reports `status` (e.g. "BACKEND_STATUS_ONLINE").

    NOTE(review): unlike the other helpers this one always runs on the
    module-level `lb` machine rather than taking a machine parameter.
    The jq -e exit code drives wait_until_succeeds' retry loop.
    """
    lb.wait_until_succeeds(
        "grpcurl -plaintext "
        f"-H {shlex.quote('authorization: Bearer ' + token)} "
        f"-import-path {shlex.quote(FIBERLB_PROTO_DIR)} "
        f"-proto {shlex.quote(FIBERLB_PROTO)} "
        f"-d {shlex.quote(json.dumps({'id': backend_id}))} "
        "127.0.0.1:50085 fiberlb.v1.BackendService/GetBackend "
        f"| jq -e {shlex.quote(f'.backend.status == \"{status}\"')}"
    )
|
||||||
|
|
||||||
|
def parse_labels(label_blob):
    """Parse a Prometheus exposition label blob (the text between `{}`)
    into a dict, e.g. 'peer="1.2.3.4:179"' -> {"peer": "1.2.3.4:179"}.

    Returns {} for None/empty input. Whitespace around keys and values is
    stripped and surrounding double quotes removed from values.

    Bug fix: keys were previously not stripped, so a blob rendered with a
    space after the comma (a="1", b="2") produced the key " b" and label
    matching in wait_for_metric silently failed.

    NOTE(review): values containing a literal comma or escaped quote are
    not supported by this simple split — fine for the metrics asserted in
    this test.
    """
    if not label_blob:
        return {}
    labels = {}
    for part in label_blob.split(","):
        key, value = part.split("=", 1)
        labels[key.strip()] = value.strip().strip('"')
    return labels
|
||||||
|
|
||||||
|
def fiberlb_diagnostics():
    """Collect the current fiberlb metrics scrape and the last 200 journal
    lines from the `lb` machine, formatted for failure messages."""
    metrics = lb.succeed("curl -fsS http://127.0.0.1:9098/metrics || true")
    journal = lb.succeed("journalctl -u fiberlb.service -n 200 --no-pager || true")
    return f"fiberlb metrics:\n{metrics}\nfiberlb journal:\n{journal}"
|
||||||
|
|
||||||
|
def wait_for_metric(metric_name, expected_value, labels=None):
    """Poll the fiberlb /metrics endpoint on `lb` (up to 60s) until a sample
    named `metric_name` whose label set equals `labels` (exact match; {} when
    omitted) has a value within 1e-4 of `expected_value`.

    Raises AssertionError with the last scrape plus fiberlb diagnostics
    attached if the metric never converges.
    """
    expected_labels = labels or {}
    deadline = time.time() + 60
    last_exposition = ""

    while time.time() < deadline:
        exposition = lb.succeed("curl -fsS http://127.0.0.1:9098/metrics")
        last_exposition = exposition
        for line in exposition.splitlines():
            line = line.strip()
            # Skip blanks and HELP/TYPE comment lines.
            if not line or line.startswith("#"):
                continue
            match = METRIC_RE.match(line)
            if not match:
                continue
            name, label_blob, value = match.groups()
            if name != metric_name:
                continue
            # Labels must match exactly, not merely be a superset.
            if parse_labels(label_blob) != expected_labels:
                continue
            # Float tolerance: exposition values round-trip through text.
            if abs(float(value) - float(expected_value)) < 0.0001:
                return
        time.sleep(1)

    raise AssertionError(
        f"metric {metric_name} with labels={expected_labels} did not reach {expected_value}\n"
        f"last metrics scrape:\n{last_exposition}\n"
        f"{fiberlb_diagnostics()}"
    )
|
||||||
|
|
||||||
|
def wait_for_local_vip(vip):
    """Block until `vip` is bound as a /32 address on lo of the `lb` machine
    (the vipOwnership reconciler binds owned VIPs on loopback)."""
    needle = shlex.quote(f"inet {vip}/32")
    lb.wait_until_succeeds(f"ip -4 addr show dev lo | grep -F {needle}")
|
||||||
|
|
||||||
|
def wait_for_gobgp_route(prefix, present):
    """Assert presence (present=True) or eventual absence (present=False) of
    `prefix` in the GoBGP global RIB.

    The `|| true` keeps the command's exit status clean when the RIB is
    empty; presence is then decided by the grep / substring check.
    """
    command = "gobgp -u 127.0.0.1 -p 50051 global rib || true"
    if present:
        gobgp.wait_until_succeeds(f"{command} | grep -F {shlex.quote(prefix)}")
    else:
        # Absence can't use wait_until_succeeds directly; poll for up to 60s.
        deadline = time.time() + 60
        while time.time() < deadline:
            output = gobgp.succeed(command)
            if prefix not in output:
                return
            time.sleep(1)
        raise AssertionError(f"route {prefix} still present in GoBGP RIB")
|
||||||
|
|
||||||
|
def wait_for_bird_route(prefix):
    """Block until BIRD's routing table contains `prefix`."""
    quoted = shlex.quote(prefix)
    bird.wait_until_succeeds(
        f"birdc -s /run/bird.ctl show route for {quoted} all | grep -F {quoted}"
    )
|
||||||
|
|
||||||
|
def wait_for_frr_route(prefix):
    """Block until FRR's BGP table contains `prefix`."""
    show_cmd = shlex.quote("show ip bgp " + prefix)
    frr.wait_until_succeeds(f"vtysh -c {show_cmd} | grep -F {shlex.quote(prefix)}")
|
||||||
|
|
||||||
|
def wait_for_unit_or_dump(machine, unit):
    """Wait up to 120s for systemd `unit` on `machine` to become active.

    If the unit enters the "failed" state, or the deadline passes, raise
    AssertionError with `systemctl status` and the last 200 journal lines
    attached so the test log is self-diagnosing.

    Refactor: the status/journal dump was duplicated verbatim for the
    failed and timeout paths; it is now a single local helper. Failure
    messages are unchanged.
    """

    def raise_with_dump(reason):
        # Attach unit status + journal to the failure and raise.
        service_status = machine.succeed(
            f"systemctl status {shlex.quote(unit)} --no-pager || true"
        )
        journal = machine.succeed(
            f"journalctl -u {shlex.quote(unit)} -n 200 --no-pager || true"
        )
        raise AssertionError(
            f"unit {unit} {reason}\n"
            f"systemctl status:\n{service_status}\n"
            f"journal:\n{journal}"
        )

    deadline = time.time() + 120
    while time.time() < deadline:
        status, output = machine.execute(f"systemctl is-active {shlex.quote(unit)}")
        state = output.strip()
        if status == 0 and state == "active":
            return
        if state == "failed":
            raise_with_dump("failed to start")
        time.sleep(1)

    raise_with_dump("did not become active before timeout")
|
||||||
|
|
||||||
|
def wait_for_command_or_dump(machine, command, unit=None, timeout=120):
    """Retry shell `command` on `machine` (1s apart) until it exits 0 or
    `timeout` seconds elapse.

    On timeout, raise AssertionError carrying the last command output, the
    status/journal of `unit` (when given), and the machine's listening
    sockets — everything needed to diagnose a service that never came up.
    """
    deadline = time.time() + timeout
    last_output = ""
    while time.time() < deadline:
        # stderr is folded into the captured output for the diagnostics.
        status, output = machine.execute(f"sh -lc {shlex.quote(command + ' 2>&1')}")
        last_output = output
        if status == 0:
            return
        time.sleep(1)

    diagnostics = f"last command output:\n{last_output}\n"
    if unit is not None:
        diagnostics += (
            f"systemctl status:\n{machine.succeed(f'systemctl status {shlex.quote(unit)} --no-pager || true')}\n"
            f"journal:\n{machine.succeed(f'journalctl -u {shlex.quote(unit)} -n 200 --no-pager || true')}\n"
        )
    diagnostics += f"socket state:\n{machine.succeed('ss -ltnp || true')}\n"
    raise AssertionError(
        f"command did not succeed before timeout: {command}\n{diagnostics}"
    )
|
||||||
|
|
||||||
|
# --- Phase 1: boot all VMs and wait for every daemon and BGP session. ---
start_all()
serial_stdout_off()

# FRR needs zebra (and its zserv socket) before bgpd is useful.
wait_for_unit_or_dump(frr, "frr-zebra.service")
wait_for_command_or_dump(frr, "test -S /run/frr/zserv.api", "frr-zebra.service")
wait_for_unit_or_dump(frr, "frr-bgpd.service")
wait_for_command_or_dump(
    frr,
    "ss -ltnH '( sport = :179 )' | grep -q LISTEN",
    "frr-bgpd.service",
)
wait_for_unit_or_dump(bird, "bird-peer.service")
wait_for_unit_or_dump(gobgp, "gobgpd-peer.service")
wait_for_command_or_dump(
    gobgp,
    "ss -ltnH '( sport = :179 )' | grep -q LISTEN",
    "gobgpd-peer.service",
)
# IAM (50080) must be up before fiberlb can authenticate requests;
# fiberlb serves gRPC on 50085 and metrics on 9098.
wait_for_unit_or_dump(lb, "iam.service")
wait_for_command_or_dump(lb, "ss -ltnH '( sport = :50080 )' | grep -q LISTEN", "iam.service")
wait_for_unit_or_dump(lb, "mock-backend.service")
wait_for_unit_or_dump(lb, "fiberlb.service")
wait_for_command_or_dump(lb, "ss -ltnH '( sport = :50085 )' | grep -q LISTEN", "fiberlb.service")
wait_for_command_or_dump(lb, "ss -ltnH '( sport = :9098 )' | grep -q LISTEN", "fiberlb.service")

# All three peers must see an Established session with the lb (192.168.100.2),
# and fiberlb's own metrics must agree.
frr.wait_until_succeeds("vtysh -c 'show ip bgp neighbor 192.168.100.2' | grep -F 'BGP state = Established'")
bird.wait_until_succeeds("birdc -s /run/bird.ctl show protocols all fiberlb_peer | grep -F Established")
gobgp.wait_until_succeeds("gobgp -u 127.0.0.1 -p 50051 neighbor | grep -F 192.168.100.2")
wait_for_metric("fiberlb_bgp_configured_peers", 3)
wait_for_metric("fiberlb_bgp_peer_session_up", 1, {"peer": "192.168.100.1:179"})
wait_for_metric("fiberlb_bgp_peer_session_up", 1, {"peer": "192.168.100.3:179"})
wait_for_metric("fiberlb_bgp_peer_session_up", 1, {"peer": "192.168.100.4:179"})
wait_for_metric("fiberlb_bgp_connected_peers", 3)
|
||||||
|
|
||||||
|
# --- Phase 2: obtain a token and provision LB -> pool -> backend ->
# health check -> listener via the fiberlb gRPC API. ---
token = issue_project_admin_token(lb, "bgp-interop-org", "bgp-interop-project")

# Create the load balancer with an explicit VIP the routers will learn.
lb_response = grpcurl_json(
    lb,
    "127.0.0.1:50085",
    FIBERLB_PROTO_DIR,
    FIBERLB_PROTO,
    "fiberlb.v1.LoadBalancerService/CreateLoadBalancer",
    {
        "name": "bgp-interop-lb",
        "orgId": "bgp-interop-org",
        "projectId": "bgp-interop-project",
        "description": "native bgp interop smoke",
        "vipAddress": "203.0.113.77",
    },
    headers=[f"authorization: Bearer {token}"],
)
loadbalancer = lb_response["loadbalancer"]
lb_id = loadbalancer["id"]
# Use the server-echoed VIP (not the literal) for all later assertions.
vip = loadbalancer["vipAddress"]
vip_prefix = f"{vip}/32"

pool_id = grpcurl_json(
    lb,
    "127.0.0.1:50085",
    FIBERLB_PROTO_DIR,
    FIBERLB_PROTO,
    "fiberlb.v1.PoolService/CreatePool",
    {
        "name": "bgp-interop-pool",
        "loadbalancerId": lb_id,
        "algorithm": "POOL_ALGORITHM_ROUND_ROBIN",
        "protocol": "POOL_PROTOCOL_TCP",
    },
    headers=[f"authorization: Bearer {token}"],
)["pool"]["id"]

# The mock-backend service listens on 127.0.0.1:18081 on the lb machine.
backend_id = grpcurl_json(
    lb,
    "127.0.0.1:50085",
    FIBERLB_PROTO_DIR,
    FIBERLB_PROTO,
    "fiberlb.v1.BackendService/CreateBackend",
    {
        "name": "bgp-interop-backend",
        "poolId": pool_id,
        "address": "127.0.0.1",
        "port": 18081,
        "weight": 1,
    },
    headers=[f"authorization: Bearer {token}"],
)["backend"]["id"]

# Aggressive 1s/1-threshold HTTP health check so the backend flips online fast.
grpcurl_json(
    lb,
    "127.0.0.1:50085",
    FIBERLB_PROTO_DIR,
    FIBERLB_PROTO,
    "fiberlb.v1.HealthCheckService/CreateHealthCheck",
    {
        "name": "bgp-interop-health",
        "poolId": pool_id,
        "type": "HEALTH_CHECK_TYPE_HTTP",
        "intervalSeconds": 1,
        "timeoutSeconds": 1,
        "healthyThreshold": 1,
        "unhealthyThreshold": 1,
        "httpConfig": {
            "method": "GET",
            "path": "/",
            "expectedCodes": [200],
        },
    },
    headers=[f"authorization: Bearer {token}"],
)

grpcurl_json(
    lb,
    "127.0.0.1:50085",
    FIBERLB_PROTO_DIR,
    FIBERLB_PROTO,
    "fiberlb.v1.ListenerService/CreateListener",
    {
        "name": "bgp-interop-listener",
        "loadbalancerId": lb_id,
        "protocol": "LISTENER_PROTOCOL_TCP",
        "port": 18080,
        "defaultPoolId": pool_id,
    },
    headers=[f"authorization: Bearer {token}"],
)
|
||||||
|
|
||||||
|
# --- Phase 3: verify VIP advertisement, per-peer BGP attributes, and
# behavior across a peer outage/recovery. ---
wait_for_backend_status("BACKEND_STATUS_ONLINE", backend_id, token)
wait_for_local_vip(vip)
wait_for_metric("fiberlb_bgp_desired_routes", 1)
wait_for_frr_route(vip_prefix)
wait_for_bird_route(vip_prefix)
wait_for_gobgp_route(vip_prefix, True)

# Per-peer MED and community values configured in the Nix bgp.peers list
# must arrive intact at each implementation.
frr.wait_until_succeeds(
    "vtysh -c 'show ip bgp 203.0.113.77/32' | grep -F 'metric 10'"
)
frr.wait_until_succeeds(
    "vtysh -c 'show ip bgp 203.0.113.77/32' | grep -F 'Community: 65010:101'"
)
bird.wait_until_succeeds(
    "birdc -s /run/bird.ctl show route for 203.0.113.77/32 all | grep -F 'BGP.med: 20'"
)
bird.wait_until_succeeds(
    "birdc -s /run/bird.ctl show route for 203.0.113.77/32 all | grep -F 'BGP.community: (65010,202)'"
)

# Stop one peer: fiberlb must notice the lost session while the remaining
# peers keep the route.
gobgp.succeed("systemctl stop gobgpd-peer.service")
wait_for_metric("fiberlb_bgp_connected_peers", 2)
wait_for_metric("fiberlb_bgp_peer_session_up", 0, {"peer": "192.168.100.4:179"})
wait_for_frr_route(vip_prefix)
wait_for_bird_route(vip_prefix)

# Bring the peer back: session and route must be re-established.
gobgp.succeed("systemctl start gobgpd-peer.service")
wait_for_unit_or_dump(gobgp, "gobgpd-peer.service")
gobgp.wait_until_succeeds("gobgp -u 127.0.0.1 -p 50051 neighbor | grep -F 192.168.100.2")
wait_for_metric("fiberlb_bgp_connected_peers", 3)
wait_for_metric("fiberlb_bgp_peer_session_up", 1, {"peer": "192.168.100.4:179"})
wait_for_gobgp_route(vip_prefix, True)
'';
|
||||||
|
}
|
||||||
Loading…
Add table
Reference in a new issue