Includes all pending changes needed for nixos-anywhere: - fiberlb: L7 policy, rule, certificate types - deployer: New service for cluster management - nix-nos: Generic network modules - Various service updates and fixes 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
352 lines
11 KiB
Rust
352 lines
11 KiB
Rust
//! Maglev Consistent Hashing
//!
//! Implementation of Google's Maglev consistent hashing algorithm for L4 load balancing.
//! Reference: https://research.google/pubs/pub44824/
|
|
|
|
use std::collections::hash_map::DefaultHasher;
|
|
use std::hash::{Hash, Hasher};
|
|
use fiberlb_types::Backend;
|
|
|
|
/// Default lookup table size (prime number for better distribution)
/// Google's paper uses 65537, but we use a smaller prime for memory efficiency
/// (65521 is the largest prime below 2^16).
pub const DEFAULT_TABLE_SIZE: usize = 65521;
|
|
|
|
/// Maglev lookup table for consistent hashing
///
/// Maps hashed connection keys to backend indices with minimal disruption
/// when the backend set changes.
#[derive(Debug, Clone)]
pub struct MaglevTable {
    /// Lookup table mapping hash values to backend indices
    /// (each entry is an index into `backends`)
    table: Vec<usize>,
    /// Backend identifiers (for reconstruction), formatted "address:port"
    backends: Vec<String>,
    /// Table size (must be prime); equals `table.len()` when non-empty
    size: usize,
}
|
|
|
|
impl MaglevTable {
|
|
/// Create a new Maglev lookup table from backends
|
|
///
|
|
/// # Arguments
|
|
/// * `backends` - List of backend servers
|
|
/// * `size` - Table size (should be a prime number, defaults to 65521)
|
|
pub fn new(backends: &[Backend], size: Option<usize>) -> Self {
|
|
let size = size.unwrap_or(DEFAULT_TABLE_SIZE);
|
|
|
|
if backends.is_empty() {
|
|
return Self {
|
|
table: vec![],
|
|
backends: vec![],
|
|
size,
|
|
};
|
|
}
|
|
|
|
let backend_ids: Vec<String> = backends
|
|
.iter()
|
|
.map(|b| format!("{}:{}", b.address, b.port))
|
|
.collect();
|
|
|
|
let table = Self::generate_lookup_table(&backend_ids, size);
|
|
|
|
Self {
|
|
table,
|
|
backends: backend_ids,
|
|
size,
|
|
}
|
|
}
|
|
|
|
/// Lookup a backend index for a given key (e.g., source IP + port)
|
|
pub fn lookup(&self, key: &str) -> Option<usize> {
|
|
if self.table.is_empty() {
|
|
return None;
|
|
}
|
|
|
|
let hash = Self::hash_key(key);
|
|
let idx = (hash as usize) % self.size;
|
|
Some(self.table[idx])
|
|
}
|
|
|
|
/// Get the backend identifier at a given index
|
|
pub fn backend_id(&self, idx: usize) -> Option<&str> {
|
|
self.backends.get(idx).map(|s| s.as_str())
|
|
}
|
|
|
|
/// Get the number of backends
|
|
pub fn backend_count(&self) -> usize {
|
|
self.backends.len()
|
|
}
|
|
|
|
/// Generate the Maglev lookup table using double hashing
|
|
fn generate_lookup_table(backends: &[String], size: usize) -> Vec<usize> {
|
|
let n = backends.len();
|
|
let mut table = vec![usize::MAX; size];
|
|
let mut next = vec![0usize; n];
|
|
|
|
// Generate permutations for each backend
|
|
let permutations: Vec<Vec<usize>> = backends
|
|
.iter()
|
|
.map(|backend| Self::generate_permutation(backend, size))
|
|
.collect();
|
|
|
|
// Fill the lookup table
|
|
let mut filled = 0;
|
|
while filled < size {
|
|
for i in 0..n {
|
|
let mut cursor = next[i];
|
|
while cursor < size {
|
|
let c = permutations[i][cursor];
|
|
if table[c] == usize::MAX {
|
|
table[c] = i;
|
|
next[i] = cursor + 1;
|
|
filled += 1;
|
|
break;
|
|
}
|
|
cursor += 1;
|
|
}
|
|
|
|
if filled >= size {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
table
|
|
}
|
|
|
|
/// Generate a permutation for a backend using double hashing
|
|
fn generate_permutation(backend: &str, size: usize) -> Vec<usize> {
|
|
let offset = Self::hash_offset(backend, size);
|
|
let skip = Self::hash_skip(backend, size);
|
|
|
|
(0..size)
|
|
.map(|j| (offset + j * skip) % size)
|
|
.collect()
|
|
}
|
|
|
|
/// Hash function for offset calculation
|
|
fn hash_offset(backend: &str, size: usize) -> usize {
|
|
let mut hasher = DefaultHasher::new();
|
|
backend.hash(&mut hasher);
|
|
"offset".hash(&mut hasher);
|
|
(hasher.finish() as usize) % size
|
|
}
|
|
|
|
/// Hash function for skip calculation
|
|
fn hash_skip(backend: &str, size: usize) -> usize {
|
|
let mut hasher = DefaultHasher::new();
|
|
backend.hash(&mut hasher);
|
|
"skip".hash(&mut hasher);
|
|
let skip = (hasher.finish() as usize) % (size - 1) + 1;
|
|
skip
|
|
}
|
|
|
|
/// Hash a connection key (e.g., "192.168.1.1:54321")
|
|
fn hash_key(key: &str) -> u64 {
|
|
let mut hasher = DefaultHasher::new();
|
|
key.hash(&mut hasher);
|
|
hasher.finish()
|
|
}
|
|
}
|
|
|
|
/// Connection tracker for Maglev flow affinity
///
/// Tracks active connections to ensure that existing flows
/// continue to the same backend even if backend set changes.
#[derive(Debug)]
pub struct ConnectionTracker {
    /// Map from connection key (e.g. "ip:port") to backend index
    connections: std::collections::HashMap<String, usize>,
}
|
|
|
|
impl ConnectionTracker {
|
|
/// Create a new connection tracker
|
|
pub fn new() -> Self {
|
|
Self {
|
|
connections: std::collections::HashMap::new(),
|
|
}
|
|
}
|
|
|
|
/// Track a new connection
|
|
pub fn track(&mut self, key: String, backend_idx: usize) {
|
|
self.connections.insert(key, backend_idx);
|
|
}
|
|
|
|
/// Look up an existing connection
|
|
pub fn lookup(&self, key: &str) -> Option<usize> {
|
|
self.connections.get(key).copied()
|
|
}
|
|
|
|
/// Remove a connection (when it closes)
|
|
pub fn remove(&mut self, key: &str) -> Option<usize> {
|
|
self.connections.remove(key)
|
|
}
|
|
|
|
/// Get the number of tracked connections
|
|
pub fn connection_count(&self) -> usize {
|
|
self.connections.len()
|
|
}
|
|
|
|
/// Clear all tracked connections
|
|
pub fn clear(&mut self) {
|
|
self.connections.clear();
|
|
}
|
|
}
|
|
|
|
impl Default for ConnectionTracker {
|
|
fn default() -> Self {
|
|
Self::new()
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use fiberlb_types::BackendAdminState;
    use fiberlb_types::BackendStatus;
    use fiberlb_types::PoolId;

    /// Build a minimal enabled/online backend for table construction.
    fn create_test_backend(address: &str, port: u16) -> Backend {
        Backend {
            id: fiberlb_types::BackendId::new(),
            pool_id: PoolId::new(),
            name: format!("{}:{}", address, port),
            address: address.to_string(),
            port,
            weight: 1,
            admin_state: BackendAdminState::Enabled,
            status: BackendStatus::Online,
            created_at: 0,
            updated_at: 0,
        }
    }

    #[test]
    fn test_maglev_table_creation() {
        let backends = vec![
            create_test_backend("10.0.0.1", 8080),
            create_test_backend("10.0.0.2", 8080),
            create_test_backend("10.0.0.3", 8080),
        ];

        // Table size must be prime per the API contract; 101 is prime.
        let table = MaglevTable::new(&backends, Some(101));
        assert_eq!(table.backend_count(), 3);
        assert_eq!(table.table.len(), 101);
    }

    #[test]
    fn test_maglev_lookup() {
        let backends = vec![
            create_test_backend("10.0.0.1", 8080),
            create_test_backend("10.0.0.2", 8080),
            create_test_backend("10.0.0.3", 8080),
        ];

        let table = MaglevTable::new(&backends, Some(101));

        // Same key should always return same backend
        let key = "192.168.1.100:54321";
        let idx1 = table.lookup(key).unwrap();
        let idx2 = table.lookup(key).unwrap();
        assert_eq!(idx1, idx2);

        // Different keys should distribute across backends
        let mut distribution = vec![0; 3];
        for i in 0..1000 {
            let key = format!("192.168.1.100:{}", 50000 + i);
            if let Some(idx) = table.lookup(&key) {
                distribution[idx] += 1;
            }
        }

        // Each backend should get some traffic (rough distribution)
        for count in &distribution {
            assert!(*count > 200); // At least 20% each (should be ~33% each)
        }
    }

    #[test]
    fn test_maglev_consistency_on_backend_removal() {
        let backends = vec![
            create_test_backend("10.0.0.1", 8080),
            create_test_backend("10.0.0.2", 8080),
            create_test_backend("10.0.0.3", 8080),
        ];

        // 997 is prime, as the table-size contract requires.
        let table1 = MaglevTable::new(&backends, Some(997));

        // Generate mappings with 3 backends
        let mut mappings = std::collections::HashMap::new();
        for i in 0..100 {
            let key = format!("192.168.1.100:{}", 50000 + i);
            if let Some(idx) = table1.lookup(&key) {
                mappings.insert(key.clone(), table1.backend_id(idx).unwrap().to_string());
            }
        }

        // Remove one backend
        let backends2 = vec![
            create_test_backend("10.0.0.1", 8080),
            create_test_backend("10.0.0.3", 8080),
        ];

        let table2 = MaglevTable::new(&backends2, Some(997));

        // Count how many keys map to the same backend
        let mut unchanged = 0;
        let mut total = 0;
        for (key, old_backend) in &mappings {
            if let Some(idx) = table2.lookup(key) {
                if let Some(new_backend) = table2.backend_id(idx) {
                    total += 1;
                    // Only keys that were on removed backend should change
                    if old_backend != "10.0.0.2:8080" {
                        if old_backend == new_backend {
                            unchanged += 1;
                        }
                    }
                }
            }
        }

        // Most keys should remain on same backend (consistent hashing property)
        // Keys on remaining backends should not change
        assert!(unchanged > 50); // At least 50% consistency
    }

    #[test]
    fn test_connection_tracker() {
        let mut tracker = ConnectionTracker::new();

        tracker.track("192.168.1.1:54321".to_string(), 0);
        tracker.track("192.168.1.2:54322".to_string(), 1);

        assert_eq!(tracker.lookup("192.168.1.1:54321"), Some(0));
        assert_eq!(tracker.lookup("192.168.1.2:54322"), Some(1));
        assert_eq!(tracker.lookup("192.168.1.3:54323"), None);

        assert_eq!(tracker.connection_count(), 2);

        tracker.remove("192.168.1.1:54321");
        assert_eq!(tracker.connection_count(), 1);
        assert_eq!(tracker.lookup("192.168.1.1:54321"), None);
    }

    #[test]
    fn test_empty_backend_list() {
        let backends: Vec<Backend> = vec![];
        let table = MaglevTable::new(&backends, Some(101));

        assert_eq!(table.backend_count(), 0);
        assert!(table.lookup("any-key").is_none());
    }

    #[test]
    fn test_single_backend() {
        let backends = vec![create_test_backend("10.0.0.1", 8080)];
        let table = MaglevTable::new(&backends, Some(101));

        // All keys should map to the single backend
        for i in 0..10 {
            let key = format!("192.168.1.{}:54321", i);
            assert_eq!(table.lookup(&key), Some(0));
        }
    }
}
|