// NOTE(review): removed copy/paste residue that preceded the file (an
// unrelated commit message and file-viewer metadata: "230 lines / 7.7 KiB /
// Rust" repeated). It was not part of this source file.
//! FlareDB 3-Node Region Failover Test
//!
//! Verifies Raft consensus, region leader election, and data persistence under node failures.
use std::time::Duration;

use flaredb_proto::kvrpc::kv_cas_client::KvCasClient;
use flaredb_proto::kvrpc::{CasRequest, GetRequest};
use tokio::time::sleep;
use tonic::transport::Channel;
/// Helper to start a FlareDB node (placeholder - would need actual Server struct)
|
|
/// NOTE: This test requires FlareDB server infrastructure to be refactored
|
|
/// to support programmatic multi-instance startup similar to Chainfire.
|
|
///
|
|
/// Current limitation: FlareDB main.rs doesn't expose Server struct for testing.
|
|
/// This is a skeleton showing the intended test structure.
|
|
|
|
#[tokio::test]
|
|
#[ignore] // Run with: cargo test --test region_failover -- --ignored
|
|
async fn test_3node_region_leader_election() {
|
|
println!("\n=== Test: 3-Node FlareDB Region Leader Election ===");
|
|
println!("NOTE: Test skeleton - requires FlareDB server refactoring for multi-instance support");
|
|
|
|
// TODO: Start 3 FlareDB instances
|
|
// - Node 1 (store_id=1): API 127.0.0.1:2479, Raft 127.0.0.1:2480
|
|
// - Node 2 (store_id=2): API 127.0.0.1:2579, Raft 127.0.0.1:2580
|
|
// - Node 3 (store_id=3): API 127.0.0.1:2679, Raft 127.0.0.1:2680
|
|
//
|
|
// All nodes configured with:
|
|
// - Same region (id=1, start_key=[], end_key=[])
|
|
// - Peer list: [1, 2, 3]
|
|
// - Optional PD at localhost:9999
|
|
|
|
// Wait for Raft leader election
|
|
sleep(Duration::from_secs(2)).await;
|
|
println!("Would verify Raft leader elected");
|
|
|
|
// TODO: Connect to each node and verify one is leader
|
|
// Expected: One node has raft_leader=1 metric
|
|
}
|
|
|
|
#[tokio::test]
|
|
#[ignore]
|
|
async fn test_3node_data_replication() {
|
|
println!("\n=== Test: 3-Node Data Replication ===");
|
|
println!("NOTE: Test skeleton - requires server infrastructure");
|
|
|
|
// TODO: Start 3-node cluster (as above)
|
|
|
|
// Connect to leader and write data
|
|
println!("Would write data to region leader");
|
|
// let channel = Channel::from_static("http://127.0.0.1:2479").connect().await?;
|
|
// let mut client = KvCasClient::new(channel);
|
|
//
|
|
// let req = CasRequest {
|
|
// key: b"test-key".to_vec(),
|
|
// value: b"test-value".to_vec(),
|
|
// expected_version: 0,
|
|
// namespace: "default".to_string(),
|
|
// };
|
|
// client.compare_and_swap(req).await?;
|
|
|
|
sleep(Duration::from_millis(500)).await;
|
|
|
|
// Read from followers
|
|
println!("Would read from follower nodes");
|
|
// Verify data replicated to all nodes
|
|
|
|
println!("✓ Data replication would be verified");
|
|
}
|
|
|
|
#[tokio::test]
|
|
#[ignore]
|
|
async fn test_3node_leader_crash_failover() {
|
|
println!("\n=== Test: Region Leader Crash & Failover ===");
|
|
println!("NOTE: Test skeleton - requires server infrastructure");
|
|
|
|
// TODO: Start 3-node cluster
|
|
|
|
// Write data to region
|
|
println!("Would write test data");
|
|
|
|
// Identify and kill region leader
|
|
println!("Would kill region leader");
|
|
|
|
// Wait for re-election
|
|
sleep(Duration::from_secs(1)).await;
|
|
|
|
// Verify new leader elected and data readable
|
|
println!("Would verify new leader elected");
|
|
println!("Would verify data still accessible");
|
|
|
|
println!("✓ Leader failover would be verified");
|
|
}
|
|
|
|
#[tokio::test]
|
|
#[ignore]
|
|
async fn test_3node_quorum_maintenance() {
|
|
println!("\n=== Test: Quorum Maintenance (2/3 Survives) ===");
|
|
println!("NOTE: Test skeleton - requires server infrastructure");
|
|
|
|
// TODO: Start 3-node cluster
|
|
|
|
// Write initial data
|
|
println!("Would write initial data");
|
|
|
|
// Kill one node (non-leader)
|
|
println!("Would kill one follower node");
|
|
|
|
// Cluster should maintain 2/3 quorum
|
|
println!("Would verify writes still succeed with 2/3 quorum");
|
|
|
|
// Verify data readable from remaining nodes
|
|
println!("Would verify data accessible from remaining nodes");
|
|
|
|
println!("✓ Quorum maintenance would be verified");
|
|
}
|
|
|
|
// ============================================================================
// Working Example Test (using flaredb-client pattern from examples/test_cluster.rs)
// ============================================================================
#[tokio::test]
|
|
#[ignore]
|
|
async fn test_2node_basic_connectivity() {
|
|
println!("\n=== Test: 2-Node Basic Connectivity ===");
|
|
println!("Prerequisites: Two FlareDB servers must be running:");
|
|
println!(" Node 1: http://127.0.0.1:50051");
|
|
println!(" Node 2: http://127.0.0.1:50052");
|
|
println!();
|
|
|
|
// Connect to node 1
|
|
let result1 = Channel::from_static("http://127.0.0.1:50051")
|
|
.connect()
|
|
.await;
|
|
|
|
match result1 {
|
|
Ok(channel) => {
|
|
let mut client = KvCasClient::new(channel);
|
|
|
|
// Write key
|
|
println!("Writing key to node 1...");
|
|
let req = CasRequest {
|
|
key: b"integration-test-key".to_vec(),
|
|
value: b"integration-test-value".to_vec(),
|
|
expected_version: 0,
|
|
namespace: "default".to_string(),
|
|
};
|
|
|
|
match client.compare_and_swap(req).await {
|
|
Ok(resp) => {
|
|
let inner = resp.into_inner();
|
|
println!("✓ Write successful: version={}", inner.new_version);
|
|
assert!(inner.success);
|
|
}
|
|
Err(e) => {
|
|
println!("✗ Write failed: {}", e);
|
|
panic!("Write operation failed");
|
|
}
|
|
}
|
|
|
|
// Read back
|
|
println!("Reading key from node 1...");
|
|
let req = GetRequest {
|
|
key: b"integration-test-key".to_vec(),
|
|
namespace: "default".to_string(),
|
|
};
|
|
|
|
match client.get(req).await {
|
|
Ok(resp) => {
|
|
let inner = resp.into_inner();
|
|
println!(
|
|
"✓ Read successful: found={}, value={:?}",
|
|
inner.found,
|
|
String::from_utf8_lossy(&inner.value)
|
|
);
|
|
assert!(inner.found);
|
|
assert_eq!(&inner.value, b"integration-test-value");
|
|
}
|
|
Err(e) => {
|
|
println!("✗ Read failed: {}", e);
|
|
panic!("Read operation failed");
|
|
}
|
|
}
|
|
}
|
|
Err(e) => {
|
|
println!("✗ Cannot connect to node 1: {}", e);
|
|
println!("Skipping test - servers not running");
|
|
return;
|
|
}
|
|
}
|
|
|
|
// Try node 2 connectivity
|
|
println!("\nTesting node 2 connectivity...");
|
|
let result2 = Channel::from_static("http://127.0.0.1:50052")
|
|
.connect()
|
|
.await;
|
|
|
|
match result2 {
|
|
Ok(channel) => {
|
|
let mut client2 = KvCasClient::new(channel);
|
|
let req = GetRequest {
|
|
key: b"integration-test-key".to_vec(),
|
|
namespace: "default".to_string(),
|
|
};
|
|
|
|
match client2.get(req).await {
|
|
Ok(resp) => {
|
|
let inner = resp.into_inner();
|
|
if inner.found {
|
|
println!(
|
|
"✓ Node 2 has replicated data: {:?}",
|
|
String::from_utf8_lossy(&inner.value)
|
|
);
|
|
} else {
|
|
println!("⚠ Node 2 doesn't have data yet (leader-only reads?)");
|
|
}
|
|
}
|
|
Err(e) => {
|
|
println!("⚠ Node 2 read error (expected if not leader): {}", e);
|
|
}
|
|
}
|
|
}
|
|
Err(e) => {
|
|
println!("⚠ Cannot connect to node 2: {}", e);
|
|
}
|
|
}
|
|
|
|
println!("\n✓ Basic connectivity test complete");
|
|
}
|