photoncloud-monorepo/flaredb/crates/flaredb-server/tests/region_failover.rs
centra 5c6eb04a46 T036: Add VM cluster deployment configs for nixos-anywhere
- netboot-base.nix with SSH key auth
- Launch scripts for node01/02/03
- Node configuration.nix and disko.nix
- Nix modules for first-boot automation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-11 09:59:19 +09:00

230 lines
7.7 KiB
Rust

//! FlareDB 3-Node Region Failover Test
//!
//! Verifies Raft consensus, region leader election, and data persistence under node failures.
use flaredb_proto::kvrpc::kv_cas_client::KvCasClient;
use flaredb_proto::kvrpc::{CasRequest, GetRequest};
use std::time::Duration;
use tokio::time::sleep;
use tonic::transport::Channel;
/// Skeleton: boot a 3-node cluster and confirm exactly one region leader.
///
/// A real `Server` struct is not yet exposed by FlareDB's `main.rs`, so
/// programmatic multi-instance startup (as Chainfire supports) is not possible
/// today. Until that refactor lands, this test only documents the intended
/// structure.
#[tokio::test]
#[ignore] // Run with: cargo test --test region_failover -- --ignored
async fn test_3node_region_leader_election() {
    println!("\n=== Test: 3-Node FlareDB Region Leader Election ===");
    println!("NOTE: Test skeleton - requires FlareDB server refactoring for multi-instance support");

    // TODO: Spin up three FlareDB instances:
    //   store_id=1 -> API 127.0.0.1:2479, Raft 127.0.0.1:2480
    //   store_id=2 -> API 127.0.0.1:2579, Raft 127.0.0.1:2580
    //   store_id=3 -> API 127.0.0.1:2679, Raft 127.0.0.1:2680
    // Shared configuration across all nodes:
    //   - single region (id=1, start_key=[], end_key=[])
    //   - peer list [1, 2, 3]
    //   - optional PD at localhost:9999

    // Give Raft time to complete an election.
    let election_window = Duration::from_secs(2);
    sleep(election_window).await;
    println!("Would verify Raft leader elected");
    // TODO: Query each node; exactly one should report the raft_leader=1 metric.
}
/// Skeleton: write through the region leader, then confirm the value becomes
/// visible on the follower nodes.
#[tokio::test]
#[ignore]
async fn test_3node_data_replication() {
    println!("\n=== Test: 3-Node Data Replication ===");
    println!("NOTE: Test skeleton - requires server infrastructure");

    // TODO: Bring up the 3-node cluster described above, then issue a CAS
    // write against the leader. Intended client code:
    //
    //   let channel = Channel::from_static("http://127.0.0.1:2479").connect().await?;
    //   let mut client = KvCasClient::new(channel);
    //   let req = CasRequest {
    //       key: b"test-key".to_vec(),
    //       value: b"test-value".to_vec(),
    //       expected_version: 0,
    //       namespace: "default".to_string(),
    //   };
    //   client.compare_and_swap(req).await?;
    println!("Would write data to region leader");

    // Allow replication to settle before probing followers.
    let replication_window = Duration::from_millis(500);
    sleep(replication_window).await;

    // TODO: Read the key from each follower and compare values.
    println!("Would read from follower nodes");
    println!("✓ Data replication would be verified");
}
/// Skeleton: kill the region leader and verify the survivors elect a new one
/// without losing the previously written data.
#[tokio::test]
#[ignore]
async fn test_3node_leader_crash_failover() {
    println!("\n=== Test: Region Leader Crash & Failover ===");
    println!("NOTE: Test skeleton - requires server infrastructure");

    // TODO: Bring up the 3-node cluster and seed it with data.
    println!("Would write test data");

    // TODO: Determine which node leads the region and terminate it.
    println!("Would kill region leader");

    // Give the surviving peers time to re-elect.
    let reelection_window = Duration::from_secs(1);
    sleep(reelection_window).await;

    // TODO: Confirm a new leader exists and the seeded data is still readable.
    println!("Would verify new leader elected");
    println!("Would verify data still accessible");
    println!("✓ Leader failover would be verified");
}
/// Skeleton: with one follower down, the remaining 2/3 majority must keep
/// accepting writes and serving reads.
#[tokio::test]
#[ignore]
async fn test_3node_quorum_maintenance() {
    println!("\n=== Test: Quorum Maintenance (2/3 Survives) ===");
    println!("NOTE: Test skeleton - requires server infrastructure");

    // TODO: Bring up the 3-node cluster and seed it.
    println!("Would write initial data");

    // TODO: Terminate a single non-leader node.
    println!("Would kill one follower node");

    // Two of three voters remain, so quorum still holds.
    println!("Would verify writes still succeed with 2/3 quorum");

    // TODO: Read the seeded data back from both survivors.
    println!("Would verify data accessible from remaining nodes");
    println!("✓ Quorum maintenance would be verified");
}
// ============================================================================
// Working Example Test (using flaredb-client pattern from examples/test_cluster.rs)
// ============================================================================
/// Live-cluster smoke test following the `examples/test_cluster.rs` pattern.
///
/// Requires two FlareDB servers to already be running on 127.0.0.1:50051 and
/// 127.0.0.1:50052. Writes a key through node 1, reads it back, then probes
/// node 2 to see whether the value replicated. Returns early (skips) when
/// node 1 is unreachable; node 2 problems are reported but tolerated.
#[tokio::test]
#[ignore]
async fn test_2node_basic_connectivity() {
    println!("\n=== Test: 2-Node Basic Connectivity ===");
    println!("Prerequisites: Two FlareDB servers must be running:");
    println!(" Node 1: http://127.0.0.1:50051");
    println!(" Node 2: http://127.0.0.1:50052");
    println!();

    // Connect to node 1. An unreachable node means the cluster is not up,
    // so the test bails out instead of failing.
    let channel = match Channel::from_static("http://127.0.0.1:50051")
        .connect()
        .await
    {
        Ok(channel) => channel,
        Err(e) => {
            println!("✗ Cannot connect to node 1: {}", e);
            println!("Skipping test - servers not running");
            return;
        }
    };
    let mut client = KvCasClient::new(channel);

    // CAS write through node 1; expected_version=0 means "create new".
    println!("Writing key to node 1...");
    let write_req = CasRequest {
        key: b"integration-test-key".to_vec(),
        value: b"integration-test-value".to_vec(),
        expected_version: 0,
        namespace: "default".to_string(),
    };
    match client.compare_and_swap(write_req).await {
        Ok(resp) => {
            let body = resp.into_inner();
            println!("✓ Write successful: version={}", body.new_version);
            assert!(body.success);
        }
        Err(e) => {
            println!("✗ Write failed: {}", e);
            panic!("Write operation failed");
        }
    }

    // Read the key back from the same node and check the value round-trips.
    println!("Reading key from node 1...");
    let read_req = GetRequest {
        key: b"integration-test-key".to_vec(),
        namespace: "default".to_string(),
    };
    match client.get(read_req).await {
        Ok(resp) => {
            let body = resp.into_inner();
            println!(
                "✓ Read successful: found={}, value={:?}",
                body.found,
                String::from_utf8_lossy(&body.value)
            );
            assert!(body.found);
            assert_eq!(&body.value, b"integration-test-value");
        }
        Err(e) => {
            println!("✗ Read failed: {}", e);
            panic!("Read operation failed");
        }
    }

    // Best-effort probe of node 2: the value may not be visible if node 2
    // only serves leader reads, so nothing here asserts.
    println!("\nTesting node 2 connectivity...");
    match Channel::from_static("http://127.0.0.1:50052")
        .connect()
        .await
    {
        Ok(channel) => {
            let mut follower = KvCasClient::new(channel);
            let probe = GetRequest {
                key: b"integration-test-key".to_vec(),
                namespace: "default".to_string(),
            };
            match follower.get(probe).await {
                Ok(resp) => {
                    let body = resp.into_inner();
                    if body.found {
                        println!(
                            "✓ Node 2 has replicated data: {:?}",
                            String::from_utf8_lossy(&body.value)
                        );
                    } else {
                        println!("⚠ Node 2 doesn't have data yet (leader-only reads?)");
                    }
                }
                Err(e) => {
                    println!("⚠ Node 2 read error (expected if not leader): {}", e);
                }
            }
        }
        Err(e) => {
            println!("⚠ Cannot connect to node 2: {}", e);
        }
    }

    println!("\n✓ Basic connectivity test complete");
}