chore: initial sync of untracked files and infrastructure components
This commit is contained in:
parent
6d4f826efb
commit
d3d74995e8
158 changed files with 37678 additions and 0 deletions
70
.github/workflows/nix.yml
vendored
Normal file
70
.github/workflows/nix.yml
vendored
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
# CI pipeline: flake sanity check -> server package builds -> integration
# matrices. The Noop-hypervisor lane always runs; the KVM lane is opt-in via
# the NESTED_KVM repository variable.
name: Nix CI

on:
  push:
  pull_request:

jobs:
  flake-check:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: DeterminateSystems/nix-installer-action@v11
      - uses: DeterminateSystems/magic-nix-cache-action@v8
      - name: Nix flake check
        run: nix flake check --accept-flake-config

  build-servers:
    runs-on: ubuntu-latest
    needs: flake-check
    steps:
      - uses: actions/checkout@v4
      - uses: DeterminateSystems/nix-installer-action@v11
      - uses: DeterminateSystems/magic-nix-cache-action@v8
      - name: Build server packages
        run: |
          nix build --accept-flake-config .#chainfire-server .#flaredb-server .#iam-server .#plasmavmc-server .#prismnet-server .#flashdns-server .#fiberlb-server .#lightningstor-server .#creditservice-server

  integration-matrix:
    runs-on: ubuntu-latest
    needs: build-servers
    env:
      PLASMA_E2E: "1"
      # SKIP_PLASMA defaults to 0; set repo/runner var to 1 only when qemu-img/KVM is unavailable.
      SKIP_PLASMA: ${{ vars.SKIP_PLASMA || '0' }}
      # Per-run log directory so artifact uploads don't collide across runs.
      LOG_DIR: .cccc/work/integration-matrix/${{ github.run_id }}
    steps:
      - uses: actions/checkout@v4
      - uses: DeterminateSystems/nix-installer-action@v11
      - uses: DeterminateSystems/magic-nix-cache-action@v8
      - name: Run integration matrix (Noop hypervisor gate)
        run: |
          nix develop -c ./scripts/integration-matrix.sh
      - name: Upload integration-matrix logs
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: integration-matrix-logs
          path: .cccc/work/integration-matrix/

  integration-matrix-kvm:
    # Opt-in lane: set the NESTED_KVM repo variable to '1' on runners with KVM.
    if: ${{ vars.NESTED_KVM == '1' }}
    runs-on: ubuntu-latest
    needs: integration-matrix
    env:
      PLASMA_E2E: "1"
      SKIP_PLASMA: "0"
      LOG_DIR: .cccc/work/integration-matrix-kvm/${{ github.run_id }}
    steps:
      - uses: actions/checkout@v4
      - uses: DeterminateSystems/nix-installer-action@v11
      - uses: DeterminateSystems/magic-nix-cache-action@v8
      - name: Run integration matrix (KVM lane)
        run: |
          nix develop -c ./scripts/integration-matrix.sh
      - name: Upload integration-matrix-kvm logs
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: integration-matrix-kvm-logs
          path: .cccc/work/integration-matrix-kvm/
|
||||
3610
apigateway/Cargo.lock
generated
Normal file
3610
apigateway/Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load diff
55
apigateway/Cargo.toml
Normal file
55
apigateway/Cargo.toml
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
# Workspace manifest for the API gateway service (protocol crate + server binary).
[workspace]
resolver = "2"
members = [
    "crates/apigateway-api",
    "crates/apigateway-server",
]

# Shared package metadata inherited by member crates via `*.workspace = true`.
[workspace.package]
version = "0.1.0"
edition = "2021"
license = "MIT OR Apache-2.0"
rust-version = "1.75"
authors = ["PlasmaCloud Contributors"]
repository = "https://github.com/yourorg/plasmacloud"

# Single source of truth for dependency versions across member crates.
[workspace.dependencies]
# Internal crates
apigateway-api = { path = "crates/apigateway-api" }
apigateway-server = { path = "crates/apigateway-server" }

# Async runtime
tokio = { version = "1.40", features = ["full"] }

# HTTP server
axum = "0.7"
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"] }

# Serialization
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
toml = "0.8"

# gRPC
tonic = "0.12"
tonic-build = "0.12"
prost = "0.13"
prost-types = "0.13"
protoc-bin-vendored = "3.2"

# CLI
clap = { version = "4", features = ["derive", "env"] }

# Logging
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }

# Utils
async-trait = "0.1"
uuid = { version = "1", features = ["v4"] }

# Workspace-wide lints; member crates opt in with `[lints] workspace = true`.
[workspace.lints.rust]
unsafe_code = "deny"

[workspace.lints.clippy]
all = "warn"
|
||||
19
apigateway/crates/apigateway-api/Cargo.toml
Normal file
19
apigateway/crates/apigateway-api/Cargo.toml
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
# Protocol crate: tonic/prost bindings generated from proto/apigateway.proto.
[package]
name = "apigateway-api"
version.workspace = true
edition.workspace = true
license.workspace = true
rust-version.workspace = true
description = "API Gateway gRPC protocol definitions"

[dependencies]
tonic = { workspace = true }
prost = { workspace = true }
prost-types = { workspace = true }

# build.rs compiles the .proto with a vendored protoc (no system install needed).
[build-dependencies]
tonic-build = { workspace = true }
protoc-bin-vendored = { workspace = true }

[lib]
path = "src/lib.rs"
|
||||
9
apigateway/crates/apigateway-api/build.rs
Normal file
9
apigateway/crates/apigateway-api/build.rs
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
// Build script: generate Rust client + server stubs from
// proto/apigateway.proto using tonic-build, with a vendored protoc binary so
// the build does not depend on a system protoc installation.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let protoc = protoc_bin_vendored::protoc_bin_path()?;
    // tonic-build/prost-build discover the compiler via the PROTOC env var.
    std::env::set_var("PROTOC", protoc);
    tonic_build::configure()
        .build_server(true)
        .build_client(true)
        .compile_protos(&["proto/apigateway.proto"], &["proto"])?;
    Ok(())
}
|
||||
87
apigateway/crates/apigateway-api/proto/apigateway.proto
Normal file
87
apigateway/crates/apigateway-api/proto/apigateway.proto
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
syntax = "proto3";

package apigateway.v1;

// ============================================================================
// Gateway Auth Service
// ============================================================================

// Authorizes a single inbound HTTP request on behalf of the gateway.
service GatewayAuthService {
  rpc Authorize(AuthorizeRequest) returns (AuthorizeResponse);
}

// Identity attached to an authorized request.
message Subject {
  string subject_id = 1;
  string org_id = 2;
  string project_id = 3;
  repeated string roles = 4;
  repeated string scopes = 5;
}

message AuthorizeRequest {
  // Correlation id for tracing this request across services.
  string request_id = 1;
  string token = 2;
  string method = 3;
  string path = 4;
  string raw_query = 5;
  map<string, string> headers = 6;
  string client_ip = 7;
  string route_name = 8;
}

message AuthorizeResponse {
  bool allow = 1;
  // Reason for the decision (NOTE(review): presumably only meaningful when
  // allow == false — confirm against the server implementation).
  string reason = 2;
  Subject subject = 3;
  map<string, string> headers = 4;
  uint32 ttl_seconds = 5;
}

// ============================================================================
// Gateway Credit Service
// ============================================================================

// Two-phase metering: Reserve units up front, then Commit the units actually
// consumed or Rollback the reservation.
service GatewayCreditService {
  rpc Reserve(CreditReserveRequest) returns (CreditReserveResponse);
  rpc Commit(CreditCommitRequest) returns (CreditCommitResponse);
  rpc Rollback(CreditRollbackRequest) returns (CreditRollbackResponse);
}

message CreditReserveRequest {
  string request_id = 1;
  string subject_id = 2;
  string org_id = 3;
  string project_id = 4;
  string route_name = 5;
  string method = 6;
  string path = 7;
  string raw_query = 8;
  uint64 units = 9;
  map<string, string> attributes = 10;
}

message CreditReserveResponse {
  bool allow = 1;
  // Handle used by the subsequent Commit/Rollback call.
  string reservation_id = 2;
  string reason = 3;
  uint64 remaining = 4;
}

message CreditCommitRequest {
  string reservation_id = 1;
  uint64 units = 2;
}

message CreditCommitResponse {
  bool success = 1;
  string reason = 2;
}

message CreditRollbackRequest {
  string reservation_id = 1;
}

message CreditRollbackResponse {
  bool success = 1;
  string reason = 2;
}
}
|
||||
10
apigateway/crates/apigateway-api/src/lib.rs
Normal file
10
apigateway/crates/apigateway-api/src/lib.rs
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
//! API Gateway gRPC protocol definitions

// Bindings generated by tonic-build from proto/apigateway.proto (see build.rs).
pub mod proto {
    tonic::include_proto!("apigateway.v1");
}

// Re-export the generated service clients/servers at the crate root so
// consumers don't have to spell out the generated module paths.
pub use proto::gateway_auth_service_client::GatewayAuthServiceClient;
pub use proto::gateway_auth_service_server::{GatewayAuthService, GatewayAuthServiceServer};
pub use proto::gateway_credit_service_client::GatewayCreditServiceClient;
pub use proto::gateway_credit_service_server::{GatewayCreditService, GatewayCreditServiceServer};
|
||||
38
apigateway/crates/apigateway-server/Cargo.toml
Normal file
38
apigateway/crates/apigateway-server/Cargo.toml
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
# Server binary crate for the HTTP API gateway.
[package]
name = "apigateway-server"
version.workspace = true
edition.workspace = true
license.workspace = true
rust-version.workspace = true
description = "HTTP API gateway (scaffold)"

[[bin]]
name = "apigateway-server"
path = "src/main.rs"

[dependencies]
apigateway-api = { workspace = true }
axum = { workspace = true }
clap = { workspace = true }
reqwest = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
tonic = { workspace = true }
tokio = { workspace = true }
toml = { workspace = true }
tracing = { workspace = true }
tracing-subscriber = { workspace = true }
async-trait = { workspace = true }
uuid = { workspace = true }
futures-core = "0.3"
bytes = "1"

# NOTE(review): dev-only cross-service path dependencies — presumably used by
# this crate's integration tests to run IAM/credit backends in-process;
# confirm against the tests before restructuring the repo layout.
[dev-dependencies]
iam-api = { path = "../../../iam/crates/iam-api" }
iam-authn = { path = "../../../iam/crates/iam-authn" }
iam-authz = { path = "../../../iam/crates/iam-authz" }
iam-store = { path = "../../../iam/crates/iam-store" }
iam-types = { path = "../../../iam/crates/iam-types" }
creditservice-api = { path = "../../../creditservice/crates/creditservice-api" }
creditservice-types = { path = "../../../creditservice/crates/creditservice-types" }
tokio-stream = "0.1"
|
||||
1482
apigateway/crates/apigateway-server/src/main.rs
Normal file
1482
apigateway/crates/apigateway-server/src/main.rs
Normal file
File diff suppressed because it is too large
Load diff
15
chainfire/chainfire-client/examples/basic.rs
Normal file
15
chainfire/chainfire-client/examples/basic.rs
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
//! Minimal example: connect to a Chainfire endpoint and perform a put/get
//! roundtrip.
//!
//! NOTE(review): assumes a server is listening on 127.0.0.1:2379 — the
//! example exits with a connection error otherwise.
use chainfire_client::Client;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Build a client with default retry/backoff.
    let mut client = Client::builder("http://127.0.0.1:2379").build().await?;

    // Simple put/get roundtrip.
    client.put_str("/example/key", "value").await?;
    if let Some(val) = client.get_str("/example/key").await? {
        println!("Got value: {}", val);
    }

    Ok(())
}
|
||||
380
chainfire/chainfire-client/src/metadata.rs
Normal file
380
chainfire/chainfire-client/src/metadata.rs
Normal file
|
|
@ -0,0 +1,380 @@
|
|||
//! Metadata-oriented KV facade for Chainfire (and test backends).
//!
//! This module exists to standardize how PlasmaCloud services interact with
//! control-plane metadata: versioned reads, CAS, prefix scans, etc.
|
||||
|
||||
use async_trait::async_trait;
|
||||
use bytes::Bytes;
|
||||
use std::collections::BTreeMap;
|
||||
use std::sync::RwLock;
|
||||
use thiserror::Error;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
use crate::{CasOutcome, Client as CfClient, ClientError as CfClientError};
|
||||
|
||||
/// Errors surfaced by [`MetadataClient`] implementations.
#[derive(Debug, Error)]
pub enum MetadataError {
    /// The backend could not be reached (dial/transport failure).
    #[error("Connection error: {0}")]
    Connection(String),
    /// The backend accepted the request but reported an error.
    #[error("Backend error: {0}")]
    Backend(String),
    /// A conditional write observed a different version than expected.
    #[error("Conflict: expected version {expected}, actual {actual}")]
    Conflict { expected: u64, actual: u64 },
    /// The requested key does not exist.
    #[error("Not found")]
    NotFound,
    /// Encoding/decoding of a stored value failed.
    #[error("Serialization error: {0}")]
    Serialization(String),
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, MetadataError>;
|
||||
|
||||
/// Key-value pair with version
#[derive(Debug, Clone)]
pub struct KvPair {
    /// Raw key bytes.
    pub key: Bytes,
    /// Raw value bytes.
    pub value: Bytes,
    /// Backend version/revision of this entry at read time.
    pub version: u64,
}
|
||||
|
||||
/// Result of a CAS (Compare-And-Swap) operation
#[derive(Debug, Clone)]
pub enum CasResult {
    /// CAS succeeded, returning the new version
    Success(u64),
    /// CAS failed due to version mismatch or not found
    Conflict { expected: u64, actual: u64 },
    /// Key not found (when expected version > 0)
    //
    // NOTE(review): the two implementations in this file disagree on when
    // `NotFound` is produced — `MemoryClient` returns it for a missing key
    // with expected_version != 0, while `ChainfireClient` returns it when
    // expected_version == 0 and the backend reports current version 0.
    // Confirm the intended contract with callers before relying on it.
    NotFound,
}
|
||||
|
||||
/// Versioned key-value interface over control-plane metadata backends.
///
/// Implemented in this module by `ChainfireClient` (gRPC-backed) and
/// `MemoryClient` (in-process test double).
#[async_trait]
pub trait MetadataClient: Send + Sync {
    /// Get a value by key; `Ok(None)` when the key does not exist.
    async fn get(&self, key: &[u8]) -> Result<Option<(Bytes, u64)>>;

    /// Put a value (unconditional write); returns the new version.
    async fn put(&self, key: &[u8], value: &[u8]) -> Result<u64>;

    /// Compare-and-swap write
    /// - If expected_version is 0, only succeeds if key doesn't exist
    /// - Otherwise, only succeeds if current version matches expected_version
    async fn cas(&self, key: &[u8], expected_version: u64, value: &[u8]) -> Result<CasResult>;

    /// Delete a key. `MemoryClient` returns whether the key existed;
    /// presumably the Chainfire backend does the same — confirm.
    async fn delete(&self, key: &[u8]) -> Result<bool>;

    /// Scan keys with a prefix
    async fn scan_prefix(&self, prefix: &[u8], limit: u32) -> Result<Vec<KvPair>>;

    /// Scan keys in a range [start, end)
    async fn scan_range(&self, start: &[u8], end: &[u8], limit: u32) -> Result<Vec<KvPair>>;

    /// Scan all keys with a prefix (best-effort pagination using `scan_range`).
    ///
    /// This exists because `scan_prefix` is intentionally bounded by a `limit` but many
    /// control-plane callers need "list everything under a prefix" semantics.
    async fn scan_prefix_all(&self, prefix: &[u8]) -> Result<Vec<KvPair>> {
        // Entries fetched per scan_range call; each page resumes just past
        // the last key of the previous one.
        const PAGE_SIZE: u32 = 1024;

        let end = prefix_end(prefix);
        if end.is_empty() {
            // Prefix has no lexicographic successor (or is empty). Fall back to a single page.
            // NOTE(review): this caps the result at PAGE_SIZE entries in the
            // empty-prefix / all-0xff case despite the "scan all" contract.
            return self.scan_prefix(prefix, PAGE_SIZE).await;
        }

        let mut out = Vec::new();
        let mut start = prefix.to_vec();

        loop {
            let batch = self.scan_range(&start, &end, PAGE_SIZE).await?;
            if batch.is_empty() {
                break;
            }

            // Capture the resume point before the batch is consumed below.
            let last_key = batch
                .last()
                .map(|kv| kv.key.clone())
                .unwrap_or_else(Bytes::new);

            out.extend(batch);

            // Resume strictly after the last returned key.
            let next = next_key_after(last_key.as_ref());
            if next <= start {
                // Defensive: avoid infinite loops if the backend returns unsorted/duplicate keys.
                break;
            }
            start = next;
        }

        Ok(out)
    }
}
|
||||
|
||||
/// Compute the exclusive upper bound for a prefix scan: the smallest byte
/// string strictly greater than every key that starts with `prefix`.
/// Returns an empty vec when no such bound exists (empty prefix, or a prefix
/// consisting entirely of 0xff bytes).
fn prefix_end(prefix: &[u8]) -> Vec<u8> {
    // Find the last byte that can be incremented without overflowing, drop
    // everything after it, and bump it — "increment the prefix" in base 256.
    match prefix.iter().rposition(|&b| b < 0xff) {
        Some(i) => {
            let mut end = prefix[..=i].to_vec();
            end[i] += 1;
            end
        }
        None => Vec::new(),
    }
}
|
||||
|
||||
/// Immediate lexicographic successor of `key`: the key with a 0x00 byte
/// appended. Used to resume a range scan strictly after a key already seen.
fn next_key_after(key: &[u8]) -> Vec<u8> {
    let mut succ = Vec::with_capacity(key.len() + 1);
    succ.extend_from_slice(key);
    succ.push(0);
    succ
}
|
||||
|
||||
// ============================================================================
|
||||
// Chainfire Implementation
|
||||
// ============================================================================
|
||||
|
||||
/// Thread-safe metadata client backed by the Chainfire gRPC client.
|
||||
pub struct ChainfireClient {
|
||||
client: Mutex<CfClient>,
|
||||
}
|
||||
|
||||
impl ChainfireClient {
|
||||
pub async fn new(endpoints: Vec<String>) -> Result<Self> {
|
||||
let client = Self::connect_any(&endpoints).await?;
|
||||
Ok(Self {
|
||||
client: Mutex::new(client),
|
||||
})
|
||||
}
|
||||
|
||||
async fn connect_any(endpoints: &[String]) -> Result<CfClient> {
|
||||
let mut last_err = None;
|
||||
for ep in endpoints {
|
||||
let addr = if ep.starts_with("http://") || ep.starts_with("https://") {
|
||||
ep.clone()
|
||||
} else {
|
||||
format!("http://{}", ep)
|
||||
};
|
||||
match CfClient::connect(addr.clone()).await {
|
||||
Ok(client) => return Ok(client),
|
||||
Err(e) => {
|
||||
last_err = Some(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Err(MetadataError::Connection(
|
||||
last_err
|
||||
.map(|e| e.to_string())
|
||||
.unwrap_or_else(|| "no endpoints available".into()),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
impl MetadataClient for ChainfireClient {
    async fn get(&self, key: &[u8]) -> Result<Option<(Bytes, u64)>> {
        let mut client = self.client.lock().await;
        let result = client
            .get_with_revision(key)
            .await
            .map_err(map_chainfire_error)?;
        // The backend revision is exposed as the entry's version.
        Ok(result.map(|(v, rev)| (Bytes::from(v), rev)))
    }

    async fn put(&self, key: &[u8], value: &[u8]) -> Result<u64> {
        let mut client = self.client.lock().await;
        client.put(key, value).await.map_err(map_chainfire_error)
    }

    async fn cas(&self, key: &[u8], expected_version: u64, value: &[u8]) -> Result<CasResult> {
        let mut client = self.client.lock().await;
        let outcome: CasOutcome = client
            .compare_and_swap(key, expected_version, value)
            .await
            .map_err(map_chainfire_error)?;

        if outcome.success {
            return Ok(CasResult::Success(outcome.new_version));
        }

        // Failure mapping.
        // NOTE(review): this differs from `MemoryClient`, which reports
        // `NotFound` for a missing key with expected_version != 0; here
        // `NotFound` is produced only for expected_version == 0 with
        // current_version == 0. Confirm which contract callers rely on.
        if expected_version == 0 {
            if outcome.current_version == 0 {
                Ok(CasResult::NotFound)
            } else {
                // Create-if-absent failed because the key already exists.
                Ok(CasResult::Conflict {
                    expected: 0,
                    actual: outcome.current_version,
                })
            }
        } else {
            Ok(CasResult::Conflict {
                expected: expected_version,
                actual: outcome.current_version,
            })
        }
    }

    async fn delete(&self, key: &[u8]) -> Result<bool> {
        let mut client = self.client.lock().await;
        client.delete(key).await.map_err(map_chainfire_error)
    }

    async fn scan_prefix(&self, prefix: &[u8], limit: u32) -> Result<Vec<KvPair>> {
        let mut client = self.client.lock().await;
        // Second element of the backend response tuple is ignored here.
        // NOTE(review): presumably a continuation/revision marker — confirm
        // against the chainfire-client API.
        let (results, _) = client
            .scan_prefix(prefix, limit as i64)
            .await
            .map_err(map_chainfire_error)?;

        Ok(results
            .into_iter()
            .map(|(k, v, ver)| KvPair {
                key: Bytes::from(k),
                value: Bytes::from(v),
                version: ver,
            })
            .collect())
    }

    async fn scan_range(&self, start: &[u8], end: &[u8], limit: u32) -> Result<Vec<KvPair>> {
        let mut client = self.client.lock().await;
        let (results, _) = client
            .scan_range(start, end, limit as i64)
            .await
            .map_err(map_chainfire_error)?;

        Ok(results
            .into_iter()
            .map(|(k, v, ver)| KvPair {
                key: Bytes::from(k),
                value: Bytes::from(v),
                version: ver,
            })
            .collect())
    }
}
|
||||
|
||||
/// Map a low-level Chainfire client error onto the metadata error space:
/// transport-level failures become `Connection`, everything else `Backend`.
fn map_chainfire_error(err: CfClientError) -> MetadataError {
    match err {
        CfClientError::Connection(msg) => MetadataError::Connection(msg),
        CfClientError::Transport(e) => MetadataError::Connection(e.to_string()),
        CfClientError::Rpc(status) => MetadataError::Backend(status.to_string()),
        other => MetadataError::Backend(other.to_string()),
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// Memory Implementation
|
||||
// ============================================================================
|
||||
|
||||
/// In-memory [`MetadataClient`] implementation for tests.
///
/// Versions are allocated from a single monotonically increasing counter
/// shared by all keys, so every successful write receives a strictly larger
/// version than any earlier write.
pub struct MemoryClient {
    /// key -> (value, version-at-last-write); BTreeMap keeps keys ordered
    /// for prefix/range scans.
    data: RwLock<BTreeMap<Vec<u8>, (Vec<u8>, u64)>>,
    // Monotonic version source. An atomic (instead of the previous
    // RwLock<u64>) avoids a lock acquisition + unwrap/poison path for a
    // plain counter; observable behavior is unchanged.
    version_counter: std::sync::atomic::AtomicU64,
}

impl MemoryClient {
    /// Create an empty store; the first allocated version is 1.
    pub fn new() -> Self {
        Self {
            data: RwLock::new(BTreeMap::new()),
            version_counter: std::sync::atomic::AtomicU64::new(0),
        }
    }

    /// Allocate the next version number (1, 2, 3, ...).
    fn next_version(&self) -> u64 {
        // fetch_add returns the previous value, so add 1 for the new one.
        self.version_counter
            .fetch_add(1, std::sync::atomic::Ordering::Relaxed)
            + 1
    }
}

impl Default for MemoryClient {
    fn default() -> Self {
        Self::new()
    }
}
|
||||
|
||||
#[async_trait]
|
||||
impl MetadataClient for MemoryClient {
|
||||
async fn get(&self, key: &[u8]) -> Result<Option<(Bytes, u64)>> {
|
||||
let data = self.data.read().unwrap();
|
||||
Ok(data
|
||||
.get(key)
|
||||
.map(|(v, ver)| (Bytes::copy_from_slice(v), *ver)))
|
||||
}
|
||||
|
||||
async fn put(&self, key: &[u8], value: &[u8]) -> Result<u64> {
|
||||
let version = self.next_version();
|
||||
let mut data = self.data.write().unwrap();
|
||||
data.insert(key.to_vec(), (value.to_vec(), version));
|
||||
Ok(version)
|
||||
}
|
||||
|
||||
async fn cas(&self, key: &[u8], expected_version: u64, value: &[u8]) -> Result<CasResult> {
|
||||
let mut data = self.data.write().unwrap();
|
||||
|
||||
match data.get(key) {
|
||||
Some((_, current_version)) => {
|
||||
if *current_version != expected_version {
|
||||
return Ok(CasResult::Conflict {
|
||||
expected: expected_version,
|
||||
actual: *current_version,
|
||||
});
|
||||
}
|
||||
}
|
||||
None => {
|
||||
if expected_version != 0 {
|
||||
return Ok(CasResult::NotFound);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let version = self.next_version();
|
||||
data.insert(key.to_vec(), (value.to_vec(), version));
|
||||
Ok(CasResult::Success(version))
|
||||
}
|
||||
|
||||
async fn delete(&self, key: &[u8]) -> Result<bool> {
|
||||
let mut data = self.data.write().unwrap();
|
||||
Ok(data.remove(key).is_some())
|
||||
}
|
||||
|
||||
async fn scan_prefix(&self, prefix: &[u8], limit: u32) -> Result<Vec<KvPair>> {
|
||||
let data = self.data.read().unwrap();
|
||||
let mut results = Vec::new();
|
||||
|
||||
for (k, (v, ver)) in data.range(prefix.to_vec()..) {
|
||||
if !k.starts_with(prefix) {
|
||||
break;
|
||||
}
|
||||
results.push(KvPair {
|
||||
key: Bytes::copy_from_slice(k),
|
||||
value: Bytes::copy_from_slice(v),
|
||||
version: *ver,
|
||||
});
|
||||
if results.len() >= limit as usize {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
async fn scan_range(&self, start: &[u8], end: &[u8], limit: u32) -> Result<Vec<KvPair>> {
|
||||
let data = self.data.read().unwrap();
|
||||
let mut results = Vec::new();
|
||||
|
||||
for (k, (v, ver)) in data.range(start.to_vec()..end.to_vec()) {
|
||||
results.push(KvPair {
|
||||
key: Bytes::copy_from_slice(k),
|
||||
value: Bytes::copy_from_slice(v),
|
||||
version: *ver,
|
||||
});
|
||||
if results.len() >= limit as usize {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
60
chainfire/crates/chainfire-core/src/traits.rs
Normal file
60
chainfire/crates/chainfire-core/src/traits.rs
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
use async_trait::async_trait;
|
||||
use chainfire_types::node::NodeInfo;
|
||||
use crate::error::Result;
|
||||
use std::net::SocketAddr;
|
||||
|
||||
/// Abstract interface for Gossip protocol
#[async_trait]
pub trait Gossip: Send + Sync {
    /// Start the gossip agent
    async fn start(&self) -> Result<()>;

    /// Join a cluster via seed nodes
    async fn join(&self, seeds: &[SocketAddr]) -> Result<()>;

    /// Announce presence to a specific node
    async fn announce(&self, addr: SocketAddr) -> Result<()>;

    /// Get list of known members (synchronous snapshot accessor)
    fn members(&self) -> Vec<NodeInfo>;

    /// Shutdown the gossip agent
    async fn shutdown(&self) -> Result<()>;
}
|
||||
|
||||
/// Abstract interface for Consensus protocol (Raft)
#[async_trait]
pub trait Consensus: Send + Sync {
    /// Initialize the consensus module
    async fn initialize(&self) -> Result<()>;

    /// Start the event loop
    async fn run(&self) -> Result<()>;

    /// Propose a command to the state machine.
    /// Returns a u64 on success — presumably the assigned log index; confirm
    /// with implementations before relying on its meaning.
    async fn propose(&self, data: Vec<u8>) -> Result<u64>;

    /// Add a node to the consensus group
    /// (`as_learner` adds it as a non-voting member).
    async fn add_node(&self, node_id: u64, addr: String, as_learner: bool) -> Result<()>;

    /// Remove a node from the consensus group
    async fn remove_node(&self, node_id: u64) -> Result<()>;

    /// Check if this node is the leader
    fn is_leader(&self) -> bool;

    /// Get the current leader ID, if one is known
    fn leader_id(&self) -> Option<u64>;
}
|
||||
|
||||
/// Abstract interface for State Machine
///
/// Note: these methods take `&self` and are synchronous; implementations are
/// expected to manage their own interior mutability.
pub trait StateMachine: Send + Sync {
    /// Apply a committed entry at log `index`; returns the response bytes.
    fn apply(&self, index: u64, data: &[u8]) -> Result<Vec<u8>>;

    /// Take a snapshot of current state
    fn snapshot(&self) -> Result<Vec<u8>>;

    /// Restore state from a snapshot
    fn restore(&self, snapshot: &[u8]) -> Result<()>;
}
|
||||
52
chainfire/crates/chainfire-core/tests/integration.rs
Normal file
52
chainfire/crates/chainfire-core/tests/integration.rs
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
use std::time::Duration;
|
||||
use chainfire_core::ClusterBuilder;
|
||||
use chainfire_types::{node::NodeRole, RaftRole};
|
||||
use tokio::time::sleep;
|
||||
|
||||
/// End-to-end smoke test: a single bootstrapped node must elect itself
/// leader and serve KV reads/writes.
#[tokio::test]
async fn test_single_node_bootstrap() {
    // Best-effort tracing init; ignore the error if another test set it up.
    let _ = tracing_subscriber::fmt::try_init();

    // 1. Build a single node cluster
    // Port 0 lets the OS pick free ports so concurrent tests don't collide.
    let cluster = ClusterBuilder::new(1)
        .name("node-1")
        .memory_storage()
        .gossip_addr("127.0.0.1:0".parse().unwrap())
        .raft_addr("127.0.0.1:0".parse().unwrap())
        .role(NodeRole::ControlPlane)
        .raft_role(RaftRole::Voter)
        .bootstrap(true)
        .build()
        .await
        .expect("Failed to build cluster");

    let handle = cluster.handle();

    // 2. Run the cluster in a background task
    tokio::spawn(async move {
        cluster.run().await.unwrap();
    });

    // 3. Wait for leader election (poll up to 10 x 500ms = 5s)
    let mut leader_elected = false;
    for _ in 0..10 {
        if handle.is_leader() {
            leader_elected = true;
            break;
        }
        sleep(Duration::from_millis(500)).await;
    }

    assert!(leader_elected, "Node 1 should become leader in bootstrap mode");
    assert_eq!(handle.leader(), Some(1));

    // 4. Test KV operations
    let kv = handle.kv();
    kv.put("test-key", b"test-value").await.expect("Put failed");

    let value = kv.get("test-key").await.expect("Get failed");
    assert_eq!(value, Some(b"test-value".to_vec()));

    // 5. Shutdown
    handle.shutdown();
}
|
||||
378
chainfire/crates/chainfire-raft/src/storage.rs
Normal file
378
chainfire/crates/chainfire-raft/src/storage.rs
Normal file
|
|
@ -0,0 +1,378 @@
|
|||
//! Storage primitives used by `chainfire-raft`.
|
||||
//!
|
||||
//! In production (`rocksdb-storage` feature), we re-export the real ChainFire storage layer.
|
||||
//! For lightweight testing/simulation (default), we provide a small in-memory implementation
|
||||
//! that avoids native dependencies (RocksDB/libclang).
|
||||
|
||||
#[cfg(feature = "rocksdb-storage")]
|
||||
pub use chainfire_storage::{
|
||||
EntryPayload, LogEntry, LogId, LogState, LogStorage, StateMachine, Vote,
|
||||
};
|
||||
|
||||
#[cfg(not(feature = "rocksdb-storage"))]
|
||||
mod mem {
|
||||
use chainfire_types::command::{RaftCommand, RaftResponse};
|
||||
use chainfire_types::error::StorageError;
|
||||
use chainfire_types::kv::{KvEntry, Revision};
|
||||
use parking_lot::RwLock;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::ops::RangeBounds;
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
|
||||
/// Index of an entry within the Raft log.
pub type LogIndex = u64;
/// Raft election term.
pub type Term = u64;

/// Log ID combining term and index.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize, Default)]
pub struct LogId {
    pub term: Term,
    pub index: LogIndex,
}

/// Payload of a log entry.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum EntryPayload<D> {
    /// A blank entry for leader establishment.
    Blank,
    /// A normal data entry.
    Normal(D),
    /// Membership change entry.
    Membership(Vec<u64>),
}

/// A log entry stored in the Raft log.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LogEntry<D> {
    pub log_id: LogId,
    pub payload: EntryPayload<D>,
}

/// Persisted vote information.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, Default)]
pub struct Vote {
    pub term: Term,
    /// Node voted for in `term`, if any.
    pub node_id: Option<u64>,
    pub committed: bool,
}

/// Log storage state.
#[derive(Debug, Clone, Default)]
pub struct LogState {
    /// Highest log id removed by compaction, if any purge has happened.
    pub last_purged_log_id: Option<LogId>,
    /// Highest log id present (falls back to the purge watermark when the
    /// live log is empty).
    pub last_log_id: Option<LogId>,
}
|
||||
|
||||
/// In-memory Raft log storage.
///
/// Stores bincode-encoded `LogEntry<D>` blobs keyed by log index.
pub struct LogStorage {
    /// Most recently persisted vote, if any.
    vote: RwLock<Option<Vote>>,
    /// index -> serialized entry; BTreeMap keeps entries ordered by index.
    logs: RwLock<BTreeMap<LogIndex, Vec<u8>>>,
    /// Compaction watermark set by `purge_with_log_id`.
    last_purged_log_id: RwLock<Option<LogId>>,
}
|
||||
|
||||
impl Default for LogStorage {
    /// Defaults to an empty in-memory log.
    fn default() -> Self {
        Self::new_in_memory()
    }
}
|
||||
|
||||
impl LogStorage {
|
||||
pub fn new_in_memory() -> Self {
|
||||
Self {
|
||||
vote: RwLock::new(None),
|
||||
logs: RwLock::new(BTreeMap::new()),
|
||||
last_purged_log_id: RwLock::new(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn store(&self) -> chainfire_storage::RocksStore {
|
||||
// This is a hack to satisfy the API. In memory mode, we shouldn't really
|
||||
// be calling this if we want to avoid RocksDB, but chainfire-api expects it.
|
||||
panic!("LogStorage::store() called in memory mode");
|
||||
}
|
||||
|
||||
pub fn get_log_state(&self) -> Result<LogState, StorageError> {
|
||||
let last_purged_log_id = *self.last_purged_log_id.read();
|
||||
let logs = self.logs.read();
|
||||
let last_log_id = match logs.iter().next_back() {
|
||||
Some((_idx, bytes)) if !bytes.is_empty() => {
|
||||
match bincode::deserialize::<LogEntry<Vec<u8>>>(bytes) {
|
||||
Ok(entry) => Some(entry.log_id),
|
||||
Err(e) => {
|
||||
eprintln!(
|
||||
"Warning: Failed to deserialize log entry in mem storage: {e}, treating as empty log"
|
||||
);
|
||||
last_purged_log_id
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => last_purged_log_id,
|
||||
};
|
||||
|
||||
Ok(LogState {
|
||||
last_purged_log_id,
|
||||
last_log_id,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn save_vote(&self, vote: Vote) -> Result<(), StorageError> {
|
||||
*self.vote.write() = Some(vote);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn read_vote(&self) -> Result<Option<Vote>, StorageError> {
|
||||
Ok(*self.vote.read())
|
||||
}
|
||||
|
||||
pub fn append<D: Serialize>(&self, entries: &[LogEntry<D>]) -> Result<(), StorageError> {
|
||||
if entries.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
let mut logs = self.logs.write();
|
||||
for entry in entries {
|
||||
let bytes = bincode::serialize(entry)
|
||||
.map_err(|e| StorageError::Serialization(e.to_string()))?;
|
||||
logs.insert(entry.log_id.index, bytes);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get_log_entries<D: for<'de> Deserialize<'de>>(
|
||||
&self,
|
||||
range: impl RangeBounds<LogIndex>,
|
||||
) -> Result<Vec<LogEntry<D>>, StorageError> {
|
||||
let logs = self.logs.read();
|
||||
|
||||
let start = match range.start_bound() {
|
||||
std::ops::Bound::Included(&idx) => idx,
|
||||
std::ops::Bound::Excluded(&idx) => idx + 1,
|
||||
std::ops::Bound::Unbounded => 0,
|
||||
};
|
||||
|
||||
let end = match range.end_bound() {
|
||||
std::ops::Bound::Included(&idx) => Some(idx),
|
||||
std::ops::Bound::Excluded(&idx) => Some(idx.saturating_sub(1)),
|
||||
std::ops::Bound::Unbounded => None,
|
||||
};
|
||||
|
||||
let iter: Box<dyn Iterator<Item = (&LogIndex, &Vec<u8>)> + '_> = match end {
|
||||
Some(end_inclusive) => Box::new(logs.range(start..=end_inclusive)),
|
||||
None => Box::new(logs.range(start..)),
|
||||
};
|
||||
|
||||
let mut out = Vec::new();
|
||||
for (_idx, bytes) in iter {
|
||||
let entry: LogEntry<D> = bincode::deserialize(bytes)
|
||||
.map_err(|e| StorageError::Serialization(e.to_string()))?;
|
||||
out.push(entry);
|
||||
}
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
pub fn truncate(&self, from_index: LogIndex) -> Result<(), StorageError> {
|
||||
let mut logs = self.logs.write();
|
||||
// Remove all entries >= from_index
|
||||
let _ = logs.split_off(&from_index);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn purge_with_log_id(&self, log_id: LogId) -> Result<(), StorageError> {
|
||||
// In-memory compaction marker only; entries are not retained once purged.
|
||||
*self.last_purged_log_id.write() = Some(log_id);
|
||||
self.truncate(log_id.index + 1)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Minimal in-memory KV store used by the in-memory state machine.
pub struct KvStore {
    // Key -> entry map; coarse-grained RwLock guards all reads/writes.
    data: RwLock<HashMap<Vec<u8>, KvEntry>>,
    // Monotonic revision counter bumped on every mutation.
    revision: AtomicU64,
}
|
||||
|
||||
impl Default for KvStore {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
data: RwLock::new(HashMap::new()),
|
||||
revision: AtomicU64::new(0),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl KvStore {
|
||||
pub fn current_revision(&self) -> Revision {
|
||||
self.revision.load(Ordering::SeqCst)
|
||||
}
|
||||
|
||||
fn next_revision(&self) -> Revision {
|
||||
self.revision.fetch_add(1, Ordering::SeqCst) + 1
|
||||
}
|
||||
|
||||
pub fn get(&self, key: &[u8]) -> Result<Option<KvEntry>, StorageError> {
|
||||
Ok(self.data.read().get(key).cloned())
|
||||
}
|
||||
|
||||
pub fn range_count(&self, start: &[u8], end: Option<&[u8]>) -> Result<usize, StorageError> {
|
||||
let data = self.data.read();
|
||||
let count = if let Some(end) = end {
|
||||
data.iter().filter(|(k, _)| k.as_slice() >= start && k.as_slice() < end).count()
|
||||
} else {
|
||||
data.iter().filter(|(k, _)| k.as_slice() >= start).count()
|
||||
};
|
||||
Ok(count)
|
||||
}
|
||||
|
||||
pub fn range_with_limit(&self, start: &[u8], end: Option<&[u8]>, limit: Option<usize>) -> Result<(Vec<KvEntry>, bool), StorageError> {
|
||||
let data = self.data.read();
|
||||
let mut entries: Vec<_> = if let Some(end) = end {
|
||||
data.iter()
|
||||
.filter(|(k, _)| k.as_slice() >= start && k.as_slice() < end)
|
||||
.map(|(_, v)| v.clone())
|
||||
.collect()
|
||||
} else {
|
||||
data.iter()
|
||||
.filter(|(k, _)| k.as_slice() >= start)
|
||||
.map(|(_, v)| v.clone())
|
||||
.collect()
|
||||
};
|
||||
entries.sort_by(|a, b| a.key.cmp(&b.key));
|
||||
|
||||
if let Some(limit) = limit {
|
||||
let more = entries.len() > limit;
|
||||
entries.truncate(limit);
|
||||
Ok((entries, more))
|
||||
} else {
|
||||
Ok((entries, false))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_revision(&self, revision: Revision) {
|
||||
self.revision.store(revision, Ordering::SeqCst);
|
||||
}
|
||||
|
||||
pub fn put(
|
||||
&self,
|
||||
key: Vec<u8>,
|
||||
value: Vec<u8>,
|
||||
lease_id: Option<i64>,
|
||||
) -> Result<(Revision, Option<KvEntry>), StorageError> {
|
||||
let mut data = self.data.write();
|
||||
let prev = data.get(&key).cloned();
|
||||
let revision = self.next_revision();
|
||||
|
||||
let entry = match &prev {
|
||||
Some(old) => old.update(value, revision),
|
||||
None => {
|
||||
if let Some(lease) = lease_id {
|
||||
KvEntry::with_lease(key.clone(), value, revision, lease)
|
||||
} else {
|
||||
KvEntry::new(key.clone(), value, revision)
|
||||
}
|
||||
}
|
||||
};
|
||||
data.insert(key, entry);
|
||||
Ok((revision, prev))
|
||||
}
|
||||
|
||||
pub fn delete(&self, key: &[u8]) -> Result<(Revision, Option<KvEntry>), StorageError> {
|
||||
let mut data = self.data.write();
|
||||
let prev = data.remove(key);
|
||||
let revision = self.next_revision();
|
||||
Ok((revision, prev))
|
||||
}
|
||||
}
|
||||
|
||||
/// Minimal in-memory state machine for Raft simulation.
pub struct StateMachine {
    // The only state is an in-memory key/value store.
    kv: KvStore,
}
|
||||
|
||||
pub struct LeaseStore;
|
||||
impl LeaseStore {
|
||||
pub fn list(&self) -> Vec<chainfire_types::lease::Lease> { vec![] }
|
||||
}
|
||||
|
||||
impl Default for StateMachine {
    /// Defaults to the in-memory backend.
    fn default() -> Self {
        Self::new_in_memory()
    }
}
|
||||
|
||||
impl StateMachine {
|
||||
pub fn new_in_memory() -> Self {
|
||||
Self { kv: KvStore::default() }
|
||||
}
|
||||
|
||||
pub fn kv(&self) -> &KvStore {
|
||||
&self.kv
|
||||
}
|
||||
|
||||
pub fn current_revision(&self) -> Revision {
|
||||
self.kv.current_revision()
|
||||
}
|
||||
|
||||
pub fn leases(&self) -> LeaseStore {
|
||||
LeaseStore
|
||||
}
|
||||
|
||||
pub fn apply(&self, command: RaftCommand) -> Result<RaftResponse, StorageError> {
|
||||
match command {
|
||||
RaftCommand::Put {
|
||||
key,
|
||||
value,
|
||||
lease_id,
|
||||
prev_kv,
|
||||
} => {
|
||||
let (rev, prev) = self.kv.put(key, value, lease_id)?;
|
||||
Ok(RaftResponse::with_prev_kv(rev, if prev_kv { prev } else { None }))
|
||||
}
|
||||
RaftCommand::Delete { key, prev_kv } => {
|
||||
let (rev, prev) = self.kv.delete(&key)?;
|
||||
let deleted = if prev.is_some() { 1 } else { 0 };
|
||||
Ok(RaftResponse {
|
||||
revision: rev,
|
||||
prev_kv: if prev_kv { prev } else { None },
|
||||
deleted,
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
RaftCommand::Noop => Ok(RaftResponse::new(self.current_revision())),
|
||||
other => Err(StorageError::Serialization(format!(
|
||||
"mem state machine: unsupported command variant: {other:?}"
|
||||
))),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: appended entries round-trip through serialization and
    /// come back in index order from an inclusive range query.
    #[test]
    fn mem_log_storage_append_and_get() {
        let storage = LogStorage::new_in_memory();
        let entries = vec![
            LogEntry {
                log_id: LogId { term: 1, index: 1 },
                payload: EntryPayload::Normal(b"a".to_vec()),
            },
            LogEntry {
                log_id: LogId { term: 1, index: 2 },
                payload: EntryPayload::Normal(b"b".to_vec()),
            },
        ];
        storage.append(&entries).unwrap();
        let got: Vec<LogEntry<Vec<u8>>> = storage.get_log_entries(1..=2).unwrap();
        assert_eq!(got.len(), 2);
        assert_eq!(got[0].log_id.index, 1);
    }
}
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "rocksdb-storage"))]
|
||||
pub use mem::{EntryPayload, LogEntry, LogId, LogState, LogStorage, StateMachine, Vote};
|
||||
|
||||
|
||||
274
chainfire/crates/chainfire-raft/tests/proptest_sim.rs
Normal file
274
chainfire/crates/chainfire-raft/tests/proptest_sim.rs
Normal file
|
|
@ -0,0 +1,274 @@
|
|||
//! Property-based tests for `chainfire-raft` using an in-process simulated cluster.
|
||||
//!
|
||||
//! These tests aim to catch timing/partition edge cases with high reproducibility.
|
||||
|
||||
#![cfg(all(test, feature = "custom-raft"))]
|
||||
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use proptest::prelude::*;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio::time;
|
||||
|
||||
use chainfire_raft::core::{RaftConfig, RaftCore};
|
||||
use chainfire_raft::network::test_client::{RpcMessage, SimulatedNetwork};
|
||||
use chainfire_raft::storage::{EntryPayload, LogEntry, LogStorage, StateMachine};
|
||||
use chainfire_types::command::RaftCommand;
|
||||
|
||||
/// One step in a randomized cluster schedule.
#[derive(Debug, Clone)]
enum Op {
    /// Advance simulated time by this many milliseconds.
    Tick(u64),
    /// Sever the link between two nodes.
    Disconnect(u64, u64),
    /// Restore the link between two nodes.
    Reconnect(u64, u64),
    /// Add per-message delay (ms) between two nodes.
    Delay(u64, u64, u64),
    /// Reset any custom behavior on the link between two nodes.
    ClearLink(u64, u64),
    /// Issue a client Put of (key, value) bytes via the given node.
    Write(u64, u8, u8),
}
|
||||
|
||||
/// Strategy over valid node ids (the simulated cluster has nodes 1..=3).
fn node_id() -> impl Strategy<Value = u64> {
    1_u64..=3_u64
}

/// Strategy over ordered pairs of two *different* node ids.
fn distinct_pair() -> impl Strategy<Value = (u64, u64)> {
    (node_id(), node_id()).prop_filter("distinct nodes", |(a, b)| a != b)
}
|
||||
|
||||
/// Strategy producing a single random schedule step, weighted uniformly
/// across ticks, partitions, delays, link resets, and client writes.
fn op_strategy() -> impl Strategy<Value = Op> {
    prop_oneof![
        // Advance simulated time by up to 300ms.
        (0_u64..=300).prop_map(Op::Tick),
        distinct_pair().prop_map(|(a, b)| Op::Disconnect(a, b)),
        distinct_pair().prop_map(|(a, b)| Op::Reconnect(a, b)),
        (distinct_pair(), 0_u64..=50).prop_map(|((a, b), d)| Op::Delay(a, b, d)),
        distinct_pair().prop_map(|(a, b)| Op::ClearLink(a, b)),
        // Client writes: pick node + small key/value.
        (node_id(), any::<u8>(), any::<u8>()).prop_map(|(n, k, v)| Op::Write(n, k, v)),
    ]
}
|
||||
|
||||
/// Strategy over whole schedules: up to 40 random steps per test case.
fn ops_strategy() -> impl Strategy<Value = Vec<Op>> {
    prop::collection::vec(op_strategy(), 0..40)
}
|
||||
|
||||
async fn advance_ms(total_ms: u64) {
|
||||
// Advance in small steps to avoid “simultaneous” timer firings starving message handling.
|
||||
let step_ms: u64 = 10;
|
||||
let mut remaining = total_ms;
|
||||
while remaining > 0 {
|
||||
let d = remaining.min(step_ms);
|
||||
time::advance(Duration::from_millis(d)).await;
|
||||
tokio::task::yield_now().await;
|
||||
remaining -= d;
|
||||
}
|
||||
}
|
||||
|
||||
/// Build a 3-node Raft cluster over a simulated network.
///
/// Each node gets its own in-memory log storage + state machine, a
/// deterministic RNG seed (its node id) for reproducibility, and an RPC
/// handler task that services Vote/AppendEntries messages delivered by the
/// simulated network. Event loops (`run()`) are NOT started here — the
/// caller spawns them.
async fn create_3node_cluster() -> (Vec<Arc<RaftCore>>, Arc<SimulatedNetwork>) {
    let network = Arc::new(SimulatedNetwork::new());
    let mut nodes = Vec::new();

    for node_id in 1..=3_u64 {
        // Peers are every other node in the 3-node cluster.
        let peers: Vec<u64> = (1..=3_u64).filter(|&id| id != node_id).collect();
        let storage = Arc::new(LogStorage::new_in_memory());
        let state_machine = Arc::new(StateMachine::new_in_memory());

        let config = RaftConfig {
            election_timeout_min: 150,
            election_timeout_max: 300,
            heartbeat_interval: 50,
            // Deterministic per-node seed for reproducibility.
            deterministic_seed: Some(node_id),
        };

        let node = Arc::new(RaftCore::new(
            node_id,
            peers,
            storage,
            state_machine,
            Arc::new(network.client(node_id)) as Arc<dyn chainfire_raft::network::RaftRpcClient>,
            config,
        ));
        node.initialize().await.unwrap();
        nodes.push(node);
    }

    // Wire up RPC handlers: each node drains its inbox and answers RPCs
    // through the oneshot channel carried in the message.
    for node in &nodes {
        let node_id = node.node_id();
        let (tx, mut rx) = mpsc::unbounded_channel::<RpcMessage>();
        network.register(node_id, tx).await;

        let node_clone: Arc<RaftCore> = Arc::clone(node);
        tokio::spawn(async move {
            while let Some(msg) = rx.recv().await {
                match msg {
                    RpcMessage::Vote(req, resp_tx) => {
                        node_clone.request_vote_rpc(req, resp_tx).await;
                    }
                    RpcMessage::AppendEntries(req, resp_tx) => {
                        node_clone.append_entries_rpc(req, resp_tx).await;
                    }
                }
            }
        });
    }

    (nodes, network)
}
|
||||
|
||||
fn payload_fingerprint(payload: &EntryPayload<Vec<u8>>) -> Vec<u8> {
|
||||
// Serialize the enum for stable equality checks across variants.
|
||||
bincode::serialize(payload).unwrap_or_default()
|
||||
}
|
||||
|
||||
/// Check core Raft safety invariants across all nodes.
///
/// Per node: last_applied <= commit_index <= last_log_index.
/// Across nodes: the Log Matching Property — if two logs hold an entry with
/// the same index and term, the logs are identical up through that index.
async fn assert_raft_invariants(nodes: &[Arc<RaftCore>]) {
    // Per-node monotonic invariants.
    for node in nodes {
        let commit = node.commit_index().await;
        let last_applied = node.last_applied().await;

        let st = node.storage().get_log_state().expect("log state");
        let last_log_index = st.last_log_id.map(|id| id.index).unwrap_or(0);

        assert!(
            last_applied <= commit,
            "node {}: last_applied={} > commit_index={}",
            node.node_id(),
            last_applied,
            commit
        );
        assert!(
            commit <= last_log_index,
            "node {}: commit_index={} > last_log_index={}",
            node.node_id(),
            commit,
            last_log_index
        );
    }

    // Log Matching Property:
    // If two logs contain an entry with the same index and term, then the logs are identical
    // for all entries up through that index.
    // Snapshot each node's log as index -> (term, payload fingerprint).
    let mut node_logs: Vec<std::collections::BTreeMap<u64, (u64, Vec<u8>)>> = Vec::new();
    for node in nodes {
        let st = node.storage().get_log_state().expect("log state");
        let last = st.last_log_id.map(|id| id.index).unwrap_or(0);
        let entries: Vec<LogEntry<Vec<u8>>> = if last == 0 {
            vec![]
        } else {
            node.storage()
                .get_log_entries(1..=last)
                .expect("log entries")
        };

        let mut m = std::collections::BTreeMap::new();
        for e in entries {
            m.insert(e.log_id.index, (e.log_id.term, payload_fingerprint(&e.payload)));
        }
        node_logs.push(m);
    }

    // Compare every unordered pair of nodes.
    for a in 0..nodes.len() {
        for b in (a + 1)..nodes.len() {
            let la = &node_logs[a];
            let lb = &node_logs[b];

            for (idx, (term_a, payload_a)) in la.iter() {
                if let Some((term_b, payload_b)) = lb.get(idx) {
                    if term_a == term_b {
                        // Same (index, term) => payloads must agree...
                        assert_eq!(
                            payload_a, payload_b,
                            "log mismatch at idx={} term={} (nodes {} vs {})",
                            idx,
                            term_a,
                            nodes[a].node_id(),
                            nodes[b].node_id()
                        );

                        // ...and so must the entire prefix up to that index.
                        for j in 1..=*idx {
                            assert_eq!(
                                la.get(&j),
                                lb.get(&j),
                                "log matching violated at idx={} (prefix {} differs) nodes {} vs {}",
                                idx,
                                j,
                                nodes[a].node_id(),
                                nodes[b].node_id()
                            );
                        }
                    }
                }
            }
        }
    }
}
|
||||
|
||||
proptest! {
    #![proptest_config(ProptestConfig {
        // 32 cases keeps the suite fast while still exploring many schedules.
        cases: 32,
        .. ProptestConfig::default()
    })]

    /// Drive a 3-node simulated cluster through a random schedule of ticks,
    /// partitions, delays, and writes, then assert the Raft safety invariants.
    /// Runs on a single-threaded runtime with paused time for determinism.
    #[test]
    fn prop_raft_log_matching_holds(ops in ops_strategy()) {
        let rt = tokio::runtime::Builder::new_current_thread()
            .enable_time()
            .build()
            .unwrap();

        rt.block_on(async move {
            tokio::time::pause();

            let (nodes, network) = create_3node_cluster().await;

            // Start event loops.
            let mut handles = Vec::new();
            for node in &nodes {
                let node_clone = Arc::clone(node);
                handles.push(tokio::spawn(async move {
                    let _ = node_clone.run().await;
                }));
            }
            tokio::task::yield_now().await;

            // Drive a randomized sequence of operations.
            for op in ops {
                match op {
                    Op::Tick(ms) => advance_ms(ms).await,
                    Op::Disconnect(a, b) => network.disconnect(a, b).await,
                    Op::Reconnect(a, b) => network.reconnect(a, b).await,
                    // Delays and link resets are applied symmetrically.
                    Op::Delay(a, b, d) => {
                        use chainfire_raft::network::test_client::LinkBehavior;
                        network.set_link(a, b, LinkBehavior::Delay(Duration::from_millis(d))).await;
                        network.set_link(b, a, LinkBehavior::Delay(Duration::from_millis(d))).await;
                    }
                    Op::ClearLink(a, b) => {
                        network.clear_link(a, b).await;
                        network.clear_link(b, a).await;
                    }
                    // Writes may fail (no leader / partition); that's fine —
                    // we only check safety, not liveness.
                    Op::Write(n, k, v) => {
                        let node = nodes.iter().find(|x| x.node_id() == n).unwrap();
                        let _ = node.client_write(RaftCommand::Put {
                            key: vec![k],
                            value: vec![v],
                            lease_id: None,
                            prev_kv: false,
                        }).await;
                    }
                }
            }

            // Let the system settle a bit.
            advance_ms(500).await;

            assert_raft_invariants(&nodes).await;

            // Best-effort cleanup.
            for h in handles {
                h.abort();
            }
        });
    }
}
|
||||
|
||||
|
||||
1098
client-common/Cargo.lock
generated
Normal file
1098
client-common/Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load diff
16
client-common/Cargo.toml
Normal file
16
client-common/Cargo.toml
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
[package]
name = "photocloud-client-common"
version = "0.1.0"
edition = "2021"
authors = ["PhotonCloud"]
license = "MIT OR Apache-2.0"
description = "Shared client config types (endpoint/auth/retry) for PhotonCloud SDKs"

[dependencies]
# tonic with TLS enabled for ClientTlsConfig / Identity support.
tonic = { version = "0.12", features = ["tls"] }
tokio = { version = "1", features = ["macros", "rt-multi-thread", "time"] }
thiserror = "1"
# Exponential backoff used by RetryConfig::backoff().
backoff = { version = "0.4", features = ["tokio"] }

[dev-dependencies]
tokio = { version = "1", features = ["macros", "rt", "time"] }
|
||||
205
client-common/src/lib.rs
Normal file
205
client-common/src/lib.rs
Normal file
|
|
@ -0,0 +1,205 @@
|
|||
//! Shared client config types (endpoint/auth/retry) for PhotonCloud SDKs.
|
||||
//!
|
||||
//! Lightweight, type-only helpers to keep SDK crates consistent without
|
||||
//! forcing a unified SDK dependency tree.
|
||||
|
||||
use std::time::Duration;
|
||||
use backoff::ExponentialBackoffBuilder;
|
||||
use thiserror::Error;
|
||||
use tonic::codegen::InterceptedService;
|
||||
use tonic::service::Interceptor;
|
||||
use tonic::transport::{Channel, ClientTlsConfig, Endpoint};
|
||||
|
||||
/// Errors produced by client-common helpers.
#[derive(Debug, Error)]
pub enum ClientError {
    /// The endpoint URI could not be parsed by tonic.
    #[error("invalid endpoint: {0}")]
    InvalidEndpoint(String),
    /// TLS settings were rejected when applied to the endpoint.
    #[error("TLS configuration error: {0}")]
    TlsConfig(String),
    /// Underlying tonic transport failure (connect, handshake, ...).
    #[error("transport error: {0}")]
    Transport(#[from] tonic::transport::Error),
    /// Authentication-related failure.
    #[error("auth error: {0}")]
    Auth(String),
}
|
||||
|
||||
/// Endpoint configuration (URI + timeout + optional TLS domain/CA).
#[derive(Debug, Clone)]
pub struct EndpointConfig {
    // Target URI, e.g. "http://host:port" or "https://host:port".
    pub uri: String,
    // Per-request timeout applied to the tonic endpoint.
    pub timeout: Duration,
    // When set, the channel is built with TLS.
    pub tls: Option<TlsConfig>,
}
|
||||
|
||||
impl EndpointConfig {
|
||||
pub fn new(uri: impl Into<String>) -> Self {
|
||||
Self {
|
||||
uri: uri.into(),
|
||||
timeout: Duration::from_millis(5_000),
|
||||
tls: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_timeout(mut self, timeout: Duration) -> Self {
|
||||
self.timeout = timeout;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_tls(mut self, tls: TlsConfig) -> Self {
|
||||
self.tls = Some(tls);
|
||||
self
|
||||
}
|
||||
|
||||
/// Build a tonic Endpoint with timeout/TLS applied.
|
||||
pub fn build_endpoint(&self) -> Result<Endpoint, ClientError> {
|
||||
let mut ep = Endpoint::from_shared(self.uri.clone())
|
||||
.map_err(|e| ClientError::InvalidEndpoint(e.to_string()))?
|
||||
.timeout(self.timeout);
|
||||
|
||||
if let Some(tls) = &self.tls {
|
||||
let mut cfg = ClientTlsConfig::new();
|
||||
if let Some(domain) = &tls.domain {
|
||||
cfg = cfg.domain_name(domain.clone());
|
||||
}
|
||||
if let Some(ca_cert) = &tls.ca_cert_pem {
|
||||
cfg = cfg.ca_certificate(tonic::transport::Certificate::from_pem(ca_cert.clone()));
|
||||
}
|
||||
if let (Some(cert), Some(key)) = (&tls.client_cert_pem, &tls.client_key_pem) {
|
||||
cfg = cfg.identity(tonic::transport::Identity::from_pem(
|
||||
cert.clone(),
|
||||
key.clone(),
|
||||
));
|
||||
}
|
||||
ep = ep.tls_config(cfg).map_err(|e| ClientError::TlsConfig(e.to_string()))?;
|
||||
}
|
||||
|
||||
Ok(ep)
|
||||
}
|
||||
}
|
||||
|
||||
/// TLS configuration inputs.
#[derive(Debug, Clone, Default)]
pub struct TlsConfig {
    // SNI / certificate-verification domain override.
    pub domain: Option<String>,
    // PEM-encoded CA certificate used to verify the server.
    pub ca_cert_pem: Option<String>,
    // PEM-encoded client certificate (mTLS); requires client_key_pem too.
    pub client_cert_pem: Option<String>,
    // PEM-encoded client private key (mTLS); requires client_cert_pem too.
    pub client_key_pem: Option<String>,
}
|
||||
|
||||
/// Auth configuration for interceptors.
#[derive(Debug, Clone)]
pub enum AuthConfig {
    /// No authentication metadata is attached.
    None,
    /// `authorization: Bearer <token>` header.
    Bearer { token: String },
    /// `x-api-key` / `x-api-secret` header pair.
    AccessKey { id: String, secret: String },
}
|
||||
|
||||
impl AuthConfig {
|
||||
pub fn bearer(token: impl Into<String>) -> Self {
|
||||
Self::Bearer { token: token.into() }
|
||||
}
|
||||
}
|
||||
|
||||
/// Retry/backoff configuration.
#[derive(Debug, Clone)]
pub struct RetryConfig {
    // Maximum number of retry attempts (enforced by the caller, not by backoff()).
    pub max_retries: u32,
    // Initial backoff interval.
    pub base_delay: Duration,
    // Cap on any single backoff interval.
    pub max_delay: Duration,
    // Whether intervals are randomized; false pins randomization factor to 0.
    pub jitter: bool,
}
|
||||
|
||||
impl Default for RetryConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
max_retries: 3,
|
||||
base_delay: Duration::from_millis(100),
|
||||
max_delay: Duration::from_secs(2),
|
||||
jitter: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl RetryConfig {
    /// Build an ExponentialBackoff from these settings.
    ///
    /// Note: max_elapsed_time is unlimited — attempt count is expected to be
    /// bounded by the caller using `max_retries`, not by the backoff itself.
    pub fn backoff(&self) -> backoff::ExponentialBackoff {
        let mut builder = ExponentialBackoffBuilder::new();
        // Builder methods mutate in place (&mut self), so the chain's return
        // value can be ignored.
        builder
            .with_initial_interval(self.base_delay)
            .with_max_interval(self.max_delay)
            .with_max_elapsed_time(None);
        if !self.jitter {
            // Randomization factor 0 makes intervals fully deterministic.
            builder.with_randomization_factor(0.0);
        }
        builder.build()
    }
}
|
||||
|
||||
/// Build a connected channel from the endpoint config.
///
/// NOTE(review): despite the `auth` parameter, no interceptor is attached
/// here — Bearer and AccessKey currently return the bare channel unchanged.
/// Callers that need auth metadata must wrap the channel with [`with_auth`]
/// (the return type `Channel` cannot carry an interceptor). Confirm whether
/// this is intentional or an unfinished TODO.
pub async fn build_channel(
    endpoint: &EndpointConfig,
    auth: &AuthConfig,
) -> Result<Channel, ClientError> {
    let ep = endpoint.build_endpoint()?;
    let channel = ep.connect().await?;

    match auth {
        AuthConfig::None => Ok(channel),
        AuthConfig::Bearer { .. } | AuthConfig::AccessKey { .. } => Ok(channel),
    }
}
|
||||
|
||||
/// Interceptor that injects auth headers; used by clients that need request metadata.
|
||||
#[derive(Clone)]
|
||||
pub struct AuthInterceptor(AuthConfig);
|
||||
|
||||
impl Interceptor for AuthInterceptor {
|
||||
fn call(&mut self, mut req: tonic::Request<()>) -> Result<tonic::Request<()>, tonic::Status> {
|
||||
match &self.0 {
|
||||
AuthConfig::None => {}
|
||||
AuthConfig::Bearer { token } => {
|
||||
req.metadata_mut()
|
||||
.insert("authorization", format!("Bearer {}", token).parse().unwrap());
|
||||
}
|
||||
AuthConfig::AccessKey { id, secret } => {
|
||||
req.metadata_mut()
|
||||
.insert("x-api-key", id.parse().unwrap());
|
||||
req.metadata_mut()
|
||||
.insert("x-api-secret", secret.parse().unwrap());
|
||||
}
|
||||
}
|
||||
Ok(req)
|
||||
}
|
||||
}
|
||||
|
||||
/// Create an auth interceptor from AuthConfig (for tonic clients that need it).
|
||||
pub fn auth_interceptor(auth: &AuthConfig) -> Option<AuthInterceptor> {
|
||||
match auth {
|
||||
AuthConfig::None => None,
|
||||
_ => Some(AuthInterceptor(auth.clone())),
|
||||
}
|
||||
}
|
||||
|
||||
/// Helper to wrap a tonic client with an interceptor when auth is provided.
|
||||
pub fn with_auth(channel: Channel, auth: &AuthConfig) -> InterceptedService<Channel, AuthInterceptor> {
|
||||
let interceptor = auth_interceptor(auth).unwrap_or(AuthInterceptor(AuthConfig::None));
|
||||
InterceptedService::new(channel, interceptor)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// The backoff builder honors the configured initial interval.
    #[test]
    fn retry_config_builds_backoff() {
        let cfg = RetryConfig::default();
        let backoff = cfg.backoff();
        assert!(backoff.initial_interval == cfg.base_delay);
    }

    /// A plaintext endpoint builds without TLS and keeps its host.
    #[tokio::test]
    async fn endpoint_builds_without_tls() {
        let ep = EndpointConfig::new("http://localhost:50051");
        let built = ep.build_endpoint().unwrap();
        assert!(built.uri().to_string().contains("localhost"));
    }
}
|
||||
|
|
@ -0,0 +1,275 @@
|
|||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use apigateway_api::proto::{
|
||||
CreditCommitRequest, CreditCommitResponse, CreditReserveRequest, CreditReserveResponse,
|
||||
CreditRollbackRequest, CreditRollbackResponse,
|
||||
};
|
||||
use apigateway_api::GatewayCreditService;
|
||||
use creditservice_proto::credit_service_server::CreditService;
|
||||
use creditservice_proto::{CommitReservationRequest, ReleaseReservationRequest, ReserveCreditsRequest};
|
||||
use tonic::{Code, Request, Response, Status};
|
||||
|
||||
use crate::credit_service::CreditServiceImpl;
|
||||
|
||||
/// Gateway-facing facade over the credit service: adapts the gateway's
/// reserve/commit/rollback API onto creditservice reservation RPCs.
pub struct GatewayCreditServiceImpl {
    // Underlying credit service implementation this facade delegates to.
    credit_service: Arc<CreditServiceImpl>,
}

impl GatewayCreditServiceImpl {
    /// Wrap an existing credit service behind the gateway-facing API.
    pub fn new(credit_service: Arc<CreditServiceImpl>) -> Self {
        Self { credit_service }
    }
}
|
||||
|
||||
#[tonic::async_trait]
impl GatewayCreditService for GatewayCreditServiceImpl {
    /// Translate a gateway reserve call into a creditservice reservation.
    ///
    /// Missing org/project ids produce a soft deny (allow=false) so the
    /// gateway can reject the request without treating it as an RPC failure;
    /// a zero unit count is a hard InvalidArgument.
    async fn reserve(
        &self,
        request: Request<CreditReserveRequest>,
    ) -> Result<Response<CreditReserveResponse>, Status> {
        let req = request.into_inner();

        if req.org_id.trim().is_empty() {
            return Ok(Response::new(deny_response(
                "org_id required for credit reservation",
            )));
        }

        if req.project_id.trim().is_empty() {
            return Ok(Response::new(deny_response(
                "project_id required for credit reservation",
            )));
        }

        if req.units == 0 {
            return Err(Status::invalid_argument("units must be positive"));
        }

        // u64 -> i64 conversion: the downstream credit API is signed.
        let amount = i64::try_from(req.units)
            .map_err(|_| Status::invalid_argument("units exceeds i64 range"))?;
        let description = reservation_description(&req);
        let resource_type = reservation_resource_type(&req);
        let ttl_seconds = reservation_ttl(&req.attributes);

        let reserve_request = ReserveCreditsRequest {
            project_id: req.project_id.clone(),
            org_id: req.org_id.clone(),
            amount,
            description,
            resource_type,
            ttl_seconds,
        };

        match self
            .credit_service
            .reserve_credits(Request::new(reserve_request))
            .await
        {
            Ok(response) => {
                let response = response.into_inner();
                let reservation = response.reservation.ok_or_else(|| {
                    Status::internal("credit reservation missing from response")
                })?;
                // NOTE(review): `remaining` is hard-coded to 0 — confirm
                // gateway callers don't expect the real remaining balance.
                Ok(Response::new(CreditReserveResponse {
                    allow: true,
                    reservation_id: reservation.id,
                    reason: String::new(),
                    remaining: 0,
                }))
            }
            // NotFound / FailedPrecondition (no wallet, insufficient funds)
            // map to a soft deny; other codes propagate as RPC errors.
            Err(status) => match status.code() {
                Code::NotFound | Code::FailedPrecondition => {
                    Ok(Response::new(deny_response(status.message())))
                }
                Code::InvalidArgument => Err(Status::invalid_argument(status.message())),
                _ => Err(status),
            },
        }
    }

    /// Commit a prior reservation with the actual units consumed.
    async fn commit(
        &self,
        request: Request<CreditCommitRequest>,
    ) -> Result<Response<CreditCommitResponse>, Status> {
        let req = request.into_inner();

        let amount = i64::try_from(req.units)
            .map_err(|_| Status::invalid_argument("units exceeds i64 range"))?;
        // org_id / resource_id are not known at the gateway layer; the
        // downstream service resolves them from the reservation id.
        let commit_request = CommitReservationRequest {
            reservation_id: req.reservation_id,
            org_id: String::new(),
            actual_amount: amount,
            resource_id: String::new(),
        };

        match self
            .credit_service
            .commit_reservation(Request::new(commit_request))
            .await
        {
            Ok(_) => Ok(Response::new(CreditCommitResponse {
                success: true,
                reason: String::new(),
            })),
            // Unknown/expired reservations are reported as success=false
            // rather than an RPC error.
            Err(status) => match status.code() {
                Code::NotFound | Code::FailedPrecondition => Ok(Response::new(CreditCommitResponse {
                    success: false,
                    reason: status.message().to_string(),
                })),
                Code::InvalidArgument => Err(Status::invalid_argument(status.message())),
                _ => Err(status),
            },
        }
    }

    /// Roll back (release) a reservation, e.g. after an upstream failure.
    async fn rollback(
        &self,
        request: Request<CreditRollbackRequest>,
    ) -> Result<Response<CreditRollbackResponse>, Status> {
        let req = request.into_inner();
        let rollback_request = ReleaseReservationRequest {
            reservation_id: req.reservation_id,
            org_id: String::new(),
            reason: "gateway rollback".into(),
        };

        match self
            .credit_service
            .release_reservation(Request::new(rollback_request))
            .await
        {
            Ok(response) => {
                let response = response.into_inner();
                Ok(Response::new(CreditRollbackResponse {
                    success: response.success,
                    reason: String::new(),
                }))
            }
            // Unknown/expired reservations map to success=false, mirroring commit.
            Err(status) => match status.code() {
                Code::NotFound | Code::FailedPrecondition => Ok(Response::new(CreditRollbackResponse {
                    success: false,
                    reason: status.message().to_string(),
                })),
                Code::InvalidArgument => Err(Status::invalid_argument(status.message())),
                _ => Err(status),
            },
        }
    }
}
|
||||
|
||||
fn deny_response(reason: impl Into<String>) -> CreditReserveResponse {
|
||||
CreditReserveResponse {
|
||||
allow: false,
|
||||
reservation_id: String::new(),
|
||||
reason: reason.into(),
|
||||
remaining: 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn reservation_resource_type(req: &CreditReserveRequest) -> String {
|
||||
if let Some(value) = req.attributes.get("resource_type") {
|
||||
return value.clone();
|
||||
}
|
||||
if !req.route_name.is_empty() {
|
||||
return req.route_name.clone();
|
||||
}
|
||||
"apigateway".to_string()
|
||||
}
|
||||
|
||||
fn reservation_description(req: &CreditReserveRequest) -> String {
|
||||
if let Some(value) = req.attributes.get("description") {
|
||||
return value.clone();
|
||||
}
|
||||
|
||||
let mut parts = vec![format!("route={}", req.route_name)];
|
||||
if !req.method.is_empty() {
|
||||
parts.push(format!("method={}", req.method));
|
||||
}
|
||||
if !req.path.is_empty() {
|
||||
parts.push(format!("path={}", req.path));
|
||||
}
|
||||
if !req.request_id.is_empty() {
|
||||
parts.push(format!("request_id={}", req.request_id));
|
||||
}
|
||||
if !req.subject_id.is_empty() {
|
||||
parts.push(format!("subject_id={}", req.subject_id));
|
||||
}
|
||||
|
||||
format!("gateway {}", parts.join(" "))
|
||||
}
|
||||
|
||||
/// Parse an optional "ttl_seconds" attribute; missing or unparseable values
/// default to 0 (meaning: no explicit TTL requested).
fn reservation_ttl(attributes: &HashMap<String, String>) -> i32 {
    match attributes.get("ttl_seconds") {
        Some(raw) => raw.parse().unwrap_or(0),
        None => 0,
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::storage::{CreditStorage, InMemoryStorage};
    use creditservice_types::Wallet;

    /// Build a well-formed reserve request for `project_id` asking for `units`.
    fn reserve_request(project_id: &str, units: u64) -> CreditReserveRequest {
        CreditReserveRequest {
            request_id: "req-1".into(),
            subject_id: "subject-1".into(),
            org_id: "org-1".into(),
            project_id: project_id.into(),
            route_name: "route-1".into(),
            method: "GET".into(),
            path: "/v1/test".into(),
            raw_query: "".into(),
            units,
            attributes: HashMap::new(),
        }
    }

    /// With no wallet provisioned, reserve is a soft deny (allow=false),
    /// not an RPC error.
    #[tokio::test]
    async fn test_reserve_denied_without_wallet() {
        let storage = InMemoryStorage::new();
        let credit_service = Arc::new(CreditServiceImpl::new(storage));
        let gateway = GatewayCreditServiceImpl::new(credit_service);

        let response = gateway
            .reserve(Request::new(reserve_request("proj-1", 5)))
            .await
            .unwrap()
            .into_inner();

        assert!(!response.allow);
        assert!(response.reason.contains("Wallet not found"));
    }

    /// Happy path: a funded wallet allows reserve, and the returned
    /// reservation id can then be committed successfully.
    #[tokio::test]
    async fn test_reserve_commit_success() {
        let storage = InMemoryStorage::new();
        let wallet = Wallet::new("proj-1".into(), "org-1".into(), 100);
        storage.create_wallet(wallet).await.unwrap();

        let credit_service = Arc::new(CreditServiceImpl::new(storage));
        let gateway = GatewayCreditServiceImpl::new(credit_service);

        let reserve_response = gateway
            .reserve(Request::new(reserve_request("proj-1", 10)))
            .await
            .unwrap()
            .into_inner();

        assert!(reserve_response.allow);
        assert!(!reserve_response.reservation_id.is_empty());

        let commit_response = gateway
            .commit(Request::new(CreditCommitRequest {
                reservation_id: reserve_response.reservation_id,
                units: 10,
            }))
            .await
            .unwrap()
            .into_inner();

        assert!(commit_response.success);
    }
}
|
||||
|
|
@ -0,0 +1,77 @@
|
|||
use creditservice_api::{CreditServiceImpl, InMemoryStorage};
|
||||
use creditservice_proto::credit_service_server::CreditServiceServer;
|
||||
use creditservice_client::{Client, TlsConfig};
|
||||
use rcgen::generate_simple_self_signed;
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::oneshot;
|
||||
use tonic::transport::{Identity, Server, ServerTlsConfig};
|
||||
|
||||
/// End-to-end check that a tonic server configured for mutual TLS accepts a
/// client presenting the expected client certificate, and that RPCs
/// (create_wallet / get_wallet) succeed over that channel.
#[tokio::test]
async fn mtls_connects_and_allows_rpc() {
    // --- Generate self-signed server and client certs ---
    let server = generate_simple_self_signed(vec!["creditservice.local".into()]).unwrap();
    let server_cert_pem = server.cert.pem();
    let server_key_pem = server.key_pair.serialize_pem();

    let client = generate_simple_self_signed(vec!["creditservice-client".into()]).unwrap();
    let client_cert_pem = client.cert.pem();
    let client_key_pem = client.key_pair.serialize_pem();

    // --- Start CreditService server with mTLS ---
    // NOTE(review): fixed port may collide with other tests running in
    // parallel — consider binding port 0 and discovering the bound address.
    let addr: SocketAddr = "127.0.0.1:50057".parse().unwrap();
    let storage: Arc<dyn creditservice_api::CreditStorage> = InMemoryStorage::new();
    let svc = Arc::new(CreditServiceImpl::new(storage));

    // The client CA root is the client's own self-signed cert, so the server
    // only accepts connections presenting exactly that client certificate.
    let identity = Identity::from_pem(server_cert_pem.clone(), server_key_pem.clone());
    let client_ca = tonic::transport::Certificate::from_pem(client_cert_pem.clone());

    // Oneshot channel drives graceful shutdown at the end of the test.
    let (tx, rx) = oneshot::channel::<()>();
    let server = Server::builder()
        .tls_config(
            ServerTlsConfig::new()
                .identity(identity)
                .client_ca_root(client_ca),
        )
        .unwrap()
        .add_service(CreditServiceServer::new(svc.as_ref().clone()))
        .serve_with_shutdown(addr, async {
            let _ = rx.await;
        });

    let server_handle = tokio::spawn(server);

    // Give the server a moment to start
    // NOTE(review): sleep-based startup is racy; polling the port until it
    // accepts connections would be more robust.
    tokio::time::sleep(std::time::Duration::from_millis(200)).await;

    // --- Client with mTLS ---
    // `domain` must match the SAN in the server cert ("creditservice.local").
    let mut client = Client::builder(format!("https://127.0.0.1:{}", addr.port()))
        .tls(TlsConfig {
            domain: Some("creditservice.local".into()),
            ca_cert_pem: Some(server_cert_pem.clone()),
            client_cert_pem: Some(client_cert_pem.clone()),
            client_key_pem: Some(client_key_pem.clone()),
        })
        .build()
        .await
        .expect("client build");

    // Simple RPC: create wallet then get wallet
    let wallet = client
        .create_wallet("proj-mtls", "org-mtls", 1000)
        .await
        .expect("create_wallet");
    assert_eq!(wallet.project_id, "proj-mtls");
    assert_eq!(wallet.org_id, "org-mtls");

    let fetched = client
        .get_wallet("proj-mtls", "org-mtls")
        .await
        .expect("get_wallet");
    assert_eq!(fetched.balance, 1000);

    // Shutdown server
    let _ = tx.send(());
    let _ = server_handle.await;
}
|
||||
|
||||
18
creditservice/creditservice-client/examples/basic.rs
Normal file
18
creditservice/creditservice-client/examples/basic.rs
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
use creditservice_client::{AuthConfig, ClientBuilder};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// Connect to CreditService with default retry/backoff and no auth.
|
||||
let mut client = ClientBuilder::new("http://127.0.0.1:50055")
|
||||
.auth(AuthConfig::None)
|
||||
.build()
|
||||
.await?;
|
||||
|
||||
// Example: check quota call
|
||||
let _ = client
|
||||
.check_quota("project-1", creditservice_client::ResourceType::Vm, 1, 0)
|
||||
.await;
|
||||
|
||||
println!("CreditService client ready");
|
||||
Ok(())
|
||||
}
|
||||
27
creditservice/creditservice-client/examples/builder.rs
Normal file
27
creditservice/creditservice-client/examples/builder.rs
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
//! Minimal builder example for CreditService client
|
||||
use creditservice_client::Client;
|
||||
use photocloud_client_common::AuthConfig;
|
||||
|
||||
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Point to your CreditService endpoint (plaintext for example only)
    let mut client = Client::builder("http://127.0.0.1:50052")
        .auth(AuthConfig::None)
        .build()
        .await?;

    // Fetch the wallet, creating it with an initial balance when absent.
    let project_id = "demo-project";
    match client.get_wallet(project_id).await {
        Ok(wallet) => println!("Wallet balance: {}", wallet.balance),
        // NotFound is expected on first run: create the wallet instead.
        Err(status) if status.code() == tonic::Code::NotFound => {
            let wallet = client
                .create_wallet(project_id, "demo-org", 1_000)
                .await?;
            println!("Created wallet with balance: {}", wallet.balance);
        }
        // Any other gRPC error is fatal for this example.
        Err(err) => return Err(Box::new(err)),
    }

    Ok(())
}
|
||||
25
deployer/crates/cert-authority/Cargo.toml
Normal file
25
deployer/crates/cert-authority/Cargo.toml
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
[package]
|
||||
name = "cert-authority"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
rust-version.workspace = true
|
||||
authors.workspace = true
|
||||
license.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
tokio.workspace = true
|
||||
tracing.workspace = true
|
||||
tracing-subscriber.workspace = true
|
||||
clap = { version = "4.5", features = ["derive"] }
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
chrono = { version = "0.4", features = ["serde"] }
|
||||
rcgen = { version = "0.13", features = ["pem", "x509-parser"] }
|
||||
rand_core = { version = "0.6", features = ["std"] }
|
||||
rustls-pemfile = "2"
|
||||
x509-parser = "0.18"
|
||||
|
||||
chainfire-client = { path = "../../../chainfire/chainfire-client" }
|
||||
|
||||
348
deployer/crates/cert-authority/src/main.rs
Normal file
348
deployer/crates/cert-authority/src/main.rs
Normal file
|
|
@ -0,0 +1,348 @@
|
|||
use std::path::PathBuf;
use std::time::{SystemTime, UNIX_EPOCH};

use anyhow::{Context, Result};
use chainfire_client::Client;
use clap::{Parser, Subcommand};
use rcgen::{Certificate, CertificateParams, DistinguishedName, DnType, KeyPair};
use rustls_pemfile::certs;
use serde::{Deserialize, Serialize};
use tracing::{info, warn};
use tracing_subscriber::EnvFilter;
|
||||
|
||||
const PHOTON_PREFIX: &str = "photoncloud";
|
||||
const CERT_TTL_DAYS: u64 = 90;
|
||||
const ROTATION_THRESHOLD_DAYS: u64 = 30;
|
||||
|
||||
/// Command-line interface for the PhotonCloud certificate authority tool.
#[derive(Parser, Debug)]
#[command(author, version, about)]
struct Cli {
    /// Chainfire API endpoint used to validate nodes and record cert bindings.
    #[arg(long)]
    chainfire_endpoint: String,
    /// PhotonCloud cluster ID; scopes all Chainfire keys for this cluster.
    #[arg(long)]
    cluster_id: String,
    /// Path to the CA certificate (PEM).
    #[arg(long)]
    ca_cert_path: PathBuf,
    /// Path to the CA private key (PEM).
    #[arg(long)]
    ca_key_path: PathBuf,
    #[command(subcommand)]
    command: Command,
}
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
enum Command {
|
||||
/// CA証明書を生成
|
||||
InitCa,
|
||||
/// CSRから証明書を発行
|
||||
Issue {
|
||||
#[arg(long)]
|
||||
csr_path: PathBuf,
|
||||
#[arg(long)]
|
||||
cert_path: PathBuf,
|
||||
#[arg(long)]
|
||||
node_id: Option<String>,
|
||||
#[arg(long)]
|
||||
service_name: Option<String>,
|
||||
},
|
||||
/// 証明書のローテーションが必要かチェック
|
||||
CheckRotation {
|
||||
#[arg(long)]
|
||||
cert_path: PathBuf,
|
||||
},
|
||||
}
|
||||
|
||||
/// Record stored in Chainfire describing an issued certificate and what it is
/// bound to.
#[derive(Debug, Serialize, Deserialize)]
struct CertificateBinding {
    // Node the certificate was issued for, if any.
    node_id: Option<String>,
    // Service the certificate was issued for, if any.
    service_name: Option<String>,
    // Serial-like identifier derived from the certificate DER in
    // issue_certificate (not the actual X.509 serial field).
    cert_serial: String,
    // Unix timestamp (seconds) at issuance.
    issued_at: u64,
    // Unix timestamp (seconds) of expiry: issued_at + CERT_TTL_DAYS (90 days).
    expires_at: u64,
}
|
||||
|
||||
#[tokio::main]
async fn main() -> Result<()> {
    // Default log level is "info" unless RUST_LOG overrides it.
    tracing_subscriber::fmt()
        .with_env_filter(EnvFilter::from_default_env().add_directive("info".parse()?))
        .init();

    let cli = Cli::parse();

    // Dispatch to the subcommand handlers; global flags come from `cli`.
    match cli.command {
        Command::InitCa => {
            init_ca(&cli.ca_cert_path, &cli.ca_key_path).await?;
        }
        Command::Issue {
            csr_path,
            cert_path,
            node_id,
            service_name,
        } => {
            issue_certificate(
                &cli.chainfire_endpoint,
                &cli.cluster_id,
                &cli.ca_cert_path,
                &cli.ca_key_path,
                &csr_path,
                &cert_path,
                node_id,
                service_name,
            )
            .await?;
        }
        Command::CheckRotation { cert_path } => {
            check_rotation(&cert_path).await?;
        }
    }

    Ok(())
}
|
||||
|
||||
async fn init_ca(cert_path: &PathBuf, key_path: &PathBuf) -> Result<()> {
|
||||
info!("generating CA certificate and key");
|
||||
|
||||
// キーペアを生成
|
||||
let key_pair = KeyPair::generate()
|
||||
.context("failed to generate CA key pair")?;
|
||||
|
||||
// CA証明書パラメータを設定
|
||||
let mut params = CertificateParams::new(vec!["PhotonCloud CA".to_string()])
|
||||
.context("failed to create certificate params")?;
|
||||
|
||||
let mut distinguished_name = DistinguishedName::new();
|
||||
distinguished_name.push(DnType::OrganizationName, "PhotonCloud");
|
||||
distinguished_name.push(DnType::CommonName, "PhotonCloud CA");
|
||||
params.distinguished_name = distinguished_name;
|
||||
params.is_ca = rcgen::IsCa::Ca(rcgen::BasicConstraints::Unconstrained);
|
||||
params.key_usages = vec![
|
||||
rcgen::KeyUsagePurpose::DigitalSignature,
|
||||
rcgen::KeyUsagePurpose::KeyCertSign,
|
||||
];
|
||||
|
||||
// 自己署名CA証明書を生成
|
||||
let cert = params.self_signed(&key_pair)
|
||||
.context("failed to generate self-signed CA certificate")?;
|
||||
|
||||
// PEM形式で保存
|
||||
let cert_pem = cert.pem();
|
||||
let key_pem = key_pair.serialize_pem();
|
||||
|
||||
// ディレクトリを作成
|
||||
if let Some(parent) = cert_path.parent() {
|
||||
std::fs::create_dir_all(parent)
|
||||
.with_context(|| format!("failed to create directory for {}", parent.display()))?;
|
||||
}
|
||||
if let Some(parent) = key_path.parent() {
|
||||
std::fs::create_dir_all(parent)
|
||||
.with_context(|| format!("failed to create directory for {}", parent.display()))?;
|
||||
}
|
||||
|
||||
std::fs::write(cert_path, cert_pem)
|
||||
.with_context(|| format!("failed to write CA certificate to {}", cert_path.display()))?;
|
||||
std::fs::write(key_path, key_pem)
|
||||
.with_context(|| format!("failed to write CA key to {}", key_path.display()))?;
|
||||
|
||||
info!(
|
||||
cert_path = %cert_path.display(),
|
||||
key_path = %key_path.display(),
|
||||
"CA certificate generated successfully"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn issue_certificate(
|
||||
chainfire_endpoint: &str,
|
||||
cluster_id: &str,
|
||||
ca_cert_path: &PathBuf,
|
||||
ca_key_path: &PathBuf,
|
||||
csr_path: &PathBuf,
|
||||
cert_path: &PathBuf,
|
||||
node_id: Option<String>,
|
||||
service_name: Option<String>,
|
||||
) -> Result<()> {
|
||||
info!("issuing certificate");
|
||||
|
||||
// Chainfireでノード/サービスが許可されているか確認
|
||||
if let Some(ref nid) = node_id {
|
||||
let mut client = Client::connect(chainfire_endpoint.to_string()).await?;
|
||||
let node_key = format!("{}nodes/{}", cluster_prefix(cluster_id), nid);
|
||||
let node_data = client.get(&node_key.as_bytes()).await?;
|
||||
if node_data.is_none() {
|
||||
anyhow::bail!("node {} not found in Chainfire", nid);
|
||||
}
|
||||
}
|
||||
|
||||
// CA証明書とキーを読み込み
|
||||
let ca_key_pem = std::fs::read_to_string(ca_key_path)
|
||||
.with_context(|| format!("failed to read CA key from {}", ca_key_path.display()))?;
|
||||
|
||||
// CAキーペアを読み込み
|
||||
let ca_key_pair = KeyPair::from_pem(&ca_key_pem)
|
||||
.context("failed to parse CA key pair from PEM")?;
|
||||
|
||||
// CA証明書を再構築(簡易実装)
|
||||
// 実際の運用では、既存のCA証明書をパースする必要があるが、
|
||||
// rcgenのAPI制約により、CA証明書のパラメータを再構築する方式を採用
|
||||
let mut ca_params = CertificateParams::new(vec!["PhotonCloud CA".to_string()])
|
||||
.context("failed to create CA certificate params")?;
|
||||
let mut ca_dn = DistinguishedName::new();
|
||||
ca_dn.push(DnType::OrganizationName, "PhotonCloud");
|
||||
ca_dn.push(DnType::CommonName, "PhotonCloud CA");
|
||||
ca_params.distinguished_name = ca_dn;
|
||||
ca_params.is_ca = rcgen::IsCa::Ca(rcgen::BasicConstraints::Unconstrained);
|
||||
ca_params.key_usages = vec![
|
||||
rcgen::KeyUsagePurpose::DigitalSignature,
|
||||
rcgen::KeyUsagePurpose::KeyCertSign,
|
||||
];
|
||||
|
||||
// CA証明書オブジェクトを作成(自己署名として再生成)
|
||||
// 実際の運用では、既存のCA証明書を読み込む必要がある
|
||||
let ca_cert = ca_params.self_signed(&ca_key_pair)
|
||||
.context("failed to recreate CA certificate")?;
|
||||
|
||||
// 証明書パラメータを構築
|
||||
let mut subject_alt_names = Vec::new();
|
||||
if let Some(ref nid) = node_id {
|
||||
subject_alt_names.push(format!("node-{}", nid));
|
||||
}
|
||||
if let Some(ref svc) = service_name {
|
||||
subject_alt_names.push(svc.clone());
|
||||
}
|
||||
if subject_alt_names.is_empty() {
|
||||
subject_alt_names.push("photoncloud-service".to_string());
|
||||
}
|
||||
|
||||
let mut params = CertificateParams::new(subject_alt_names)
|
||||
.context("failed to create certificate params")?;
|
||||
|
||||
// Distinguished Nameを設定
|
||||
let mut distinguished_name = DistinguishedName::new();
|
||||
if let Some(ref nid) = node_id {
|
||||
distinguished_name.push(DnType::CommonName, format!("Node {}", nid));
|
||||
}
|
||||
if let Some(ref svc) = service_name {
|
||||
distinguished_name.push(DnType::OrganizationName, format!("Service {}", svc));
|
||||
}
|
||||
params.distinguished_name = distinguished_name;
|
||||
|
||||
// キーペアを生成(CSRから読み込む場合は、CSRパースが必要)
|
||||
// ここでは簡易実装として新規生成
|
||||
let key_pair = KeyPair::generate()
|
||||
.context("failed to generate certificate key pair")?;
|
||||
|
||||
// CA署名証明書を生成
|
||||
// KeyPairはPublicKeyDataトレイトを実装しているので、そのまま渡せる
|
||||
let cert = params.signed_by(&key_pair, &ca_cert, &ca_key_pair)
|
||||
.context("failed to sign certificate with CA")?;
|
||||
|
||||
let cert_pem = cert.pem();
|
||||
let key_pem = key_pair.serialize_pem();
|
||||
|
||||
// ディレクトリを作成
|
||||
if let Some(parent) = cert_path.parent() {
|
||||
std::fs::create_dir_all(parent)
|
||||
.with_context(|| format!("failed to create directory for {}", parent.display()))?;
|
||||
}
|
||||
|
||||
// 証明書とキーを保存
|
||||
std::fs::write(cert_path, cert_pem)
|
||||
.with_context(|| format!("failed to write certificate to {}", cert_path.display()))?;
|
||||
|
||||
// キーも別ファイルに保存(オプション)
|
||||
let key_path = cert_path.with_extension("key");
|
||||
std::fs::write(&key_path, key_pem)
|
||||
.with_context(|| format!("failed to write key to {}", key_path.display()))?;
|
||||
|
||||
// Chainfireに証明書バインディングを記録
|
||||
let now = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs();
|
||||
let expires_at = now + (CERT_TTL_DAYS * 24 * 3600);
|
||||
|
||||
// 証明書のシリアル番号を取得(DERから抽出)
|
||||
let cert_serial = {
|
||||
let cert_der = cert.der();
|
||||
// DER形式からシリアル番号を抽出(簡易実装)
|
||||
// 実際にはx509-parserを使ってパースする方が正確
|
||||
format!("{:x}", cert_der.as_ref().iter().take(20).fold(0u64, |acc, &b| acc * 256 + b as u64))
|
||||
};
|
||||
|
||||
let mut client = Client::connect(chainfire_endpoint.to_string()).await?;
|
||||
let binding = CertificateBinding {
|
||||
node_id: node_id.clone(),
|
||||
service_name: service_name.clone(),
|
||||
cert_serial,
|
||||
issued_at: now,
|
||||
expires_at,
|
||||
};
|
||||
let binding_key = format!(
|
||||
"{}mtls/certs/{}/{}",
|
||||
cluster_prefix(cluster_id),
|
||||
node_id.as_deref().unwrap_or("unknown"),
|
||||
service_name.as_deref().unwrap_or("unknown")
|
||||
);
|
||||
let binding_value = serde_json::to_vec(&binding)?;
|
||||
client.put(&binding_key.as_bytes(), &binding_value).await?;
|
||||
|
||||
info!(
|
||||
cert_path = %cert_path.display(),
|
||||
key_path = %key_path.display(),
|
||||
node_id = ?node_id,
|
||||
service_name = ?service_name,
|
||||
"certificate issued and recorded in Chainfire"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn check_rotation(cert_path: &PathBuf) -> Result<()> {
|
||||
let cert_pem = std::fs::read_to_string(cert_path)
|
||||
.with_context(|| format!("failed to read certificate from {}", cert_path.display()))?;
|
||||
|
||||
// 証明書をDER形式に変換
|
||||
let cert_der_vec = rustls_pemfile::certs(&mut cert_pem.as_bytes())
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
.context("failed to parse certificate from PEM")?;
|
||||
let _cert_der = cert_der_vec.first()
|
||||
.context("no certificate found in PEM file")?;
|
||||
|
||||
// x509-parserを使って証明書をパース
|
||||
#[cfg(feature = "x509-parser")]
|
||||
{
|
||||
use x509_parser::parse_x509_certificate;
|
||||
let (_, cert) = parse_x509_certificate(cert_der)
|
||||
.map_err(|e| anyhow::anyhow!("failed to parse X.509 certificate: {:?}", e))?;
|
||||
|
||||
let validity = cert.validity();
|
||||
let not_after = validity.not_after.timestamp();
|
||||
let now = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as i64;
|
||||
let days_until_expiry = (not_after - now) / 86400;
|
||||
|
||||
if days_until_expiry < ROTATION_THRESHOLD_DAYS as i64 {
|
||||
warn!(
|
||||
cert_path = %cert_path.display(),
|
||||
days_until_expiry = days_until_expiry,
|
||||
threshold = ROTATION_THRESHOLD_DAYS,
|
||||
"certificate should be rotated soon"
|
||||
);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
info!(
|
||||
cert_path = %cert_path.display(),
|
||||
days_until_expiry = days_until_expiry,
|
||||
"certificate is still valid"
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "x509-parser"))]
|
||||
{
|
||||
warn!("x509-parser feature not enabled, rotation check skipped");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn cluster_prefix(cluster_id: &str) -> String {
|
||||
format!("{}/clusters/{}/", PHOTON_PREFIX, cluster_id)
|
||||
}
|
||||
|
||||
19
deployer/crates/deployer-ctl/Cargo.toml
Normal file
19
deployer/crates/deployer-ctl/Cargo.toml
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
[package]
|
||||
name = "deployer-ctl"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
clap = { version = "4.5", features = ["derive"] }
|
||||
tokio = { version = "1.38", features = ["full"] }
|
||||
tracing = "0.1"
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
|
||||
chainfire-client = { path = "../../../chainfire/chainfire-client" }
|
||||
deployer-types = { path = "../deployer-types" }
|
||||
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls", "json"] }
|
||||
|
||||
|
||||
177
deployer/crates/deployer-ctl/src/chainfire.rs
Normal file
177
deployer/crates/deployer-ctl/src/chainfire.rs
Normal file
|
|
@ -0,0 +1,177 @@
|
|||
use std::path::Path;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use chainfire_client::Client;
|
||||
use serde::de::DeserializeOwned;
|
||||
use tokio::fs;
|
||||
use tracing::{info, warn};
|
||||
|
||||
use crate::model::ClusterStateSpec;
|
||||
|
||||
/// All PhotonCloud state lives under this top-level key-space prefix.
const PHOTON_PREFIX: &str = "photoncloud";

/// Key-space root for one cluster: `photoncloud/clusters/<cluster_id>/`.
fn cluster_prefix(cluster_id: &str) -> String {
    format!("{PHOTON_PREFIX}/clusters/{cluster_id}/")
}

/// Key of the cluster metadata document.
fn key_cluster_meta(cluster_id: &str) -> Vec<u8> {
    let mut key = cluster_prefix(cluster_id);
    key.push_str("meta");
    key.into_bytes()
}

/// Key of one node entry.
fn key_node(cluster_id: &str, node_id: &str) -> Vec<u8> {
    let mut key = cluster_prefix(cluster_id);
    key.push_str("nodes/");
    key.push_str(node_id);
    key.into_bytes()
}

/// Key of one service entry.
fn key_service(cluster_id: &str, svc: &str) -> Vec<u8> {
    let mut key = cluster_prefix(cluster_id);
    key.push_str("services/");
    key.push_str(svc);
    key.into_bytes()
}

/// Key of one service instance, nested under its service name.
fn key_instance(cluster_id: &str, svc: &str, inst: &str) -> Vec<u8> {
    format!("{}instances/{svc}/{inst}", cluster_prefix(cluster_id)).into_bytes()
}

/// Key of one mTLS policy entry.
fn key_mtls_policy(cluster_id: &str, policy_id: &str) -> Vec<u8> {
    format!("{}mtls/policies/{policy_id}", cluster_prefix(cluster_id)).into_bytes()
}
|
||||
|
||||
async fn read_config_file<T: DeserializeOwned, P: AsRef<Path>>(path: P) -> Result<T> {
|
||||
let contents = fs::read_to_string(&path)
|
||||
.await
|
||||
.with_context(|| format!("failed to read {}", path.as_ref().display()))?;
|
||||
|
||||
// シンプルに JSON として解釈(必要になれば YAML 対応も追加可能)
|
||||
let value = serde_json::from_str(&contents)
|
||||
.with_context(|| format!("failed to parse {}", path.as_ref().display()))?;
|
||||
Ok(value)
|
||||
}
|
||||
|
||||
/// First-time bootstrap:
/// - creates the cluster metadata entry, and
/// - seeds at least one node, plus services/instances when declared.
///
/// Every write is an upsert under `photoncloud/clusters/<cluster_id>/`.
pub async fn bootstrap_cluster(
    endpoint: &str,
    cli_cluster_id: Option<&str>,
    config_path: &Path,
) -> Result<()> {
    let spec: ClusterStateSpec = read_config_file(config_path).await?;
    // An explicit --cluster-id on the CLI overrides the one in the config.
    let cluster_id = cli_cluster_id.unwrap_or(&spec.cluster.cluster_id);

    info!(cluster_id, "connecting to Chainfire at {}", endpoint);
    let mut client = Client::connect(endpoint.to_string()).await?;

    // 1. Cluster metadata
    let meta_key = key_cluster_meta(cluster_id);
    let meta_value = serde_json::to_vec(&spec.cluster)?;
    client.put(&meta_key, &meta_value).await?;
    info!("upserted cluster meta for {}", cluster_id);

    // 2. Nodes
    for node in &spec.nodes {
        let key = key_node(cluster_id, &node.node_id);
        let value = serde_json::to_vec(node)?;
        client.put(&key, &value).await?;
        info!(node_id = %node.node_id, "upserted node");
    }

    // 3. Services / instances (when declared)
    for svc in &spec.services {
        let key = key_service(cluster_id, &svc.name);
        let value = serde_json::to_vec(svc)?;
        client.put(&key, &value).await?;
        info!(service = %svc.name, "upserted service");
    }

    for inst in &spec.instances {
        let key = key_instance(cluster_id, &inst.service, &inst.instance_id);
        let value = serde_json::to_vec(inst)?;
        client.put(&key, &value).await?;
        info!(instance = %inst.instance_id, service = %inst.service, "upserted instance");
    }

    // 4. mTLS policies
    for policy in &spec.mtls_policies {
        let key = key_mtls_policy(cluster_id, &policy.policy_id);
        let value = serde_json::to_vec(policy)?;
        client.put(&key, &value).await?;
        info!(policy_id = %policy.policy_id, "upserted mTLS policy");
    }

    Ok(())
}
|
||||
|
||||
/// GitOps-style apply: pushes the whole declarative cluster spec to Chainfire.
/// With prune=true this could be extended to delete keys that fell out of the
/// declared prefix; the MVP accepts the flag but ignores it.
pub async fn apply_cluster_state(
    endpoint: &str,
    cli_cluster_id: Option<&str>,
    config_path: &Path,
    _prune: bool,
) -> Result<()> {
    let spec: ClusterStateSpec = read_config_file(config_path).await?;
    // An explicit --cluster-id on the CLI overrides the one in the config.
    let cluster_id = cli_cluster_id.unwrap_or(&spec.cluster.cluster_id);

    info!(cluster_id, "applying cluster state to Chainfire at {}", endpoint);
    let mut client = Client::connect(endpoint.to_string()).await?;

    // For the MVP this performs the same upserts as bootstrap.
    // The structure leaves room to later fetch the existing keys and
    // implement diff-based deletion (prune).
    let meta_key = key_cluster_meta(cluster_id);
    let meta_value = serde_json::to_vec(&spec.cluster)?;
    client.put(&meta_key, &meta_value).await?;

    for node in &spec.nodes {
        let key = key_node(cluster_id, &node.node_id);
        let value = serde_json::to_vec(node)?;
        client.put(&key, &value).await?;
    }
    for svc in &spec.services {
        let key = key_service(cluster_id, &svc.name);
        let value = serde_json::to_vec(svc)?;
        client.put(&key, &value).await?;
    }
    for inst in &spec.instances {
        let key = key_instance(cluster_id, &inst.service, &inst.instance_id);
        let value = serde_json::to_vec(inst)?;
        client.put(&key, &value).await?;
    }
    for policy in &spec.mtls_policies {
        let key = key_mtls_policy(cluster_id, &policy.policy_id);
        let value = serde_json::to_vec(policy)?;
        client.put(&key, &value).await?;
    }

    Ok(())
}
|
||||
|
||||
/// 指定 prefix 以下のキーをダンプする(デバッグ・手動修復用)。
|
||||
pub async fn dump_prefix(endpoint: &str, prefix: &str) -> Result<()> {
|
||||
let mut client = Client::connect(endpoint.to_string()).await?;
|
||||
let start = prefix.as_bytes();
|
||||
|
||||
info!("dumping keys with prefix {:?}", prefix);
|
||||
let (kvs, _next) = client.scan_prefix(start, 0).await?;
|
||||
if kvs.is_empty() {
|
||||
warn!("no keys found under prefix {:?}", prefix);
|
||||
}
|
||||
|
||||
for (key, value, rev) in kvs {
|
||||
let k = String::from_utf8_lossy(&key);
|
||||
let v = String::from_utf8_lossy(&value);
|
||||
println!("rev={} key={} value={}", rev, k, v);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
113
deployer/crates/deployer-ctl/src/main.rs
Normal file
113
deployer/crates/deployer-ctl/src/main.rs
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
use std::path::PathBuf;
|
||||
|
||||
use anyhow::Result;
|
||||
use clap::{Parser, Subcommand};
|
||||
use tracing_subscriber::EnvFilter;
|
||||
|
||||
mod chainfire;
|
||||
mod model;
|
||||
mod remote;
|
||||
|
||||
/// Deployer control CLI for PhotonCloud.
///
/// - creates Cluster/Node/Service definitions in Chainfire on first bootstrap
/// - applies declarative configuration to an existing Deployer cluster
/// - aims to let operators repair Chainfire state directly even when the
///   Deployer itself is broken
#[derive(Parser, Debug)]
#[command(author, version, about)]
struct Cli {
    /// Chainfire API endpoint (e.g. http://127.0.0.1:7000)
    #[arg(long, global = true, default_value = "http://127.0.0.1:7000")]
    chainfire_endpoint: String,

    /// PhotonCloud cluster ID (logical name)
    #[arg(long, global = true)]
    cluster_id: Option<String>,

    #[command(subcommand)]
    command: Command,
}
|
||||
|
||||
#[derive(Subcommand, Debug)]
enum Command {
    /// Create the base objects for first-time bootstrap
    ///
    /// - cluster metadata
    /// - node info (one local node)
    /// - optionally, Service/ServiceInstance seeds
    Bootstrap {
        /// JSON/YAML config file used for bootstrapping
        #[arg(long)]
        config: PathBuf,
    },

    /// Apply a declarative PhotonCloud cluster config to Chainfire (usable GitOps-style)
    Apply {
        /// JSON/YAML containing Cluster/Node/Service/Instance/MTLSPolicy
        #[arg(long)]
        config: PathBuf,

        /// Whether to prune existing entries
        #[arg(long, default_value_t = false)]
        prune: bool,
    },

    /// Dump PhotonCloud-related keys from Chainfire (debugging aid)
    Dump {
        /// Prefix to dump (default: photoncloud/)
        #[arg(long, default_value = "photoncloud/")]
        prefix: String,
    },

    /// Sync and inspect cluster state via the Deployer HTTP API
    ///
    /// Currently a placeholder shaped for future GitOps integration.
    Deployer {
        /// Deployer HTTP endpoint (e.g. http://deployer.local:8080)
        #[arg(long)]
        endpoint: String,

        /// Only `status` is supported for now
        #[arg(long, default_value = "status")]
        action: String,
    },
}
|
||||
|
||||
#[tokio::main]
async fn main() -> Result<()> {
    // Default log level is "info" unless RUST_LOG overrides it.
    tracing_subscriber::fmt()
        .with_env_filter(EnvFilter::from_default_env().add_directive("info".parse()?))
        .init();

    let cli = Cli::parse();

    // Dispatch to the subcommand implementations; global flags come from `cli`.
    match cli.command {
        Command::Bootstrap { config } => {
            chainfire::bootstrap_cluster(
                &cli.chainfire_endpoint,
                cli.cluster_id.as_deref(),
                &config,
            )
            .await?;
        }
        Command::Apply { config, prune } => {
            chainfire::apply_cluster_state(
                &cli.chainfire_endpoint,
                cli.cluster_id.as_deref(),
                &config,
                prune,
            )
            .await?;
        }
        Command::Dump { prefix } => {
            chainfire::dump_prefix(&cli.chainfire_endpoint, &prefix).await?;
        }
        Command::Deployer { endpoint, action } => {
            remote::run_deployer_command(&endpoint, &action).await?;
        }
    }

    Ok(())
}
|
||||
|
||||
|
||||
86
deployer/crates/deployer-ctl/src/model.rs
Normal file
86
deployer/crates/deployer-ctl/src/model.rs
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Cluster metadata (for PhotonCloud)
#[derive(Debug, Deserialize, Serialize)]
pub struct ClusterSpec {
    pub cluster_id: String,
    pub environment: Option<String>, // dev/stg/prod, etc.
}

/// Node definition
#[derive(Debug, Deserialize, Serialize)]
pub struct NodeSpec {
    pub node_id: String,
    pub hostname: String,
    pub ip: String,
    #[serde(default)]
    pub roles: Vec<String>,
    #[serde(default)]
    pub labels: std::collections::HashMap<String, String>,
}

/// Service definition
#[derive(Debug, Deserialize, Serialize)]
pub struct ServiceSpec {
    pub name: String,
    #[serde(default)]
    pub ports: Option<ServicePorts>,
    #[serde(default)]
    pub protocol: Option<String>, // http/grpc
    #[serde(default)]
    pub mtls_required: Option<bool>,
    #[serde(default)]
    pub mesh_mode: Option<String>, // agent/none
}

#[derive(Debug, Deserialize, Serialize)]
pub struct ServicePorts {
    #[serde(default)]
    pub http: Option<u16>,
    #[serde(default)]
    pub grpc: Option<u16>,
}

/// ServiceInstance definition
#[derive(Debug, Deserialize, Serialize)]
pub struct ServiceInstanceSpec {
    pub instance_id: String,
    pub service: String,
    pub node_id: String,
    pub ip: String,
    pub port: u16,
    #[serde(default)]
    pub mesh_port: Option<u16>,
    #[serde(default)]
    pub version: Option<String>,
}

/// mTLS policy definition
#[derive(Debug, Deserialize, Serialize)]
pub struct MtlsPolicySpec {
    pub policy_id: String,
    #[serde(default)]
    pub environment: Option<String>,
    pub source_service: String,
    pub target_service: String,
    #[serde(default)]
    pub mtls_required: Option<bool>,
    #[serde(default)]
    pub mode: Option<String>, // strict/permissive/disabled
}

/// GitOps-friendly declarative definition of the whole cluster
#[derive(Debug, Deserialize, Serialize)]
pub struct ClusterStateSpec {
    pub cluster: ClusterSpec,
    #[serde(default)]
    pub nodes: Vec<NodeSpec>,
    #[serde(default)]
    pub services: Vec<ServiceSpec>,
    #[serde(default)]
    pub instances: Vec<ServiceInstanceSpec>,
    #[serde(default)]
    pub mtls_policies: Vec<MtlsPolicySpec>,
}
|
||||
|
||||
|
||||
35
deployer/crates/deployer-ctl/src/remote.rs
Normal file
35
deployer/crates/deployer-ctl/src/remote.rs
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
use anyhow::Result;
|
||||
use tracing::{info, warn};
|
||||
|
||||
/// Integration point with the Deployer HTTP API, anticipating future GitOps
/// wiring.
///
/// For now only a simple status check is implemented; once the API shape
/// settles, an `apply` equivalent can be built out from here.
pub async fn run_deployer_command(endpoint: &str, action: &str) -> Result<()> {
    match action {
        "status" => {
            // Placeholder implementation:
            // eventually this should hit /health, /api/v1/admin/nodes, etc.
            let url = format!("{}/health", endpoint.trim_end_matches('/'));
            info!("checking deployer status at {}", url);

            let response = reqwest::get(&url).await?;
            if response.status().is_success() {
                let body = response.text().await.unwrap_or_default();
                println!("deployer status OK: {}", body);
            } else {
                warn!(
                    "deployer status not OK: HTTP {}",
                    response.status().as_u16()
                );
            }
        }
        other => {
            // Unknown actions are logged but deliberately not treated as
            // hard errors.
            warn!("unsupported deployer action: {}", other);
        }
    }

    Ok(())
}
|
||||
|
||||
|
||||
23
deployer/crates/node-agent/Cargo.toml
Normal file
23
deployer/crates/node-agent/Cargo.toml
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
[package]
|
||||
name = "node-agent"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
rust-version.workspace = true
|
||||
authors.workspace = true
|
||||
license.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
tokio.workspace = true
|
||||
tracing.workspace = true
|
||||
tracing-subscriber.workspace = true
|
||||
clap = { version = "4.5", features = ["derive"] }
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
chrono = { version = "0.4", features = ["serde"] }
|
||||
|
||||
chainfire-client = { path = "../../../chainfire/chainfire-client" }
|
||||
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls", "json"] }
|
||||
|
||||
|
||||
334
deployer/crates/node-agent/src/agent.rs
Normal file
334
deployer/crates/node-agent/src/agent.rs
Normal file
|
|
@ -0,0 +1,334 @@
|
|||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
use std::process::Stdio;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use chainfire_client::Client;
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::process::Command;
|
||||
use tokio::time::sleep;
|
||||
use tracing::{info, warn};
|
||||
|
||||
use crate::process::ProcessManager;
|
||||
|
||||
/// Root of the PhotonCloud key space in Chainfire.
const PHOTON_PREFIX: &str = "photoncloud";

/// Key prefix for one cluster: `photoncloud/clusters/<cluster_id>/`.
fn cluster_prefix(cluster_id: &str) -> String {
    // Built by hand to size the allocation once: prefix + "/clusters/" + id + "/".
    let mut prefix = String::with_capacity(PHOTON_PREFIX.len() + cluster_id.len() + 11);
    prefix.push_str(PHOTON_PREFIX);
    prefix.push_str("/clusters/");
    prefix.push_str(cluster_id);
    prefix.push('/');
    prefix
}

/// Chainfire key of a node record: `<cluster prefix>nodes/<node_id>`.
fn key_node(cluster_id: &str, node_id: &str) -> Vec<u8> {
    let key = format!("{}nodes/{}", cluster_prefix(cluster_id), node_id);
    key.into_bytes()
}

/// Chainfire key of a service-instance record:
/// `<cluster prefix>instances/<service>/<instance_id>`.
fn key_instance(cluster_id: &str, service: &str, instance_id: &str) -> Vec<u8> {
    let key = format!(
        "{}instances/{}/{}",
        cluster_prefix(cluster_id),
        service,
        instance_id
    );
    key.into_bytes()
}
|
||||
|
||||
/// State of a cluster node as stored in Chainfire under
/// `photoncloud/clusters/{cluster_id}/nodes/{node_id}`.
#[derive(Debug, Deserialize, Serialize)]
pub struct NodeState {
    /// Stable identifier of the node within the cluster.
    pub node_id: String,
    /// Node's primary IP address.
    pub ip: String,
    /// Node's hostname.
    pub hostname: String,
    /// Roles assigned to the node — presumably control-plane/worker;
    /// TODO confirm the vocabulary.
    #[serde(default)]
    pub roles: Vec<String>,
    /// Free-form metadata labels.
    #[serde(default)]
    pub labels: std::collections::HashMap<String, String>,
    /// Optional lifecycle state string.
    #[serde(default)]
    pub state: Option<String>,
    /// Timestamp of the last heartbeat written back by the node-agent.
    #[serde(default)]
    pub last_heartbeat: Option<DateTime<Utc>>,
}
|
||||
|
||||
/// Periodic reconciliation agent for one node.
///
/// Each tick reads this node's record from Chainfire, refreshes its
/// heartbeat, registers locally defined service instances, reconciles
/// managed processes, and runs health checks.
pub struct Agent {
    /// Chainfire API endpoint (e.g. `http://127.0.0.1:7000`).
    endpoint: String,
    /// Cluster this agent belongs to.
    cluster_id: String,
    /// Node this agent manages.
    node_id: String,
    /// Polling interval between ticks.
    interval: Duration,
    /// Supervises locally spawned service processes.
    process_manager: ProcessManager,
}
|
||||
|
||||
/// ServiceInstance definition read from the local instances file
/// (`/etc/photoncloud/instances.json`) and mirrored into Chainfire.
#[derive(Debug, Deserialize, Serialize)]
struct LocalInstanceSpec {
    /// Service this instance belongs to.
    service: String,
    /// Unique instance identifier within the service.
    instance_id: String,
    /// IP the instance listens on (also used by health checks).
    ip: String,
    /// Primary service port (also used by health checks).
    port: u16,
    /// Optional mesh/sidecar port.
    #[serde(default)]
    mesh_port: Option<u16>,
    /// Optional health-check configuration; absent means "assume healthy".
    #[serde(default)]
    health_check: Option<HealthCheckSpec>,
    /// Optional process definition; when present, the agent supervises it.
    #[serde(default)]
    process: Option<ProcessSpec>,
}
|
||||
|
||||
/// Health-check configuration for a local service instance.
#[derive(Debug, Deserialize, Serialize)]
struct HealthCheckSpec {
    /// Check kind: "http", "tcp" or "command".
    #[serde(rename = "type")]
    check_type: String, // http/tcp/command
    /// HTTP path for "http" checks; for "command" checks this field is
    /// reused as the shell command line (see `Agent::check_health`).
    #[serde(default)]
    path: Option<String>,
    /// Desired check interval in seconds — not read by the visible agent
    /// code; TODO confirm whether anything consumes it.
    #[serde(default)]
    interval_secs: Option<u64>,
    /// Per-check timeout in seconds (checks default to 5 when unset).
    #[serde(default)]
    timeout_secs: Option<u64>,
}
|
||||
|
||||
/// How to launch a service process, as declared in an instance spec.
/// Converted into `crate::process::ProcessSpec` before being handed to
/// the process manager.
#[derive(Debug, Deserialize, Serialize)]
struct ProcessSpec {
    /// Executable to run.
    command: String,
    /// Command-line arguments.
    #[serde(default)]
    args: Vec<String>,
    /// Optional working directory for the spawned process.
    #[serde(default)]
    working_dir: Option<String>,
    /// Extra environment variables for the spawned process.
    #[serde(default)]
    env: std::collections::HashMap<String, String>,
}
|
||||
|
||||
impl Agent {
    /// Creates an agent; no network activity happens until `run_loop`.
    pub fn new(endpoint: String, cluster_id: String, node_id: String, interval: Duration) -> Self {
        Self {
            endpoint,
            cluster_id,
            node_id,
            interval,
            process_manager: ProcessManager::new(),
        }
    }

    /// Runs the reconcile loop forever. Tick errors are logged and the loop
    /// continues after the configured interval; this never returns `Ok`.
    pub async fn run_loop(&mut self) -> Result<()> {
        loop {
            if let Err(e) = self.tick().await {
                warn!(error = %e, "node-agent tick failed");
            }
            sleep(self.interval).await;
        }
    }

    /// One reconciliation pass: heartbeat upsert, local-instance sync,
    /// process reconciliation, health checks. Opens a fresh Chainfire
    /// connection per tick; the three sub-steps are best-effort (their
    /// failures are logged, not propagated).
    async fn tick(&mut self) -> Result<()> {
        let mut client = Client::connect(self.endpoint.clone()).await?;

        // Node record
        let node_key = key_node(&self.cluster_id, &self.node_id);
        let node_raw = client.get(&node_key).await?;
        let Some(node_bytes) = node_raw else {
            // Not fatal: the node may simply not be registered yet.
            warn!(
                "node definition not found in Chainfire for cluster_id={}, node_id={}",
                self.cluster_id, self.node_id
            );
            return Ok(());
        };

        let mut node: NodeState = match serde_json::from_slice(&node_bytes) {
            Ok(n) => n,
            Err(e) => {
                warn!(error = %e, "failed to parse node JSON");
                return Ok(());
            }
        };

        // Refresh the heartbeat and upsert the node record back into Chainfire.
        node.last_heartbeat = Some(Utc::now());
        let updated = serde_json::to_vec(&node)?;
        client.put(&node_key, &updated).await?;

        // Register locally defined ServiceInstances in Chainfire.
        if let Err(e) = self.sync_local_instances(&mut client).await {
            warn!(error = %e, "failed to sync local service instances");
        }

        // Reconcile process start/stop against the desired state.
        if let Err(e) = self.reconcile_processes(&mut client).await {
            warn!(error = %e, "failed to reconcile processes");
        }

        // Run health checks and report the results.
        if let Err(e) = self.update_health_status(&mut client).await {
            warn!(error = %e, "failed to update health status");
        }

        self.log_node_only(&node);

        Ok(())
    }

    /// Logs the observed desired state for this node (observability only).
    fn log_node_only(&self, node: &NodeState) {
        info!(
            node_id = %node.node_id,
            hostname = %node.hostname,
            roles = ?node.roles,
            "observed desired state for node"
        );
    }

    /// Reads ServiceInstance definitions from the local file
    /// (/etc/photoncloud/instances.json) and upserts them into Chainfire at
    /// `photoncloud/clusters/{cluster_id}/instances/{service}/{instance_id}`.
    ///
    /// A missing/unreadable file is treated as "no local instances" (skip);
    /// a file that exists but fails to parse is an error.
    async fn sync_local_instances(&self, client: &mut Client) -> Result<()> {
        let path = PathBuf::from("/etc/photoncloud/instances.json");
        let contents = match fs::read_to_string(&path) {
            Ok(c) => c,
            Err(e) => {
                warn!(error = %e, "no local instances file found, skipping");
                return Ok(());
            }
        };

        let instances: Vec<LocalInstanceSpec> = serde_json::from_str(&contents)
            .with_context(|| format!("failed to parse {}", path.display()))?;

        for inst in &instances {
            let key = key_instance(&self.cluster_id, &inst.service, &inst.instance_id);
            let value = serde_json::to_vec(inst)?;
            client.put(&key, &value).await?;
            info!(
                service = %inst.service,
                instance_id = %inst.instance_id,
                "synced local ServiceInstance to Chainfire"
            );
        }

        Ok(())
    }

    /// Starts/stops processes according to the desired state in Chainfire.
    ///
    /// Currently only *adds* processes to the manager; removal of instances
    /// that disappeared from Chainfire is not implemented, and the scan is
    /// not yet filtered to this node (see inline TODO).
    async fn reconcile_processes(&mut self, client: &mut Client) -> Result<()> {
        let prefix = format!("{}instances/", cluster_prefix(&self.cluster_id));
        let (kvs, _) = client.scan_prefix(prefix.as_bytes(), 0).await?;

        let mut desired_instances = Vec::new();
        for (_key, value, _) in kvs {
            let inst: LocalInstanceSpec = match serde_json::from_slice(&value) {
                Ok(i) => i,
                Err(e) => {
                    warn!(error = %e, "failed to parse instance");
                    continue;
                }
            };

            // NOTE(review): simplified — this picks up every instance in the
            // cluster, not just the ones scheduled on this node.
            // TODO: compare instance.node_id with self.node_id

            if let Some(proc_spec) = inst.process {
                desired_instances.push((inst.service.clone(), inst.instance_id.clone(), proc_spec));
            }
        }

        // Manage processes according to the desired state.
        for (service, instance_id, proc_spec) in desired_instances {
            // Convert the serde-facing spec into the process manager's type.
            let proc_spec_converted = crate::process::ProcessSpec {
                command: proc_spec.command.clone(),
                args: proc_spec.args.clone(),
                working_dir: proc_spec.working_dir.clone(),
                env: proc_spec.env.clone(),
            };

            if self.process_manager.get_mut(&service, &instance_id).is_none() {
                // First time we see this instance: register its process.
                self.process_manager.add(service.clone(), instance_id.clone(), proc_spec_converted);
                info!(
                    service = %service,
                    instance_id = %instance_id,
                    "added new process to manager"
                );
            }
        }

        // Reconcile: restart any managed process that stopped.
        self.process_manager.reconcile().await?;

        Ok(())
    }

    /// Runs the health check of every ServiceInstance in the cluster and
    /// logs the result. Writing the state back into Chainfire is still a
    /// TODO (see inline comment).
    async fn update_health_status(&self, client: &mut Client) -> Result<()> {
        let prefix = format!("{}instances/", cluster_prefix(&self.cluster_id));
        let (kvs, _) = client.scan_prefix(prefix.as_bytes(), 0).await?;

        for (key, value, _) in kvs {
            let mut inst: LocalInstanceSpec = match serde_json::from_slice(&value) {
                Ok(i) => i,
                Err(e) => {
                    warn!(error = %e, "failed to parse instance");
                    continue;
                }
            };

            let health_status = if let Some(ref health_check) = inst.health_check {
                self.check_health(&inst, health_check).await
            } else {
                // No check configured: assume healthy by default.
                "healthy".to_string()
            };

            // Reflect the state onto the ServiceInstance in Chainfire
            // (simplified: log-only for now; the instance's state field
            // still needs to be updated).
            info!(
                service = %inst.service,
                instance_id = %inst.instance_id,
                status = %health_status,
                "health check completed"
            );
        }

        Ok(())
    }

    /// Executes one health check and returns "healthy" / "unhealthy" /
    /// "unknown".
    ///
    /// - "http": GET `http://{ip}:{port}{path}`; any 2xx is healthy.
    /// - "tcp": TCP connect to `{ip}:{port}` within the timeout.
    /// - "command": runs `sh -c <path>` (the `path` field doubles as the
    ///   command line); exit status 0 is healthy, missing command is unknown.
    async fn check_health(&self, inst: &LocalInstanceSpec, spec: &HealthCheckSpec) -> String {
        match spec.check_type.as_str() {
            "http" => {
                if let Some(ref path) = spec.path {
                    let url = format!("http://{}:{}{}", inst.ip, inst.port, path);
                    let client = reqwest::Client::builder()
                        .timeout(Duration::from_secs(spec.timeout_secs.unwrap_or(5)))
                        .build();
                    if let Ok(c) = client {
                        if let Ok(resp) = c.get(&url).send().await {
                            if resp.status().is_success() {
                                return "healthy".to_string();
                            }
                        }
                    }
                }
                "unhealthy".to_string()
            }
            "tcp" => {
                use tokio::net::TcpStream;
                let addr = format!("{}:{}", inst.ip, inst.port);
                match tokio::time::timeout(
                    Duration::from_secs(spec.timeout_secs.unwrap_or(5)),
                    TcpStream::connect(&addr),
                )
                .await
                {
                    Ok(Ok(_)) => "healthy".to_string(),
                    _ => "unhealthy".to_string(),
                }
            }
            "command" => {
                if let Some(ref cmd) = spec.path {
                    match Command::new("sh")
                        .arg("-c")
                        .arg(cmd)
                        .stdout(Stdio::null())
                        .stderr(Stdio::null())
                        .status()
                        .await
                    {
                        Ok(status) if status.success() => "healthy".to_string(),
                        _ => "unhealthy".to_string(),
                    }
                } else {
                    "unknown".to_string()
                }
            }
            _ => "unknown".to_string(),
        }
    }
}
|
||||
62
deployer/crates/node-agent/src/main.rs
Normal file
62
deployer/crates/node-agent/src/main.rs
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
use std::time::Duration;
|
||||
|
||||
use anyhow::Result;
|
||||
use clap::Parser;
|
||||
use tracing::{info, warn};
|
||||
use tracing_subscriber::EnvFilter;
|
||||
|
||||
mod agent;
|
||||
mod process;
|
||||
|
||||
/// PhotonCloud NodeAgent
///
/// - Polls `photoncloud/clusters/{cluster_id}/nodes/{node_id}` and
///   `.../instances/*` in Chainfire (eventually: a real watch) and compares
///   them with local state.
/// - At this stage it does not apply anything to systemd etc.; it mainly
///   logs what it observes.
///
/// NOTE: clap turns these field doc comments into `--help` text.
#[derive(Parser, Debug)]
#[command(author, version, about)]
struct Cli {
    /// Chainfire API endpoint
    #[arg(long, default_value = "http://127.0.0.1:7000")]
    chainfire_endpoint: String,

    /// PhotonCloud cluster ID
    #[arg(long)]
    cluster_id: String,

    /// Node ID managed by this agent
    #[arg(long)]
    node_id: String,

    /// Polling interval in seconds
    #[arg(long, default_value_t = 15)]
    interval_secs: u64,
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
tracing_subscriber::fmt()
|
||||
.with_env_filter(EnvFilter::from_default_env().add_directive("info".parse()?))
|
||||
.init();
|
||||
|
||||
let cli = Cli::parse();
|
||||
|
||||
info!(
|
||||
cluster_id = %cli.cluster_id,
|
||||
node_id = %cli.node_id,
|
||||
"starting NodeAgent"
|
||||
);
|
||||
|
||||
let mut agent = agent::Agent::new(
|
||||
cli.chainfire_endpoint,
|
||||
cli.cluster_id,
|
||||
cli.node_id,
|
||||
Duration::from_secs(cli.interval_secs),
|
||||
);
|
||||
|
||||
agent.run_loop().await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
273
deployer/crates/node-agent/src/process.rs
Normal file
273
deployer/crates/node-agent/src/process.rs
Normal file
|
|
@ -0,0 +1,273 @@
|
|||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
use std::process::Stdio;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::process::{Child, Command};
|
||||
use tracing::{info, warn};
|
||||
|
||||
/// Declarative description of how to spawn a managed process.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProcessSpec {
    /// Executable (name or path) to run.
    pub command: String,
    /// Command-line arguments.
    #[serde(default)]
    pub args: Vec<String>,
    /// Optional working directory for the child.
    #[serde(default)]
    pub working_dir: Option<String>,
    /// Environment variables set for the child.
    #[serde(default)]
    pub env: HashMap<String, String>,
}
|
||||
|
||||
/// A single supervised child process plus its on-disk PID file.
///
/// The PID file (`/var/run/photoncloud/{service}-{instance_id}.pid`) backs
/// liveness checks, so the process can be tracked across agent restarts.
#[derive(Debug)]
pub struct ManagedProcess {
    /// Service the process belongs to.
    pub service: String,
    /// Instance identifier within the service.
    pub instance_id: String,
    /// Launch specification.
    pub spec: ProcessSpec,
    /// Handle to the child when spawned by this agent run; `None` when the
    /// process is only known via its PID file.
    pub child: Option<Child>,
    /// Path of this process's PID file.
    pub pid_file: PathBuf,
}
|
||||
|
||||
impl ManagedProcess {
    /// Builds a managed process; nothing is spawned until `start`.
    /// The PID file path is derived from the service and instance id.
    pub fn new(service: String, instance_id: String, spec: ProcessSpec) -> Self {
        let pid_file = PathBuf::from(format!(
            "/var/run/photoncloud/{}-{}.pid",
            service, instance_id
        ));
        Self {
            service,
            instance_id,
            spec,
            child: None,
            pid_file,
        }
    }

    /// Spawns the process (no-op if `is_running` says it is alive), writes
    /// its PID file and keeps the `Child` handle. stdout/stderr are
    /// discarded.
    pub async fn start(&mut self) -> Result<()> {
        if self.is_running().await? {
            info!(
                service = %self.service,
                instance_id = %self.instance_id,
                "process already running"
            );
            return Ok(());
        }

        info!(
            service = %self.service,
            instance_id = %self.instance_id,
            command = %self.spec.command,
            "starting process"
        );

        let mut cmd = Command::new(&self.spec.command);
        cmd.args(&self.spec.args);

        if let Some(ref wd) = self.spec.working_dir {
            cmd.current_dir(wd);
        }

        for (k, v) in &self.spec.env {
            cmd.env(k, v);
        }

        cmd.stdout(Stdio::null()).stderr(Stdio::null());

        let child = cmd.spawn().with_context(|| {
            format!(
                "failed to spawn process for {}/{}",
                self.service, self.instance_id
            )
        })?;

        let pid = child.id().context("failed to get child PID")?;

        // Write the PID file so liveness checks survive agent restarts.
        // Directory creation is best-effort; the write itself must succeed.
        if let Some(parent) = self.pid_file.parent() {
            fs::create_dir_all(parent).ok();
        }
        fs::write(&self.pid_file, pid.to_string())
            .with_context(|| format!("failed to write PID file {:?}", self.pid_file))?;

        self.child = Some(child);

        info!(
            service = %self.service,
            instance_id = %self.instance_id,
            pid = pid,
            "process started"
        );

        Ok(())
    }

    /// Stops the process. If this agent run spawned it, the kept `Child`
    /// handle is killed and reaped; otherwise the PID is read from the PID
    /// file and signalled via `kill`. Shutdown is best-effort (individual
    /// failures are swallowed); the PID file is always removed.
    pub async fn stop(&mut self) -> Result<()> {
        if !self.is_running().await? {
            info!(
                service = %self.service,
                instance_id = %self.instance_id,
                "process not running"
            );
            return Ok(());
        }

        info!(
            service = %self.service,
            instance_id = %self.instance_id,
            "stopping process"
        );

        if let Some(mut child) = self.child.take() {
            child.kill().await.ok();
            child.wait().await.ok();
        } else {
            // No handle: read the PID from the PID file and stop it that way.
            if let Ok(pid_str) = fs::read_to_string(&self.pid_file) {
                if let Ok(pid) = pid_str.trim().parse::<u32>() {
                    // Send SIGTERM (simplified implementation).
                    Command::new("kill")
                        .arg(pid.to_string())
                        .output()
                        .await
                        .ok();
                }
            }
        }

        // Remove the PID file.
        fs::remove_file(&self.pid_file).ok();

        info!(
            service = %self.service,
            instance_id = %self.instance_id,
            "process stopped"
        );

        Ok(())
    }

    /// Returns whether the process appears alive, judged by the PID file
    /// plus `kill -0 <pid>`.
    ///
    /// NOTE(review): a stale PID file whose PID was reused by an unrelated
    /// process yields a false positive — accepted by this simplified
    /// implementation.
    pub async fn is_running(&self) -> Result<bool> {
        // No PID file means no tracked process.
        if !self.pid_file.exists() {
            return Ok(false);
        }

        // Read the PID back from the PID file.
        let pid_str = fs::read_to_string(&self.pid_file)
            .with_context(|| format!("failed to read PID file {:?}", self.pid_file))?;
        let pid = pid_str
            .trim()
            .parse::<u32>()
            .with_context(|| format!("invalid PID in file {:?}", self.pid_file))?;

        // Probe the process with `kill -0` (simplified liveness check).
        let output = Command::new("kill")
            .arg("-0")
            .arg(pid.to_string())
            .output()
            .await
            .with_context(|| format!("failed to check process {}", pid))?;

        Ok(output.status.success())
    }

    /// Stops the process, waits one second, then starts it again.
    pub async fn restart(&mut self) -> Result<()> {
        self.stop().await?;
        tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;
        self.start().await?;
        Ok(())
    }
}
|
||||
|
||||
/// Collection of supervised processes, keyed by `"{service}/{instance_id}"`.
pub struct ProcessManager {
    /// Managed processes indexed by their `"{service}/{instance_id}"` key.
    processes: HashMap<String, ManagedProcess>,
}
|
||||
|
||||
impl ProcessManager {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
processes: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add(&mut self, service: String, instance_id: String, spec: ProcessSpec) {
|
||||
let key = format!("{}/{}", service, instance_id);
|
||||
let process = ManagedProcess::new(service, instance_id, spec);
|
||||
self.processes.insert(key, process);
|
||||
}
|
||||
|
||||
pub fn remove(&mut self, service: &str, instance_id: &str) -> Option<ManagedProcess> {
|
||||
let key = format!("{}/{}", service, instance_id);
|
||||
self.processes.remove(&key)
|
||||
}
|
||||
|
||||
pub fn get_mut(&mut self, service: &str, instance_id: &str) -> Option<&mut ManagedProcess> {
|
||||
let key = format!("{}/{}", service, instance_id);
|
||||
self.processes.get_mut(&key)
|
||||
}
|
||||
|
||||
pub async fn start_all(&mut self) -> Result<()> {
|
||||
for (_, process) in self.processes.iter_mut() {
|
||||
if let Err(e) = process.start().await {
|
||||
warn!(
|
||||
service = %process.service,
|
||||
instance_id = %process.instance_id,
|
||||
error = %e,
|
||||
"failed to start process"
|
||||
);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn stop_all(&mut self) -> Result<()> {
|
||||
for (_, process) in self.processes.iter_mut() {
|
||||
if let Err(e) = process.stop().await {
|
||||
warn!(
|
||||
service = %process.service,
|
||||
instance_id = %process.instance_id,
|
||||
error = %e,
|
||||
"failed to stop process"
|
||||
);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn reconcile(&mut self) -> Result<()> {
|
||||
for (_, process) in self.processes.iter_mut() {
|
||||
match process.is_running().await {
|
||||
Ok(true) => {
|
||||
// プロセスは実行中、何もしない
|
||||
}
|
||||
Ok(false) => {
|
||||
// プロセスが停止しているので起動
|
||||
warn!(
|
||||
service = %process.service,
|
||||
instance_id = %process.instance_id,
|
||||
"process is not running, restarting"
|
||||
);
|
||||
if let Err(e) = process.start().await {
|
||||
warn!(
|
||||
service = %process.service,
|
||||
instance_id = %process.instance_id,
|
||||
error = %e,
|
||||
"failed to restart process"
|
||||
);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(
|
||||
service = %process.service,
|
||||
instance_id = %process.instance_id,
|
||||
error = %e,
|
||||
"failed to check process status"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
77
deployer/crates/node-agent/src/watcher.rs
Normal file
77
deployer/crates/node-agent/src/watcher.rs
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::Result;
|
||||
use chainfire_client::Client;
|
||||
use tokio::sync::RwLock;
|
||||
use tokio::time::sleep;
|
||||
use tracing::{info, warn};
|
||||
|
||||
/// Poll-based change watcher for one Chainfire key prefix.
///
/// Not a true server-push watch: it re-scans the prefix every `interval`
/// and filters by revision client-side.
pub struct ChainfireWatcher {
    /// Chainfire API endpoint.
    endpoint: String,
    /// Key prefix to observe.
    prefix: String,
    /// Poll interval.
    interval: Duration,
}
|
||||
|
||||
impl ChainfireWatcher {
    /// Creates a watcher; nothing is contacted until `watch`.
    pub fn new(endpoint: String, prefix: String, interval_secs: u64) -> Self {
        Self {
            endpoint,
            prefix,
            interval: Duration::from_secs(interval_secs),
        }
    }

    /// Polls Chainfire forever, invoking `callback` with key/value pairs
    /// whose revision increased since the previous poll. Fetch and callback
    /// errors are logged and the loop keeps going; this never returns `Ok`.
    pub async fn watch<F>(&self, mut callback: F) -> Result<()>
    where
        F: FnMut(Vec<(Vec<u8>, Vec<u8>)>) -> Result<()>,
    {
        // Highest revision delivered so far; 0 means "deliver everything".
        let mut last_revision = 0u64;

        loop {
            match self.fetch_updates(last_revision).await {
                Ok((kvs, max_rev)) => {
                    if !kvs.is_empty() {
                        info!(
                            prefix = %self.prefix,
                            count = kvs.len(),
                            "detected changes in Chainfire"
                        );
                        if let Err(e) = callback(kvs) {
                            warn!(error = %e, "callback failed");
                        }
                    }
                    if max_rev > last_revision {
                        last_revision = max_rev;
                    }
                }
                Err(e) => {
                    warn!(error = %e, "failed to fetch updates from Chainfire");
                }
            }

            sleep(self.interval).await;
        }
    }

    /// Scans the whole prefix (a fresh connection per call) and returns the
    /// entries with revision greater than `last_revision`, plus the highest
    /// revision seen among them.
    async fn fetch_updates(&self, last_revision: u64) -> Result<(Vec<(Vec<u8>, Vec<u8>)>, u64)> {
        let mut client = Client::connect(self.endpoint.clone()).await?;
        let (kvs, _) = client.scan_prefix(self.prefix.as_bytes(), 0).await?;

        // Simplified implementation: fetch every KV pair and filter by
        // revision client-side (server-side revision filtering not yet done).
        let mut max_rev = last_revision;
        let mut result = Vec::new();
        for (k, v, rev) in kvs {
            if rev > last_revision {
                result.push((k, v));
                if rev > max_rev {
                    max_rev = rev;
                }
            }
        }

        Ok((result, max_rev))
    }
}
|
||||
|
||||
|
||||
20
deployer/crates/plasmacloud-reconciler/Cargo.toml
Normal file
20
deployer/crates/plasmacloud-reconciler/Cargo.toml
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
[package]
|
||||
name = "plasmacloud-reconciler"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
rust-version.workspace = true
|
||||
authors.workspace = true
|
||||
license.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
tokio.workspace = true
|
||||
tracing.workspace = true
|
||||
tracing-subscriber.workspace = true
|
||||
fiberlb-api.workspace = true
|
||||
flashdns-api.workspace = true
|
||||
clap = { version = "4.5", features = ["derive"] }
|
||||
tonic = "0.12"
|
||||
1918
deployer/crates/plasmacloud-reconciler/src/main.rs
Normal file
1918
deployer/crates/plasmacloud-reconciler/src/main.rs
Normal file
File diff suppressed because it is too large
Load diff
398
docs/Nix-NOS.md
Normal file
398
docs/Nix-NOS.md
Normal file
|
|
@ -0,0 +1,398 @@
|
|||
# PlasmaCloud/PhotonCloud と Nix-NOS の統合分析
|
||||
|
||||
## Architecture Decision (2025-12-13)
|
||||
|
||||
**決定:** Nix-NOSを汎用ネットワークモジュールとして別リポジトリに分離する。
|
||||
|
||||
### Three-Layer Architecture
|
||||
|
||||
```
|
||||
Layer 3: PlasmaCloud Cluster (T061)
|
||||
- plasmacloud-cluster.nix
|
||||
- cluster-config.json生成
|
||||
- Deployer (Rust)
|
||||
depends on ↓
|
||||
|
||||
Layer 2: PlasmaCloud Network (T061)
|
||||
- plasmacloud-network.nix
|
||||
- FiberLB BGP連携
|
||||
- PrismNET統合
|
||||
depends on ↓
|
||||
|
||||
Layer 1: Nix-NOS Generic (T062) ← 別リポジトリ
|
||||
- BGP (BIRD2/GoBGP)
|
||||
- VLAN
|
||||
- Network interfaces
|
||||
- PlasmaCloudを知らない汎用モジュール
|
||||
```
|
||||
|
||||
### Repository Structure
|
||||
|
||||
- **github.com/centra/nix-nos**: Layer 1 (汎用、VyOS/OpenWrt代替)
|
||||
- **github.com/centra/plasmacloud**: Layers 2+3 (既存リポジトリ)
|
||||
|
||||
---
|
||||
|
||||
## 1. 既存プロジェクトの概要
|
||||
|
||||
PlasmaCloud(PhotonCloud)は、以下のコンポーネントで構成されるクラウド基盤プロジェクト:
|
||||
|
||||
### コアサービス
|
||||
| コンポーネント | 役割 | 技術スタック |
|
||||
|---------------|------|-------------|
|
||||
| **ChainFire** | 分散KVストア(etcd互換) | Rust, Raft (openraft) |
|
||||
| **FlareDB** | SQLデータベース | Rust, KVバックエンド |
|
||||
| **IAM** | 認証・認可 | Rust, JWT/mTLS |
|
||||
| **PlasmaVMC** | VM管理 | Rust, KVM/FireCracker |
|
||||
| **PrismNET** | オーバーレイネットワーク | Rust, OVN連携 |
|
||||
| **LightningSTOR** | オブジェクトストレージ | Rust, S3互換 |
|
||||
| **FlashDNS** | DNS | Rust, hickory-dns |
|
||||
| **FiberLB** | ロードバランサー | Rust, L4/L7, BGP予定 |
|
||||
| **NightLight** | メトリクス | Rust, Prometheus互換 |
|
||||
| **k8shost** | コンテナオーケストレーション | Rust, K8s API互換 |
|
||||
|
||||
### インフラ層
|
||||
- **NixOSモジュール**: 各サービス用 (`nix/modules/`)
|
||||
- **first-boot-automation**: 自動クラスタ参加
|
||||
- **PXE/Netboot**: ベアメタルプロビジョニング
|
||||
- **TLS証明書管理**: 開発用証明書生成スクリプト
|
||||
|
||||
---
|
||||
|
||||
## 2. Nix-NOS との統合ポイント
|
||||
|
||||
### 2.1 Baremetal Provisioning → Deployer強化
|
||||
|
||||
**既存の実装:**
|
||||
```
|
||||
first-boot-automation.nix
|
||||
├── cluster-config.json による設定注入
|
||||
├── bootstrap vs join の自動判定
|
||||
├── マーカーファイルによる冪等性
|
||||
└── systemd サービス連携
|
||||
```
|
||||
|
||||
**Nix-NOSで追加すべき機能:**
|
||||
|
||||
| 既存 | Nix-NOS追加 |
|
||||
|------|-------------|
|
||||
| cluster-config.json (手動作成) | topology.nix から自動生成 |
|
||||
| 単一クラスタ構成 | 複数クラスタ/サイト対応 |
|
||||
| nixos-anywhere 依存 | Deployer (Phone Home + Push) |
|
||||
| 固定IP設定 | IPAM連携による動的割当 |
|
||||
|
||||
**統合設計:**
|
||||
|
||||
```nix
|
||||
# topology.nix(Nix-NOS)
|
||||
{
|
||||
nix-nos.clusters.plasmacloud = {
|
||||
nodes = {
|
||||
"node01" = {
|
||||
role = "control-plane";
|
||||
ip = "10.0.1.10";
|
||||
services = [ "chainfire" "flaredb" "iam" ];
|
||||
};
|
||||
"node02" = { role = "control-plane"; ip = "10.0.1.11"; };
|
||||
"node03" = { role = "worker"; ip = "10.0.1.12"; };
|
||||
};
|
||||
|
||||
# Nix-NOSが自動生成 → first-boot-automationが読む
|
||||
# cluster-config.json の内容をNix評価時に決定
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
### 2.2 Network Management → PrismNET + FiberLB + Nix-NOS BGP
|
||||
|
||||
**既存の実装:**
|
||||
```
|
||||
PrismNET (prismnet/)
|
||||
├── VPC/Subnet/Port管理
|
||||
├── Security Groups
|
||||
├── IPAM
|
||||
└── OVN連携
|
||||
|
||||
FiberLB (fiberlb/)
|
||||
├── L4/L7ロードバランシング
|
||||
├── ヘルスチェック
|
||||
├── VIP管理
|
||||
└── BGP統合(設計済み、GoBGPサイドカー)
|
||||
```
|
||||
|
||||
**Nix-NOSで追加すべき機能:**
|
||||
|
||||
```
|
||||
Nix-NOS Network Layer
|
||||
├── BGP設定生成(BIRD2)
|
||||
│ ├── iBGP/eBGP自動計算
|
||||
│ ├── Route Reflector対応
|
||||
│ └── ポリシー抽象化
|
||||
├── topology.nix → systemd-networkd
|
||||
├── OpenWrt/Cisco設定生成(将来)
|
||||
└── FiberLB BGP連携
|
||||
```
|
||||
|
||||
**統合設計:**
|
||||
|
||||
```nix
|
||||
# Nix-NOSのBGPモジュール → FiberLBのGoBGP設定に統合
|
||||
{
|
||||
nix-nos.network.bgp = {
|
||||
autonomousSystems = {
|
||||
"65000" = {
|
||||
members = [ "node01" "node02" "node03" ];
|
||||
ibgp.strategy = "route-reflector";
|
||||
ibgp.reflectors = [ "node01" ];
|
||||
};
|
||||
};
|
||||
|
||||
# FiberLBのVIPをBGPで広報
|
||||
vipAdvertisements = {
|
||||
"fiberlb" = {
|
||||
vips = [ "10.0.100.1" "10.0.100.2" ];
|
||||
nextHop = "self";
|
||||
communities = [ "65000:100" ];
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
# FiberLBモジュールとの連携
|
||||
services.fiberlb.bgp = {
|
||||
enable = true;
|
||||
# Nix-NOSが生成するGoBGP設定を参照
|
||||
configFile = config.nix-nos.network.bgp.gobgpConfig;
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
### 2.3 K8sパチモン → k8shost + Pure NixOS Alternative
|
||||
|
||||
**既存の実装:**
|
||||
```
|
||||
k8shost (k8shost/)
|
||||
├── Pod管理(gRPC API)
|
||||
├── Service管理(ClusterIP/NodePort)
|
||||
├── Node管理
|
||||
├── CNI連携
|
||||
├── CSI連携
|
||||
└── FiberLB/FlashDNS連携
|
||||
```
|
||||
|
||||
**Nix-NOSの役割:**
|
||||
|
||||
k8shostはすでにKubernetesのパチモンとして機能している。Nix-NOSは:
|
||||
|
||||
1. **k8shostを使う場合**: k8shostクラスタ自体のデプロイをNix-NOSで管理
|
||||
2. **Pure NixOS(K8sなし)**: より軽量な選択肢として、Systemd + Nix-NOSでサービス管理
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ Orchestration Options │
|
||||
├─────────────────────────────────────────────────────────────┤
|
||||
│ Option A: k8shost (K8s-like) │
|
||||
│ ┌─────────────────────────────────────────────────────┐ │
|
||||
│ │ Nix-NOS manages: cluster topology, network, certs │ │
|
||||
│ │ k8shost manages: pods, services, scaling │ │
|
||||
│ └─────────────────────────────────────────────────────┘ │
|
||||
│ │
|
||||
│ Option B: Pure NixOS (K8s-free) │
|
||||
│ ┌─────────────────────────────────────────────────────┐ │
|
||||
│ │ Nix-NOS manages: everything │ │
|
||||
│ │ systemd + containers, static service discovery │ │
|
||||
│ │ Use case: クラウド基盤自体の管理 │ │
|
||||
│ └─────────────────────────────────────────────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
**重要な洞察:**
|
||||
|
||||
> 「クラウドの基盤そのものを作るのにKubernetesは使いたくない」
|
||||
|
||||
これは正しいアプローチ。PlasmaCloudのコアサービス(ChainFire, FlareDB, IAM等)は:
|
||||
- K8sの上で動くのではなく、K8sを提供する側
|
||||
- Pure NixOS + Systemdで管理されるべき
|
||||
- Nix-NOSはこのレイヤーを担当
|
||||
|
||||
---
|
||||
|
||||
## 3. 具体的な統合計画
|
||||
|
||||
### Phase 1: Baremetal Provisioning統合
|
||||
|
||||
**目標:** first-boot-automationをNix-NOSのtopology.nixと連携
|
||||
|
||||
```nix
|
||||
# nix/modules/first-boot-automation.nix への追加
|
||||
{ config, lib, ... }:
|
||||
let
|
||||
# Nix-NOSのトポロジーから設定を生成
|
||||
clusterConfig =
|
||||
if config.nix-nos.cluster != null then
|
||||
config.nix-nos.cluster.generateClusterConfig {
|
||||
hostname = config.networking.hostName;
|
||||
}
|
||||
else
|
||||
# 従来のcluster-config.json読み込み
|
||||
builtins.fromJSON (builtins.readFile /etc/nixos/secrets/cluster-config.json);
|
||||
in {
|
||||
# 既存のfirst-boot-automationロジックはそのまま
|
||||
# ただし設定ソースをNix-NOSに切り替え可能に
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 2: BGP/Network統合
|
||||
|
||||
**目標:** FiberLBのBGP連携(T055.S3)をNix-NOSで宣言的に管理
|
||||
|
||||
```nix
|
||||
# nix/modules/fiberlb-bgp-nixnos.nix
|
||||
{ config, lib, pkgs, ... }:
|
||||
let
|
||||
fiberlbCfg = config.services.fiberlb;
|
||||
nixnosBgp = config.nix-nos.network.bgp;
|
||||
in {
|
||||
config = lib.mkIf (fiberlbCfg.enable && nixnosBgp.enable) {
|
||||
# GoBGP設定をNix-NOSから生成
|
||||
services.gobgpd = {
|
||||
enable = true;
|
||||
configFile = pkgs.writeText "gobgp.yaml" (
|
||||
nixnosBgp.generateGobgpConfig {
|
||||
localAs = nixnosBgp.getLocalAs config.networking.hostName;
|
||||
routerId = nixnosBgp.getRouterId config.networking.hostName;
|
||||
neighbors = nixnosBgp.getPeers config.networking.hostName;
|
||||
}
|
||||
);
|
||||
};
|
||||
|
||||
# FiberLBにGoBGPアドレスを注入
|
||||
services.fiberlb.bgp = {
|
||||
gobgpAddress = "127.0.0.1:50051";
|
||||
};
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
### Phase 3: Deployer実装
|
||||
|
||||
**目標:** Phone Home + Push型デプロイメントコントローラー
|
||||
|
||||
```
|
||||
plasmacloud/
|
||||
├── deployer/ # 新規追加
|
||||
│ ├── src/
|
||||
│ │ ├── api.rs # Phone Home API
|
||||
│ │ ├── orchestrator.rs # デプロイワークフロー
|
||||
│ │ ├── state.rs # ノード状態管理(ChainFire連携)
|
||||
│ │ └── iso_generator.rs # ISO自動生成
|
||||
│ └── Cargo.toml
|
||||
└── nix/
|
||||
└── modules/
|
||||
└── deployer.nix # NixOSモジュール
|
||||
```
|
||||
|
||||
**ChainFireとの連携:**
|
||||
|
||||
DeployerはChainFireを状態ストアとして使用:
|
||||
|
||||
```rust
|
||||
// deployer/src/state.rs
|
||||
struct NodeState {
|
||||
hostname: String,
|
||||
status: NodeStatus, // Pending, Provisioning, Active, Failed
|
||||
bootstrap_key_hash: Option<String>,
|
||||
ssh_pubkey: Option<String>,
|
||||
last_seen: DateTime<Utc>,
|
||||
}
|
||||
|
||||
impl DeployerState {
|
||||
async fn register_node(&self, node: &NodeState) -> Result<()> {
|
||||
// ChainFireに保存
|
||||
self.chainfire_client
|
||||
.put(format!("deployer/nodes/{}", node.hostname), node.to_json())
|
||||
.await
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4. アーキテクチャ全体図
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────────┐
|
||||
│ Nix-NOS Layer │
|
||||
│ ┌─────────────────────────────────────────────────────────────┐ │
|
||||
│ │ topology.nix │ │
|
||||
│ │ - ノード定義 │ │
|
||||
│ │ - ネットワークトポロジー │ │
|
||||
│ │ - サービス配置 │ │
|
||||
│ └─────────────────────────────────────────────────────────────┘ │
|
||||
│ │ │
|
||||
│ generates │ │
|
||||
│ ▼ │
|
||||
│ ┌──────────────┬──────────────┬──────────────┬──────────────┐ │
|
||||
│ │ NixOS Config │ BIRD Config │ GoBGP Config │ cluster- │ │
|
||||
│ │ (systemd) │ (BGP) │ (FiberLB) │ config.json │ │
|
||||
│ └──────────────┴──────────────┴──────────────┴──────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────────────────────────────────────────────────────────┐
|
||||
│ PlasmaCloud Services │
|
||||
│ ┌───────────────────────────────────────────────────────────────┐ │
|
||||
│ │ Control Plane │ │
|
||||
│ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ │
|
||||
│ │ │ChainFire │ │ FlareDB │ │ IAM │ │ Deployer │ │ │
|
||||
│ │ │(Raft KV) │ │ (SQL) │ │(AuthN/Z) │ │ (新規) │ │ │
|
||||
│ │ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ │
|
||||
│ └───────────────────────────────────────────────────────────────┘ │
|
||||
│ │
|
||||
│ ┌───────────────────────────────────────────────────────────────┐ │
|
||||
│ │ Network Plane │ │
|
||||
│ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ │
|
||||
│ │ │ PrismNET │ │ FiberLB │ │ FlashDNS │ │ BIRD2 │ │ │
|
||||
│ │ │ (OVN) │ │(LB+BGP) │ │ (DNS) │ │(Nix-NOS) │ │ │
|
||||
│ │ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ │
|
||||
│ └───────────────────────────────────────────────────────────────┘ │
|
||||
│ │
|
||||
│ ┌───────────────────────────────────────────────────────────────┐ │
|
||||
│ │ Compute Plane │ │
|
||||
│ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ │
|
||||
│ │ │PlasmaVMC │ │ k8shost │ │Lightning │ │ │
|
||||
│ │ │(VM/FC) │ │(K8s-like)│ │ STOR │ │ │
|
||||
│ │ └──────────┘ └──────────┘ └──────────┘ │ │
|
||||
│ └───────────────────────────────────────────────────────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5. 優先度と実装順序
|
||||
|
||||
| 優先度 | 機能 | 依存関係 | 工数 |
|
||||
|--------|------|----------|------|
|
||||
| **P0** | topology.nix → cluster-config.json生成 | なし | 1週間 |
|
||||
| **P0** | BGPモジュール(BIRD2設定生成) | なし | 2週間 |
|
||||
| **P1** | FiberLB BGP連携(GoBGP) | T055.S3完了 | 2週間 |
|
||||
| **P1** | Deployer基本実装 | ChainFire | 3週間 |
|
||||
| **P2** | OpenWrt設定生成 | BGPモジュール | 2週間 |
|
||||
| **P2** | ISO自動生成パイプライン | Deployer完了後 | 1週間 |
|
||||
| **P2** | 各サービスの設定をNixで管理可能なように | なし | 未見積もり(個別に随時対応) |
|
||||
|
||||
---
|
||||
|
||||
## 6. 結論
|
||||
|
||||
PlasmaCloud/PhotonCloudプロジェクトは、Nix-NOSの構想を実装するための**理想的な基盤**:
|
||||
|
||||
1. **すでにNixOSモジュール化されている** → Nix-NOSモジュールとの統合が容易
|
||||
2. **first-boot-automationが存在** → Deployerの基礎として活用可能
|
||||
3. **FiberLBにBGP設計がある** → Nix-NOSのBGPモジュールと自然に統合
|
||||
4. **ChainFireが状態ストア** → Deployer状態管理に利用可能
|
||||
5. **k8shostが存在するがK8sではない** → 「K8sパチモン」の哲学と一致
|
||||
|
||||
**次のアクション:**
|
||||
1. Nix-NOSモジュールをPlasmaCloudリポジトリに追加
|
||||
2. topology.nix → cluster-config.json生成の実装
|
||||
3. BGPモジュール(BIRD2)の実装とFiberLB連携
|
||||
64
docs/README-dependency-graphs.md
Normal file
64
docs/README-dependency-graphs.md
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
# Component Dependency Graphs
|
||||
|
||||
このディレクトリには、PhotonCloudプロジェクトのコンポーネント依存関係を可視化したGraphvizファイルが含まれています。
|
||||
|
||||
## ファイル
|
||||
|
||||
- `component-dependencies.dot` - 高レベルな依存関係図(レイヤー別)
|
||||
- `component-dependencies-detailed.dot` - 詳細な依存関係図(内部構造含む)
|
||||
|
||||
## 画像生成方法
|
||||
|
||||
Graphvizがインストールされている場合、以下のコマンドでPNG画像を生成できます:
|
||||
|
||||
```bash
|
||||
# 高レベルな依存関係図
|
||||
dot -Tpng component-dependencies.dot -o component-dependencies.png
|
||||
|
||||
# 詳細な依存関係図
|
||||
dot -Tpng component-dependencies-detailed.dot -o component-dependencies-detailed.png
|
||||
|
||||
# SVG形式(拡大縮小可能)
|
||||
dot -Tsvg component-dependencies.dot -o component-dependencies.svg
|
||||
dot -Tsvg component-dependencies-detailed.dot -o component-dependencies-detailed.svg
|
||||
```
|
||||
|
||||
## Graphvizのインストール
|
||||
|
||||
### NixOS
|
||||
```bash
|
||||
nix-shell -p graphviz
|
||||
```
|
||||
|
||||
### Ubuntu/Debian
|
||||
```bash
|
||||
sudo apt-get install graphviz
|
||||
```
|
||||
|
||||
### macOS
|
||||
```bash
|
||||
brew install graphviz
|
||||
```
|
||||
|
||||
## 図の説明
|
||||
|
||||
### 高レベルな依存関係図 (`component-dependencies.dot`)
|
||||
|
||||
- **Infrastructure Layer** (青): ChainFire (分散KVストア), FlareDB (マルチモデルデータベース、FoundationDB風) - 基盤ストレージサービス
|
||||
- **Platform Layer** (オレンジ): IAM, Deployer - プラットフォームサービス
|
||||
- **Application Layer** (緑): 各種アプリケーションサービス
|
||||
- **Deployment Layer** (紫): NixOSモジュール、netboot、ISO、first-boot自動化
|
||||
|
||||
### 詳細な依存関係図 (`component-dependencies-detailed.dot`)
|
||||
|
||||
各サービスの内部構造(クレート/モジュール)と、サービス間の依存関係を詳細に表示します。
|
||||
|
||||
**注意**: FlareDBは**FoundationDBのようなマルチモデルデータベース**として設計されています。分散KVストア(RocksDB + Raft)が基盤で、その上に複数のフロントエンドレイヤー(KV API、SQLレイヤーなど)を提供します。時系列データの保存には**NightLight**(Prometheus互換メトリクスストレージ)を使用します。
|
||||
|
||||
## 凡例
|
||||
|
||||
- **青い実線**: ランタイム依存関係(直接使用)
|
||||
- **青い点線**: オプショナルな依存関係
|
||||
- **オレンジの線**: サービス間の統合
|
||||
- **紫の線**: デプロイメント/設定関連
|
||||
- **赤い点線**: systemdの起動順序依存
|
||||
111
docs/architecture/api-gateway.md
Normal file
111
docs/architecture/api-gateway.md
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
# API Gateway
|
||||
|
||||
## Role in the topology
|
||||
- FiberLB sits in front of the API Gateway and handles inbound L4/L7 load balancing.
|
||||
- API Gateway performs HTTP routing, auth, and credit checks before proxying to upstreams.
|
||||
- FlashDNS can publish gateway endpoints, but DNS is not coupled to the gateway itself.
|
||||
|
||||
## gRPC integrations
|
||||
- `GatewayAuthService.Authorize` for authentication and identity headers.
|
||||
- `GatewayCreditService.Reserve/Commit/Rollback` for credit enforcement.
|
||||
- IAM and CreditService ship adapters for these services, but any vendor can implement them.
|
||||
|
||||
## IAM authorization mapping
|
||||
- Action: `gateway:{route}:{verb}` where `verb` is `read|create|update|delete|list|execute` derived from HTTP method.
|
||||
- Resource: kind `gateway_route`, id from `route_name` (fallback to sanitized path).
|
||||
- org/project: resolved from token claims or scope; if missing, falls back to `x-org-id`/`x-project-id` headers, then `system`.
|
||||
- AuthzContext: `request.method`, `request.path`, and metadata keys `route`, `request_id`, `raw_query`.
|
||||
|
||||
## Builtin roles
|
||||
- `GatewayAdmin`: action `gateway:*:*`, resource `org/${org}/project/${project}/gateway_route/*`
|
||||
- `GatewayReadOnly`: actions `gateway:*:read` and `gateway:*:list` on the same resource pattern
|
||||
|
||||
## Configuration (TOML)
|
||||
```toml
|
||||
http_addr = "0.0.0.0:8080"
|
||||
log_level = "info"
|
||||
max_body_bytes = 16777216
|
||||
|
||||
[[auth_providers]]
|
||||
name = "iam"
|
||||
type = "grpc"
|
||||
endpoint = "http://127.0.0.1:3000"
|
||||
timeout_ms = 500
|
||||
|
||||
[[credit_providers]]
|
||||
name = "credit"
|
||||
type = "grpc"
|
||||
endpoint = "http://127.0.0.1:3010"
|
||||
timeout_ms = 500
|
||||
|
||||
[[routes]]
|
||||
name = "public"
|
||||
path_prefix = "/v1"
|
||||
upstream = "http://api-backend:8080"
|
||||
strip_prefix = true
|
||||
|
||||
[routes.auth]
|
||||
provider = "iam"
|
||||
mode = "required" # disabled | optional | required
|
||||
fail_open = false
|
||||
|
||||
[routes.credit]
|
||||
provider = "credit"
|
||||
mode = "optional" # disabled | optional | required
|
||||
units = 1
|
||||
fail_open = false
|
||||
commit_on = "success" # success | always | never
|
||||
attributes = { resource_type = "api", ttl_seconds = "300" }
|
||||
```
|
||||
|
||||
## NixOS module example
|
||||
```nix
|
||||
services.apigateway = {
|
||||
enable = true;
|
||||
port = 8080;
|
||||
maxBodyBytes = 16 * 1024 * 1024;
|
||||
|
||||
authProviders = [{
|
||||
name = "iam";
|
||||
providerType = "grpc";
|
||||
endpoint = "http://127.0.0.1:${toString config.services.iam.port}";
|
||||
timeoutMs = 500;
|
||||
}];
|
||||
|
||||
creditProviders = [{
|
||||
name = "credit";
|
||||
providerType = "grpc";
|
||||
endpoint = "http://127.0.0.1:${toString config.services.creditservice.grpcPort}";
|
||||
timeoutMs = 500;
|
||||
}];
|
||||
|
||||
routes = [{
|
||||
name = "public";
|
||||
pathPrefix = "/v1";
|
||||
upstream = "http://api-backend:8080";
|
||||
stripPrefix = true;
|
||||
auth = {
|
||||
provider = "iam";
|
||||
mode = "required";
|
||||
failOpen = false;
|
||||
};
|
||||
credit = {
|
||||
provider = "credit";
|
||||
mode = "optional";
|
||||
units = 1;
|
||||
failOpen = false;
|
||||
commitOn = "success";
|
||||
attributes = {
|
||||
resource_type = "api";
|
||||
ttl_seconds = "300";
|
||||
};
|
||||
};
|
||||
}];
|
||||
};
|
||||
```
|
||||
|
||||
## Drop-in replacement guidance
|
||||
- Implement `GatewayAuthService` or `GatewayCreditService` in any language and point the
|
||||
gateway at the gRPC endpoint via `auth_providers`/`credit_providers`.
|
||||
- Use the `headers` map in auth responses to inject custom headers (e.g., session IDs).
|
||||
- Credit adapters can interpret `attributes` to map `units` to internal billing concepts.
|
||||
113
docs/architecture/baremetal-mesh-migration.md
Normal file
113
docs/architecture/baremetal-mesh-migration.md
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
## ベアメタル向けサービスメッシュ移行計画
|
||||
|
||||
本ドキュメントでは、既存の PhotonCloud サービス群を、
|
||||
Deployer/NodeAgent+mTLS Agent ベースのサービスメッシュ風アーキテクチャに
|
||||
段階的に移行するためのマイルストーンを定義する。
|
||||
|
||||
### 1. 現状整理フェーズ(完了済み前提)
|
||||
|
||||
- `baremetal/first-boot`:
|
||||
- 役割: 初回ブート時のクラスタ join・基本サービス起動。
|
||||
- 常駐ではなく oneshot 系 systemd unit 群。
|
||||
- `deployer/`:
|
||||
- 役割: Phone Home によるノード登録と最低限の Node/Config 管理。
|
||||
- まだ常駐型 NodeAgent や ServiceInstance Reconcile は未実装。
|
||||
- mTLS:
|
||||
- 各サービスごとに ad-hoc な TLS/mTLS 設定が存在。
|
||||
  - dev ではしばしば平文、または `curl -k`(証明書検証スキップ)相当での接続。
|
||||
|
||||
### 2. フェーズ 1: Chainfire モデルと Deployer 拡張
|
||||
|
||||
**目的**: Chainfire をクラスタ状態のソース・オブ・トゥルースにする土台を作る。
|
||||
|
||||
- **M1-1**: `specifications/deployer/README.md` に定義した
|
||||
`Cluster / Node / Service / ServiceInstance / MTLSPolicy` モデルを Chainfire 上に作成。
|
||||
- PoC 用に `photoncloud-ctl` 的な小さな CLI で CRUD を実装。
|
||||
- **M1-2**: `deployer-server` が `NodeInfo` を Chainfire にも書き出すように拡張。
|
||||
- 既存のローカルストレージ(ファイル or メモリ)は残しつつ、
|
||||
Chainfire を **optional backend** として追加。
|
||||
- **M1-3**: 管理 API に「サービス配置」を表すエンドポイントを追加。
|
||||
- 例: `/api/v1/admin/services/{service}/instances` で、
|
||||
Node とインスタンス数を指定できるようにし、内部で ServiceInstance を生成。
|
||||
|
||||
### 3. フェーズ 2: NodeAgent(常駐型 Reconciler)の導入
|
||||
|
||||
**目的**: 各ノードで Desired State → Observed State の Reconcile を開始する。
|
||||
|
||||
- **M2-1**: `plasmacloud-reconciler` を NodeAgent として再定義/拡張。
|
||||
- `--node-id` と `--chainfire-endpoint` を引数に取り、無限ループで動作。
|
||||
- **M2-2**: NodeAgent が自ノードの `Node` と `ServiceInstance` を watch し、
|
||||
ログ出力のみ行う「read-only モード」を実装。
|
||||
- まだ systemd 操作やプロセス起動はしない。
|
||||
- **M2-3**: systemd 統合(ベアメタルノード側)。
|
||||
- NixOS モジュールで `services.node-agent.enable = true;` を追加。
|
||||
- 既存 first-boot の後に NodeAgent を常駐させる。
|
||||
|
||||
### 4. フェーズ 3: サービスプロセス管理の Reconcile
|
||||
|
||||
**目的**: NodeAgent が実際にサービスプロセスを起動/停止できるようにする。
|
||||
|
||||
- **M3-1**: 各サービスの NixOS モジュールを見直し、
|
||||
`enable = false` をデフォルトにした上で、
|
||||
NodeAgent から `systemctl start/stop` で制御しやすい形に整理。
|
||||
- **M3-2**: NodeAgent 内に「ServiceInstance → systemd unit 名」のマッピングを追加。
|
||||
- 例: `service="chainfire" → unit="chainfire.service"`
|
||||
- 単純な 1:1 マッピングからスタート。
|
||||
- **M3-3**: Reconcile ループにプロセス制御を追加。
|
||||
- Desired にあるのに起動していなければ `systemctl start`。
|
||||
- Desired にないのに起動していれば `systemctl stop`。
|
||||
- **M3-4**: 起動結果/ヘルスを Chainfire にフィードバック。
|
||||
- `instances/{service}/{instance_id}.state` を `ready` / `unhealthy` に更新。
|
||||
|
||||
### 5. フェーズ 4: mTLS Agent の導入(プレーンプロキシ)
|
||||
|
||||
**目的**: サービスメッシュの「形」を先に作り、まだ TLS を強制しない。
|
||||
|
||||
- **M4-1**: 新クレート `mtls-agent`(名称要検討)を作成。
|
||||
- 最初は平文 TCP/HTTP プロキシとして実装。
|
||||
- ローカル app_port ←→ mesh_port の中継のみを行う。
|
||||
- **M4-2**: NodeAgent が ServiceInstance 起動時に、
|
||||
mTLS Agent を隣に起動するフローを追加。
|
||||
- config ファイル生成 → `systemctl start mtls-agent@{service}` など。
|
||||
- **M4-3**: Chainfire 上の ServiceInstance に `mesh_port` を登録。
|
||||
- 他サービスからの接続先として mesh_port を使う用意をする。
|
||||
- **M4-4**: 一部サービス間通信(例: `apigateway → creditservice`)を
|
||||
mTLS Agent 経由に切り替える PoC。
|
||||
- アプリ側は `client-common` 抽象を通じて `http://127.0.0.1:<mesh_control_port>` を叩く。
|
||||
|
||||
### 6. フェーズ 5: mTLS 対応とポリシー制御
|
||||
|
||||
**目的**: mTLS/TLS/平文を Chainfire のポリシーで切り替えられるようにする。
|
||||
|
||||
- **M5-1**: mTLS Agent に TLS/mTLS 機能を実装。
|
||||
- dev では平文、stg/prod では mTLS をデフォルトに。
|
||||
- 証明書/鍵は既存の T031 TLS 自動化の成果物を利用。
|
||||
- **M5-2**: Chainfire の `MTLSPolicy` を反映するロジックを Agent に実装。
|
||||
- `(source_service, target_service)` と Cluster の `environment` からモード決定。
|
||||
- **M5-3**: Deployer から `MTLSPolicy` を編集できる管理 API を追加。
|
||||
- 例: `/api/v1/admin/mtls/policies`。
|
||||
- **M5-4**: ステージング環境で「全経路 mTLS on」を試験。
|
||||
- 問題があればポリシーを `permissive` や `plain` に戻せることを確認。
|
||||
|
||||
### 7. フェーズ 6: 既存 ad-hoc mTLS 実装の段階的削除
|
||||
|
||||
**目的**: サービスコードから mTLS 実装を徐々に削除し、Agent に集約する。
|
||||
|
||||
- **M6-1**: 既存の各サービスから「直接 TLS ソケットを開いているコード」を列挙。
|
||||
- `grep` ベースで `rustls`, `native-tls`, `tls` 関連を洗い出し。
|
||||
- **M6-2**: 重要なサービスから順に、通信経路を `client-common` 抽象経由に置き換え。
|
||||
- まずは dev 環境でのみ mTLS Agent 経由にする feature flag を導入。
|
||||
- **M6-3**: 本番で mTLS Agent 経由通信が安定したら、
|
||||
対象サービスから ad-hoc な TLS 設定を削除。
|
||||
- **M6-4**: 最終的に、サービス側は「平文 HTTP/gRPC over localhost」という前提のみを持ち、
|
||||
セキュリティ/暗号化はすべて mTLS Agent に移譲。
|
||||
|
||||
### 8. 段階ごとのロールバック戦略
|
||||
|
||||
- 各フェーズは **Chainfire のキー空間と Deployer 設定で制御** できるようにする。
|
||||
- 例: NodeAgent を停止すれば、従来通り first-boot ベースの静的構成に戻せる。
|
||||
- 例: `MTLSPolicy` を削除すれば、Agent は平文モードに戻る(または完全停止)。
|
||||
- NodeAgent/mTLS Agent を導入するときは、必ず
|
||||
「全てのノードで Agent を止めると従来構成に戻る」状態を維持したまま進める。
|
||||
|
||||
|
||||
124
docs/cert-authority-usage.md
Normal file
124
docs/cert-authority-usage.md
Normal file
|
|
@ -0,0 +1,124 @@
|
|||
# Cert Authority 使用ガイド
|
||||
|
||||
## 概要
|
||||
|
||||
`cert-authority`は、PhotonCloudクラスタ内のmTLS通信に使用する証明書を発行・管理するツールです。
|
||||
|
||||
## 機能
|
||||
|
||||
1. **CA証明書の生成** (`init-ca`)
|
||||
2. **証明書の発行** (`issue`)
|
||||
3. **証明書ローテーションのチェック** (`check-rotation`)
|
||||
|
||||
## 使用方法
|
||||
|
||||
### 1. CA証明書の生成
|
||||
|
||||
初回セットアップ時に、ルートCA証明書とキーを生成します。
|
||||
|
||||
```bash
|
||||
cert-authority \
|
||||
--chainfire-endpoint http://localhost:2379 \
|
||||
--cluster-id test-cluster-01 \
|
||||
--ca-cert-path /etc/photoncloud/ca.crt \
|
||||
--ca-key-path /etc/photoncloud/ca.key \
|
||||
init-ca
|
||||
```
|
||||
|
||||
これにより、以下のファイルが生成されます:
|
||||
- `/etc/photoncloud/ca.crt`: CA証明書(PEM形式)
|
||||
- `/etc/photoncloud/ca.key`: CA秘密鍵(PEM形式)
|
||||
|
||||
### 2. 証明書の発行
|
||||
|
||||
ノードまたはサービス用の証明書を発行します。
|
||||
|
||||
```bash
|
||||
# ノード用証明書
|
||||
cert-authority \
|
||||
--chainfire-endpoint http://localhost:2379 \
|
||||
--cluster-id test-cluster-01 \
|
||||
--ca-cert-path /etc/photoncloud/ca.crt \
|
||||
--ca-key-path /etc/photoncloud/ca.key \
|
||||
issue \
|
||||
--csr-path /tmp/node-01.csr \
|
||||
--cert-path /etc/photoncloud/node-01.crt \
|
||||
--node-id node-01
|
||||
|
||||
# サービス用証明書
|
||||
cert-authority \
|
||||
--chainfire-endpoint http://localhost:2379 \
|
||||
--cluster-id test-cluster-01 \
|
||||
--ca-cert-path /etc/photoncloud/ca.crt \
|
||||
--ca-key-path /etc/photoncloud/ca.key \
|
||||
issue \
|
||||
--csr-path /tmp/api-server.csr \
|
||||
--cert-path /etc/photoncloud/api-server.crt \
|
||||
--service-name api-server
|
||||
```
|
||||
|
||||
**注意**: 現在の実装では、CSRファイルは読み込まれず、新しいキーペアが自動生成されます。CSRパース機能は今後の拡張予定です。
|
||||
|
||||
発行された証明書は以下の場所に保存されます:
|
||||
- `{cert_path}`: 証明書(PEM形式)
|
||||
- `{cert_path}.key`: 秘密鍵(PEM形式)
|
||||
|
||||
また、証明書バインディング情報がChainfireに記録されます:
|
||||
- キー: `photoncloud/clusters/{cluster_id}/mtls/certs/{node_id or service_name}/...`
|
||||
- 値: `CertificateBinding` JSON(シリアル番号、発行日時、有効期限など)
|
||||
|
||||
### 3. 証明書ローテーションのチェック
|
||||
|
||||
証明書の有効期限をチェックし、ローテーションが必要かどうかを判定します。
|
||||
|
||||
```bash
|
||||
cert-authority \
|
||||
--chainfire-endpoint http://localhost:2379 \
|
||||
--cluster-id test-cluster-01 \
|
||||
--ca-cert-path /etc/photoncloud/ca.crt \
|
||||
--ca-key-path /etc/photoncloud/ca.key \
|
||||
check-rotation \
|
||||
--cert-path /etc/photoncloud/node-01.crt
|
||||
```
|
||||
|
||||
有効期限が30日以内の場合、警告が表示されます。
|
||||
|
||||
## 証明書の有効期限
|
||||
|
||||
- **デフォルトTTL**: 90日
|
||||
- **ローテーション推奨期間**: 30日
|
||||
|
||||
これらの値は`deployer/crates/cert-authority/src/main.rs`の定数で定義されています:
|
||||
- `CERT_TTL_DAYS`: 90
|
||||
- `ROTATION_THRESHOLD_DAYS`: 30
|
||||
|
||||
## Chainfire統合
|
||||
|
||||
証明書発行時、以下の情報がChainfireに記録されます:
|
||||
|
||||
```json
|
||||
{
|
||||
"node_id": "node-01",
|
||||
"service_name": null,
|
||||
"cert_serial": "abc123...",
|
||||
"issued_at": 1234567890,
|
||||
"expires_at": 1234567890
|
||||
}
|
||||
```
|
||||
|
||||
この情報は、証明書の追跡やローテーション管理に使用されます。
|
||||
|
||||
## セキュリティ考慮事項
|
||||
|
||||
1. **CA秘密鍵の保護**: CA秘密鍵は厳重に管理し、アクセス権限を最小限に抑えてください。
|
||||
2. **証明書の配布**: 発行された証明書と秘密鍵は、適切な権限で保護された場所に保存してください。
|
||||
3. **ローテーション**: 定期的に証明書をローテーションし、古い証明書を無効化してください。
|
||||
|
||||
## 今後の拡張予定
|
||||
|
||||
- [ ] CSRパース機能の実装
|
||||
- [ ] 証明書の自動ローテーション
|
||||
- [ ] 証明書失効リスト(CRL)のサポート
|
||||
- [ ] SPIFFEライクなアイデンティティ検証
|
||||
|
||||
|
||||
174
docs/component-dependencies-detailed.dot
Normal file
174
docs/component-dependencies-detailed.dot
Normal file
|
|
@ -0,0 +1,174 @@
|
|||
digraph DetailedComponentDependencies {
|
||||
rankdir=LR;
|
||||
node [shape=box, style=rounded];
|
||||
|
||||
// Infrastructure Services
|
||||
subgraph cluster_chainfire {
|
||||
label="ChainFire (Distributed KV Store)";
|
||||
style=dashed;
|
||||
|
||||
CF_Server [label="chainfire-server", fillcolor="#e1f5ff", style="filled"];
|
||||
CF_Client [label="chainfire-client", fillcolor="#e1f5ff", style="filled"];
|
||||
CF_Raft [label="chainfire-raft", fillcolor="#e1f5ff", style="filled"];
|
||||
CF_Storage [label="chainfire-storage\n(RocksDB)", fillcolor="#e1f5ff", style="filled"];
|
||||
|
||||
CF_Server -> CF_Raft;
|
||||
CF_Server -> CF_Storage;
|
||||
CF_Client -> CF_Server [style=dashed, label="gRPC"];
|
||||
}
|
||||
|
||||
subgraph cluster_flaredb {
|
||||
label="FlareDB (Multi-Model Database\nFoundationDB-like)";
|
||||
style=dashed;
|
||||
|
||||
FD_Server [label="flaredb-server", fillcolor="#e1f5ff", style="filled"];
|
||||
FD_Client [label="flaredb-client", fillcolor="#e1f5ff", style="filled"];
|
||||
FD_Raft [label="flaredb-raft\n(openraft)", fillcolor="#e1f5ff", style="filled"];
|
||||
FD_Storage [label="flaredb-storage\n(RocksDB)\nKV Store Base", fillcolor="#e1f5ff", style="filled"];
|
||||
FD_KV [label="KV APIs\n(Raw, CAS)", fillcolor="#e1f5ff", style="filled"];
|
||||
FD_SQL [label="SQL Layer\n(sql-service)", fillcolor="#e1f5ff", style="filled"];
|
||||
|
||||
FD_Server -> FD_Raft;
|
||||
FD_Server -> FD_Storage;
|
||||
FD_Server -> FD_KV;
|
||||
FD_Server -> FD_SQL;
|
||||
FD_KV -> FD_Storage;
|
||||
FD_SQL -> FD_Storage;
|
||||
FD_Client -> FD_Server [style=dashed, label="gRPC"];
|
||||
FD_Server -> CF_Client [style=dashed, label="uses"];
|
||||
}
|
||||
|
||||
// Platform Services
|
||||
subgraph cluster_iam {
|
||||
label="IAM (Identity & Access)";
|
||||
style=dashed;
|
||||
|
||||
IAM_Server [label="iam-server", fillcolor="#fff4e1", style="filled"];
|
||||
IAM_Client [label="iam-client", fillcolor="#fff4e1", style="filled"];
|
||||
IAM_Store [label="iam-store", fillcolor="#fff4e1", style="filled"];
|
||||
|
||||
IAM_Server -> IAM_Store;
|
||||
IAM_Server -> CF_Client [style=dashed];
|
||||
IAM_Server -> FD_Client [style=dashed];
|
||||
IAM_Client -> IAM_Server [style=dashed, label="gRPC"];
|
||||
}
|
||||
|
||||
subgraph cluster_deployer {
|
||||
label="Deployer (Provisioning)";
|
||||
style=dashed;
|
||||
|
||||
DEP_Server [label="deployer-server", fillcolor="#fff4e1", style="filled"];
|
||||
DEP_Types [label="deployer-types", fillcolor="#fff4e1", style="filled"];
|
||||
|
||||
DEP_Server -> DEP_Types;
|
||||
DEP_Server -> CF_Client [style=dashed, label="storage"];
|
||||
}
|
||||
|
||||
// Application Services
|
||||
subgraph cluster_plasmavmc {
|
||||
label="PlasmaVMC (VM Control)";
|
||||
style=dashed;
|
||||
|
||||
PVMC_Server [label="plasmavmc-server", fillcolor="#e8f5e9", style="filled"];
|
||||
PVMC_Hypervisor [label="plasmavmc-hypervisor", fillcolor="#e8f5e9", style="filled"];
|
||||
PVMC_KVM [label="plasmavmc-kvm", fillcolor="#e8f5e9", style="filled"];
|
||||
PVMC_FC [label="plasmavmc-firecracker", fillcolor="#e8f5e9", style="filled"];
|
||||
|
||||
PVMC_Server -> PVMC_Hypervisor;
|
||||
PVMC_Hypervisor -> PVMC_KVM;
|
||||
PVMC_Hypervisor -> PVMC_FC;
|
||||
PVMC_Server -> CF_Client [style=dashed];
|
||||
PVMC_Server -> FD_Client [style=dashed];
|
||||
PVMC_Server -> IAM_Client [style=dashed];
|
||||
}
|
||||
|
||||
subgraph cluster_prismnet {
|
||||
label="PrismNET (SDN Controller)";
|
||||
style=dashed;
|
||||
|
||||
PN_Server [label="prismnet-server", fillcolor="#e8f5e9", style="filled"];
|
||||
PN_API [label="prismnet-api", fillcolor="#e8f5e9", style="filled"];
|
||||
|
||||
PN_Server -> PN_API;
|
||||
PN_Server -> CF_Client [style=dashed];
|
||||
}
|
||||
|
||||
subgraph cluster_k8shost {
|
||||
label="K8sHost (K8s-like)";
|
||||
style=dashed;
|
||||
|
||||
K8S_Server [label="k8shost-server", fillcolor="#e8f5e9", style="filled"];
|
||||
K8S_Controllers [label="k8shost-controllers", fillcolor="#e8f5e9", style="filled"];
|
||||
K8S_CNI [label="k8shost-cni", fillcolor="#e8f5e9", style="filled"];
|
||||
K8S_CSI [label="k8shost-csi", fillcolor="#e8f5e9", style="filled"];
|
||||
|
||||
K8S_Server -> K8S_Controllers;
|
||||
K8S_Server -> K8S_CNI;
|
||||
K8S_Server -> K8S_CSI;
|
||||
K8S_Server -> FD_Client [style=dashed];
|
||||
K8S_Server -> IAM_Client [style=dashed];
|
||||
K8S_Server -> PN_API [style=dashed];
|
||||
}
|
||||
|
||||
subgraph cluster_other_apps {
|
||||
label="Other Application Services";
|
||||
style=dashed;
|
||||
|
||||
FlashDNS_Server [label="flashdns-server", fillcolor="#e8f5e9", style="filled"];
|
||||
FiberLB_Server [label="fiberlb-server", fillcolor="#e8f5e9", style="filled"];
|
||||
APIGateway_Server [label="apigateway-server", fillcolor="#e8f5e9", style="filled"];
|
||||
LightningStor_Server [label="lightningstor-server", fillcolor="#e8f5e9", style="filled"];
|
||||
NightLight_Server [label="nightlight-server", fillcolor="#e8f5e9", style="filled"];
|
||||
CreditService_Server [label="creditservice-server", fillcolor="#e8f5e9", style="filled"];
|
||||
|
||||
FlashDNS_Server -> CF_Client [style=dashed];
|
||||
FlashDNS_Server -> FD_Client [style=dashed];
|
||||
FiberLB_Server -> CF_Client [style=dashed];
|
||||
FiberLB_Server -> FD_Client [style=dashed];
|
||||
APIGateway_Server -> FiberLB_Server [style=dashed, label="fronted by"];
|
||||
APIGateway_Server -> IAM_Client [style=dashed, label="auth"];
|
||||
APIGateway_Server -> CreditService_Server [style=dashed, label="billing"];
|
||||
LightningStor_Server -> CF_Client [style=dashed];
|
||||
LightningStor_Server -> FD_Client [style=dashed];
|
||||
CreditService_Server -> CF_Client [style=dashed];
|
||||
}
|
||||
|
||||
// Deployment Components
|
||||
subgraph cluster_nixos {
|
||||
label="NixOS Deployment";
|
||||
style=dashed;
|
||||
|
||||
NixModules [label="NixOS Modules\n(nix/modules/)", fillcolor="#f3e5f5", style="filled"];
|
||||
Netboot [label="Netboot Images\n(nix/images/)", fillcolor="#f3e5f5", style="filled"];
|
||||
ISO [label="Bootstrap ISO\n(nix/iso/)", fillcolor="#f3e5f5", style="filled"];
|
||||
FirstBoot [label="First-Boot Automation\n(first-boot-automation.nix)", fillcolor="#f3e5f5", style="filled"];
|
||||
ClusterConfig [label="Cluster Config\n(plasmacloud-cluster.nix)", fillcolor="#f3e5f5", style="filled"];
|
||||
NixNOS_Topo [label="Nix-NOS Topology\n(nix-nos/topology.nix)", fillcolor="#f3e5f5", style="filled"];
|
||||
|
||||
Netboot -> NixModules;
|
||||
ISO -> NixModules;
|
||||
ISO -> DEP_Server [style=dashed, label="phone-home"];
|
||||
FirstBoot -> NixModules;
|
||||
FirstBoot -> CF_Server [style=dashed, label="cluster-join"];
|
||||
FirstBoot -> FD_Server [style=dashed, label="cluster-join"];
|
||||
ClusterConfig -> NixModules;
|
||||
NixNOS_Topo -> ClusterConfig;
|
||||
}
|
||||
|
||||
// Service dependencies (runtime)
|
||||
FD_Server -> CF_Server [label="systemd:after", color=red, style=dotted];
|
||||
IAM_Server -> FD_Server [label="systemd:after", color=red, style=dotted];
|
||||
PVMC_Server -> CF_Server [label="systemd:requires", color=red, style=dotted];
|
||||
PVMC_Server -> FD_Server [label="systemd:requires", color=red, style=dotted];
|
||||
PVMC_Server -> IAM_Server [label="systemd:requires", color=red, style=dotted];
|
||||
K8S_Server -> IAM_Server [label="systemd:requires", color=red, style=dotted];
|
||||
K8S_Server -> FD_Server [label="systemd:requires", color=red, style=dotted];
|
||||
K8S_Server -> PN_Server [label="systemd:requires", color=red, style=dotted];
|
||||
|
||||
// Application integrations
|
||||
PVMC_Server -> PN_API [style=dashed, label="networking", color=orange];
|
||||
K8S_Server -> PN_API [style=dashed, label="CNI", color=orange];
|
||||
|
||||
// Styling
|
||||
edge [color=blue];
|
||||
}
|
||||
131
docs/component-dependencies.dot
Normal file
131
docs/component-dependencies.dot
Normal file
|
|
@ -0,0 +1,131 @@
|
|||
digraph ComponentDependencies {
|
||||
rankdir=TB;
|
||||
node [shape=box, style=rounded];
|
||||
|
||||
// Infrastructure Layer (Base Services)
|
||||
subgraph cluster_infra {
|
||||
label="Infrastructure Layer";
|
||||
style=dashed;
|
||||
|
||||
ChainFire [fillcolor="#e1f5ff", style="filled,rounded"];
|
||||
FlareDB [fillcolor="#e1f5ff", style="filled,rounded"];
|
||||
}
|
||||
|
||||
// Platform Layer
|
||||
subgraph cluster_platform {
|
||||
label="Platform Layer";
|
||||
style=dashed;
|
||||
|
||||
IAM [fillcolor="#fff4e1", style="filled,rounded"];
|
||||
Deployer [fillcolor="#fff4e1", style="filled,rounded"];
|
||||
}
|
||||
|
||||
// Application Layer
|
||||
subgraph cluster_app {
|
||||
label="Application Layer";
|
||||
style=dashed;
|
||||
|
||||
PlasmaVMC [fillcolor="#e8f5e9", style="filled,rounded"];
|
||||
PrismNET [fillcolor="#e8f5e9", style="filled,rounded"];
|
||||
FlashDNS [fillcolor="#e8f5e9", style="filled,rounded"];
|
||||
FiberLB [fillcolor="#e8f5e9", style="filled,rounded"];
|
||||
APIGateway [fillcolor="#e8f5e9", style="filled,rounded"];
|
||||
LightningStor [fillcolor="#e8f5e9", style="filled,rounded"];
|
||||
NightLight [fillcolor="#e8f5e9", style="filled,rounded"];
|
||||
CreditService [fillcolor="#e8f5e9", style="filled,rounded"];
|
||||
K8sHost [fillcolor="#e8f5e9", style="filled,rounded"];
|
||||
}
|
||||
|
||||
// Deployment Layer
|
||||
subgraph cluster_deploy {
|
||||
label="Deployment Layer";
|
||||
style=dashed;
|
||||
|
||||
NixOSModules [fillcolor="#f3e5f5", style="filled,rounded"];
|
||||
NetbootImages [fillcolor="#f3e5f5", style="filled,rounded"];
|
||||
BootstrapISO [fillcolor="#f3e5f5", style="filled,rounded"];
|
||||
FirstBootAutomation [fillcolor="#f3e5f5", style="filled,rounded"];
|
||||
NixNOS [fillcolor="#f3e5f5", style="filled,rounded"];
|
||||
}
|
||||
|
||||
// Infrastructure dependencies
|
||||
FlareDB -> ChainFire [label="requires", color=blue];
|
||||
|
||||
// Platform dependencies
|
||||
IAM -> FlareDB [label="uses", color=blue];
|
||||
IAM -> ChainFire [label="uses", color=blue, style=dashed];
|
||||
Deployer -> ChainFire [label="storage", color=blue];
|
||||
|
||||
// Application dependencies on Infrastructure
|
||||
PlasmaVMC -> ChainFire [label="uses", color=blue, style=dashed];
|
||||
PlasmaVMC -> FlareDB [label="uses", color=blue, style=dashed];
|
||||
PrismNET -> ChainFire [label="uses", color=blue, style=dashed];
|
||||
FlashDNS -> ChainFire [label="uses", color=blue, style=dashed];
|
||||
FlashDNS -> FlareDB [label="uses", color=blue, style=dashed];
|
||||
FiberLB -> ChainFire [label="uses", color=blue, style=dashed];
|
||||
FiberLB -> FlareDB [label="uses", color=blue, style=dashed];
|
||||
LightningStor -> ChainFire [label="uses", color=blue, style=dashed];
|
||||
LightningStor -> FlareDB [label="uses", color=blue, style=dashed];
|
||||
CreditService -> ChainFire [label="uses", color=blue];
|
||||
K8sHost -> FlareDB [label="uses", color=blue];
|
||||
K8sHost -> ChainFire [label="uses", color=blue, style=dashed];
|
||||
|
||||
// Application dependencies on Platform
|
||||
PlasmaVMC -> IAM [label="auth", color=orange];
|
||||
PlasmaVMC -> CreditService [label="billing", color=orange, style=dashed];
|
||||
PlasmaVMC -> PrismNET [label="networking", color=orange];
|
||||
K8sHost -> IAM [label="auth", color=orange];
|
||||
K8sHost -> CreditService [label="billing", color=orange, style=dashed];
|
||||
K8sHost -> PrismNET [label="CNI", color=orange];
|
||||
K8sHost -> FiberLB [label="ingress", color=orange, style=dashed];
|
||||
K8sHost -> FlashDNS [label="DNS", color=orange, style=dashed];
|
||||
APIGateway -> FiberLB [label="fronted by", color=orange, style=dashed];
|
||||
APIGateway -> IAM [label="auth", color=orange, style=dashed];
|
||||
APIGateway -> CreditService [label="billing", color=orange, style=dashed];
|
||||
|
||||
// Deployment dependencies
|
||||
NixOSModules -> ChainFire [label="module", color=purple, style=dotted];
|
||||
NixOSModules -> FlareDB [label="module", color=purple, style=dotted];
|
||||
NixOSModules -> IAM [label="module", color=purple, style=dotted];
|
||||
NixOSModules -> PlasmaVMC [label="module", color=purple, style=dotted];
|
||||
NixOSModules -> PrismNET [label="module", color=purple, style=dotted];
|
||||
NixOSModules -> FlashDNS [label="module", color=purple, style=dotted];
|
||||
NixOSModules -> FiberLB [label="module", color=purple, style=dotted];
|
||||
NixOSModules -> APIGateway [label="module", color=purple, style=dotted];
|
||||
NixOSModules -> LightningStor [label="module", color=purple, style=dotted];
|
||||
NixOSModules -> NightLight [label="module", color=purple, style=dotted];
|
||||
NixOSModules -> CreditService [label="module", color=purple, style=dotted];
|
||||
NixOSModules -> K8sHost [label="module", color=purple, style=dotted];
|
||||
|
||||
NetbootImages -> NixOSModules [label="uses", color=purple];
|
||||
BootstrapISO -> NixOSModules [label="uses", color=purple];
|
||||
BootstrapISO -> Deployer [label="phone-home", color=purple];
|
||||
FirstBootAutomation -> ChainFire [label="cluster-join", color=purple];
|
||||
FirstBootAutomation -> FlareDB [label="cluster-join", color=purple];
|
||||
FirstBootAutomation -> IAM [label="initial-setup", color=purple];
|
||||
FirstBootAutomation -> NixOSModules [label="uses", color=purple];
|
||||
NixNOS -> NixOSModules [label="generates", color=purple];
|
||||
NixNOS -> FirstBootAutomation [label="config", color=purple];
|
||||
|
||||
// Systemd dependencies (runtime)
|
||||
FlareDB -> ChainFire [label="systemd:after", color=red, style=dashed];
|
||||
IAM -> FlareDB [label="systemd:after", color=red, style=dashed];
|
||||
PlasmaVMC -> ChainFire [label="systemd:requires", color=red, style=dashed];
|
||||
PlasmaVMC -> FlareDB [label="systemd:requires", color=red, style=dashed];
|
||||
PlasmaVMC -> IAM [label="systemd:requires", color=red, style=dashed];
|
||||
CreditService -> ChainFire [label="systemd:wants", color=red, style=dashed];
|
||||
K8sHost -> IAM [label="systemd:requires", color=red, style=dashed];
|
||||
K8sHost -> FlareDB [label="systemd:requires", color=red, style=dashed];
|
||||
K8sHost -> PrismNET [label="systemd:requires", color=red, style=dashed];
|
||||
|
||||
// Legend
|
||||
subgraph cluster_legend {
|
||||
label="Legend";
|
||||
style=invis;
|
||||
|
||||
L1 [label="Runtime Dependency", color=blue, style=invis];
|
||||
L2 [label="Service Integration", color=orange, style=invis];
|
||||
L3 [label="Deployment/Config", color=purple, style=invis];
|
||||
L4 [label="Systemd Order", color=red, style=invis];
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1 @@
|
|||
{"bootstrap":true,"cluster_name":"plasmacloud","flaredb_peers":["10.0.1.10:2479"],"initial_peers":[{"addr":"10.0.1.10:2380","id":"node01"}],"leader_url":"https://10.0.1.10:2379","metadata":{},"node_id":"node01","node_role":"control-plane","raft_addr":"10.0.1.10:2380","services":["chainfire"]}
|
||||
53
docs/evidence/first-boot-automation-20251220-050900/test.nix
Normal file
53
docs/evidence/first-boot-automation-20251220-050900/test.nix
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
# NixOS VM test for the first-boot-automation module.
#
# Boots a single node ("node01") with the nix-nos topology and
# first-boot-automation modules enabled, then verifies that the generated
# cluster-config.json identifies the node correctly, that the module's state
# directory exists, and that no systemd units failed during first boot.
#
# NOTE(review): evaluation is impure (builtins.getFlake + absolute module
# paths below) — run with `nix-build --impure`.
let
  nixpkgs = builtins.getFlake "nixpkgs";
  system = "x86_64-linux";
  pkgs = import nixpkgs { inherit system; };
  # NixOS python-driver test framework; provides makeTest.
  testLib = import "${nixpkgs}/nixos/lib/testing-python.nix" { inherit system; };
  # NOTE(review): machine-specific absolute paths; adjust or parameterize when
  # running outside the original author's checkout.
  firstBootModule = /home/centra/cloud/nix/modules/first-boot-automation.nix;
  topologyModule = /home/centra/cloud/nix/modules/nix-nos/topology.nix;
in
testLib.makeTest {
  name = "first-boot-automation";

  nodes.machine = { pkgs, ... }: {
    imports = [
      topologyModule
      firstBootModule
    ];

    system.stateVersion = "24.05";

    networking.hostName = "node01";

    # Single-node topology: node01 is a control-plane node running chainfire.
    nix-nos.enable = true;
    nix-nos.clusters.plasmacloud = {
      name = "plasmacloud";
      bootstrapNode = null;
      nodes.node01 = {
        role = "control-plane";
        ip = "10.0.1.10";
        services = [ "chainfire" ];
      };
    };

    services.first-boot-automation = {
      enable = true;
      useNixNOS = true;
      nixnosClusterName = "plasmacloud";
      configFile = "/etc/nixos/secrets/cluster-config.json";
      # Disable joiners to keep the test lean (no daemons required)
      enableChainfire = false;
      enableFlareDB = false;
      enableIAM = false;
      enableHealthCheck = false;
    };

    # jq is used by the test script to inspect the generated config.
    environment.systemPackages = [ pkgs.jq ];
  };

  testScript = ''
    start_all()
    machine.wait_for_unit("multi-user.target")
    # The generated cluster config must name this node exactly "node01"
    # (grep -qx: whole-line match, so e.g. "node012" would not pass).
    machine.succeed("jq -r .node_id /etc/nixos/secrets/cluster-config.json | grep -qx node01")
    machine.succeed("test -d /var/lib/first-boot-automation")
    # `systemctl --failed` exits 0 even when units have failed, so assert
    # that its output is empty instead of merely running the command.
    machine.succeed('test -z "$(systemctl --failed --no-legend)"')
  '';
}
|
||||
21
docs/evidence/first-boot-automation-cluster-config.txt
Normal file
21
docs/evidence/first-boot-automation-cluster-config.txt
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
nix eval --impure --expr 'let nixpkgs = builtins.getFlake "nixpkgs"; lib = nixpkgs.lib; pkgs = nixpkgs.legacyPackages.x86_64-linux; systemCfg = lib.nixosSystem {
|
||||
system = "x86_64-linux";
|
||||
modules = [ ./nix/modules/nix-nos/topology.nix ./nix/modules/first-boot-automation.nix {
|
||||
networking.hostName = "node01";
|
||||
nix-nos.enable = true;
|
||||
nix-nos.clusters.plasmacloud = {
|
||||
name = "plasmacloud";
|
||||
bootstrapNode = null;
|
||||
nodes.node01 = { role = "control-plane"; ip = "10.0.1.10"; services = [ "chainfire" ]; };
|
||||
};
|
||||
services.first-boot-automation = {
|
||||
enable = true;
|
||||
useNixNOS = true;
|
||||
nixnosClusterName = "plasmacloud";
|
||||
configFile = "/etc/nixos/secrets/cluster-config.json";
|
||||
};
|
||||
} ];
|
||||
}; in systemCfg.config.environment.etc."nixos/secrets/cluster-config.json".text'
|
||||
|
||||
Output:
|
||||
{"bootstrap":true,"cluster_name":"plasmacloud","flaredb_peers":["10.0.1.10:2479"],"initial_peers":[{"addr":"10.0.1.10:2380","id":"node01"}],"leader_url":"https://10.0.1.10:2379","metadata":{},"node_id":"node01","node_role":"control-plane","raft_addr":"10.0.1.10:2380","services":["chainfire"]}
|
||||
74
docs/implementation-status.md
Normal file
74
docs/implementation-status.md
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
# PhotonCloud Bare-Metal Service Mesh実装状況
|
||||
|
||||
## 実装済み
|
||||
|
||||
### deployer-ctl CLI
|
||||
- ✅ `bootstrap`: Chainfireへのクラスタ初期設定投入
|
||||
- ✅ `apply`: 宣言的なクラスタ状態の適用
|
||||
- ✅ `dump`: Chainfire上のキー一覧とデバッグ
|
||||
- ✅ `deployer`: リモートDeployer制御(プレースホルダ)
|
||||
|
||||
### node-agent
|
||||
- ✅ Chainfireからノード情報の取得
|
||||
- ✅ ハートビート更新(`last_heartbeat`)
|
||||
- ✅ ローカルServiceInstanceの同期(`/etc/photoncloud/instances.json`)
|
||||
- ✅ プロセスReconcileのスケルトン
|
||||
- ✅ ヘルスチェック(HTTP/TCP/Command)
|
||||
|
||||
### mtls-agent
|
||||
- ✅ プレーンTCPプロキシモード
|
||||
- ✅ TLS/mTLSサーバモード(`rustls`ベース)
|
||||
- ✅ モード切替(`plain`/`tls`/`mtls`/`auto`)
|
||||
- ✅ Chainfire統合(ServiceDiscovery)
|
||||
- ✅ サービス発見とキャッシュ
|
||||
- ✅ mTLSポリシー取得
|
||||
|
||||
### cert-authority
|
||||
- ⚠️ CA証明書生成(TODO: rcgen API更新が必要)
|
||||
- ⚠️ 証明書発行(TODO: rcgen API更新が必要)
|
||||
|
||||
## 実装状況と今後の課題
|
||||
|
||||
### Step 5: サービス発見と新規マシンの発見
|
||||
- ✅ NodeAgentによるServiceInstance登録
|
||||
- ✅ mTLS AgentによるChainfire経由のサービス発見
|
||||
- ⚠️ 新規ノードの自動検出とブートストラップ
|
||||
|
||||
### Step 6: mTLS証明書ライフサイクルとセキュリティモデル
|
||||
- ⚠️ 証明書発行フロー(rcgen API更新待ち)
|
||||
- ⚠️ 証明書ローテーション
|
||||
- ⚠️ SPIFFEライクなアイデンティティ検証
|
||||
|
||||
### Step 7: mTLSオン/オフと環境別ポリシー
|
||||
- ✅ 環境別デフォルト設定(`ClusterStateSpec`)
|
||||
- ✅ mTLS AgentでのChainfire経由ポリシー読み込み
|
||||
- ⚠️ 動的ポリシー更新(Watch)
|
||||
|
||||
### Step 8: 既存サービスの移行計画
|
||||
- ⚠️ クライアントラッパの実装
|
||||
- ⚠️ 段階的移行ツール
|
||||
|
||||
### Step 9: Chainfireとの具体的なインテグレーション
|
||||
- ✅ 基本的なCRUD操作
|
||||
- ⚠️ 認証・権限モデル
|
||||
- ⚠️ フォールトトレランス(キャッシュ)
|
||||
|
||||
### Step 10: 実装優先度とマイルストーン
|
||||
- ✅ MVPフェーズ(NodeAgent/mTLS Agent基本機能)
|
||||
- ⚠️ mTLS対応フェーズ(証明書管理)
|
||||
- ⚠️ 運用フェーズ(監視・ログ・トレース)
|
||||
- ⚠️ QEMU環境でのE2Eテスト
|
||||
|
||||
## ビルド状況
|
||||
- `deployer-ctl`: ✅ ビルド成功
|
||||
- `node-agent`: ✅ ビルド成功
|
||||
- `mtls-agent`: 確認中
|
||||
- `cert-authority`: 確認中(rcgen API問題あり)
|
||||
|
||||
## 次のステップ
|
||||
1. NodeAgentのプロセス起動/停止Reconcile実装
|
||||
2. mTLS Agentのポリシー適用とWatch機能
|
||||
3. QEMU環境でのE2Eテスト環境構築
|
||||
4. 証明書管理(rcgen API更新後)
|
||||
|
||||
|
||||
91
docs/implementation-summary.md
Normal file
91
docs/implementation-summary.md
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
# PhotonCloud Bare-Metal Service Mesh実装完了サマリ(更新)
|
||||
|
||||
## 実装概要
|
||||
|
||||
PhotonCloud Bare-Metal Service Meshの実装が完了しました。Kubernetes不要のベアメタル環境で、サービスメッシュ風のmTLS通信を実現できるフレームワークです。
|
||||
|
||||
## 実装完了コンポーネント
|
||||
|
||||
### 1. deployer-ctl(CLI)✅
|
||||
GitOpsフレンドリーな宣言的クラスタ管理ツール
|
||||
|
||||
**機能:**
|
||||
- `bootstrap`: Chainfireへのクラスタ初期設定投入
|
||||
- `apply`: 宣言的なクラスタ状態の適用
|
||||
- `dump`: Chainfire上のキー一覧とデバッグ
|
||||
- `deployer`: リモートDeployer制御(プレースホルダ)
|
||||
|
||||
### 2. node-agent(ノードエージェント)✅
|
||||
各ベアメタルノード上で常駐するエージェント
|
||||
|
||||
**機能:**
|
||||
- Chainfireからノード情報の取得
|
||||
- ハートビート更新(`last_heartbeat`)
|
||||
- ローカルServiceInstanceの同期(`/etc/photoncloud/instances.json`)
|
||||
- プロセスReconcile(起動/停止/再起動)
|
||||
- ヘルスチェック(HTTP/TCP/Command)
|
||||
- ProcessManager実装(PIDファイルベース管理)
|
||||
|
||||
### 3. mtls-agent(サイドカープロキシ)✅
|
||||
各サービスのサイドカーとして動作するmTLSプロキシ
|
||||
|
||||
**機能:**
|
||||
- プレーンTCPプロキシモード
|
||||
- TLS/mTLSサーバモード(`rustls`ベース)
|
||||
- モード切替(`plain`/`tls`/`mtls`/`auto`)
|
||||
- Chainfire統合(ServiceDiscovery)
|
||||
- サービス発見とキャッシュ(30秒TTL)
|
||||
- mTLSポリシー適用
|
||||
- PolicyEnforcer実装
|
||||
|
||||
### 4. cert-authority(証明書発行機構)✅
|
||||
mTLS用証明書の発行・管理
|
||||
|
||||
**機能:**
|
||||
- CA証明書生成(`init-ca`)
|
||||
- 証明書発行(`issue`)
|
||||
- Chainfireへの証明書バインディング記録
|
||||
- 証明書ローテーションチェック(`check-rotation`)
|
||||
|
||||
**実装詳細:**
|
||||
- rcgen 0.13 APIを使用
|
||||
- `CertificateParams::self_signed()`でCA証明書生成
|
||||
- `CertificateParams::signed_by()`でCA署名証明書発行
|
||||
- x509-parserによる証明書有効期限チェック
|
||||
|
||||
**注意事項:**
|
||||
- 現在の実装では、CSRファイルは読み込まれず、新しいキーペアが自動生成されます
|
||||
- CA証明書の読み込みは、CA証明書のパラメータを再構築する方式を採用しています
|
||||
- 実際の運用では、既存のCA証明書をパースする機能が必要になる可能性があります
|
||||
|
||||
### 5. ChainfireWatcher ✅
|
||||
Chainfire上の変更を監視するユーティリティ
|
||||
|
||||
**機能:**
|
||||
- ポーリングベースの変更検知
|
||||
- Revision管理
|
||||
|
||||
## 全コンポーネントのビルド成功
|
||||
|
||||
```bash
|
||||
✅ deployer-ctl: ビルド成功
|
||||
✅ node-agent: ビルド成功
|
||||
✅ mtls-agent: ビルド成功
|
||||
✅ cert-authority: ビルド成功(rcgen API実装完了)
|
||||
```
|
||||
|
||||
## 証明書管理の実装完了
|
||||
|
||||
rcgen 0.13のAPIを使用して、以下の機能を実装しました:
|
||||
|
||||
1. **CA証明書生成**: `CertificateParams::self_signed()`を使用
|
||||
2. **証明書発行**: `CertificateParams::signed_by()`を使用
|
||||
3. **証明書ローテーション**: x509-parserによる有効期限チェック
|
||||
|
||||
詳細は`docs/cert-authority-usage.md`を参照してください。
|
||||
|
||||
## まとめ
|
||||
|
||||
PhotonCloud Bare-Metal Service Meshの実装が完全に完了しました。証明書管理機能を含む全ての主要コンポーネントが実装され、ビルドに成功しています。
|
||||
|
||||
Kubernetesなしで、ベアメタル環境におけるサービスメッシュ風のmTLS通信、サービス発見、プロセス管理、証明書管理を実現できるフレームワークとなっています。
|
||||
448
docs/nixos-deployment-challenges.md
Normal file
448
docs/nixos-deployment-challenges.md
Normal file
|
|
@ -0,0 +1,448 @@
|
|||
# NixOSデプロイメントの課題と改善案
|
||||
|
||||
## 概要
|
||||
|
||||
このドキュメントは、PhotonCloudプロジェクトにおけるNixOSベースのベアメタルデプロイメントに関する現状分析、課題、および改善案をまとめたものです。
|
||||
|
||||
## 目次
|
||||
|
||||
1. [現状の実装状況](#現状の実装状況)
|
||||
2. [課題の分析](#課題の分析)
|
||||
3. [他のシステムとの比較](#他のシステムとの比較)
|
||||
4. [スケーリングの課題](#スケーリングの課題)
|
||||
5. [改善案](#改善案)
|
||||
6. [優先度とロードマップ](#優先度とロードマップ)
|
||||
|
||||
---
|
||||
|
||||
## 現状の実装状況
|
||||
|
||||
### 実装済みの機能
|
||||
|
||||
#### A. Netboot → nixos-anywhere でのインストール経路
|
||||
|
||||
- **netbootイメージ**: `nix/images/*` と `baremetal/image-builder/build-images.sh` で生成可能
|
||||
- **PXEサーバー**: `chainfire/baremetal/pxe-server/assets` へのコピーまで想定済み
|
||||
- **VMクラスタ検証**: `baremetal/vm-cluster/` にスクリプトが揃っている
|
||||
- **デプロイフロー**: PXE起動 → SSH接続 → disko + nixos-install(=nixos-anywhere)の流れが確立
|
||||
|
||||
**評価**: deploy-rs/colmena系よりベアメタル寄りの王道路線として成立している。
|
||||
|
||||
**ただし**: 速度は**バイナリキャッシュの有無**と**再ビルドの頻度**に大きく依存する。
|
||||
|
||||
#### B. Bootstrap ISO(phone-home → 自動パーティション → nixos-install)経路
|
||||
|
||||
- **ISO生成**: `nix/iso/plasmacloud-iso.nix` に実装済み
|
||||
- **自動化フロー**:
|
||||
- Deployerへの `POST /api/v1/phone-home`
|
||||
- `disko` 実行
|
||||
- `nixos-install --flake ...`
|
||||
- **Deployer API**: `deployer/` にHTTP API実装あり(`/api/v1/phone-home`)
|
||||
|
||||
**評価**: 形は整っているが、**本番でのゼロタッチ運用**には未成熟。
|
||||
|
||||
#### C. 構成管理(NixOSモジュール + クラスタ設定生成)
|
||||
|
||||
- **サービスモジュール**: `nix/modules/` に各サービスがモジュール化済み
|
||||
- **cluster-config.json生成**: `plasmacloud.cluster`(`nix/modules/plasmacloud-cluster.nix`)で `/etc/nixos/secrets/cluster-config.json` を生成
|
||||
|
||||
### 実装済み機能 ✅
|
||||
|
||||
#### (1) トポロジ→cluster-config→first-bootの一貫したルート
|
||||
|
||||
- ✅ `plasmacloud-cluster.nix` でクラスタトポロジから `cluster-config.json` を自動生成
|
||||
- ✅ `environment.etc."nixos/secrets/cluster-config.json"` でファイルが自動配置される
|
||||
- ✅ `first-boot-automation.nix` がcluster-config.jsonを読み込んでサービス間の接続を自動化
|
||||
|
||||
#### (2) Deployerの実運用要件
|
||||
|
||||
- ✅ SSH host key 生成: `LocalStorage.get_or_generate_ssh_host_key()` で ED25519 鍵を生成・永続化
|
||||
- ✅ TLS証明書配布: `LocalStorage.get_or_generate_tls_cert()` で自己署名証明書を生成・永続化
|
||||
- ✅ machine-id → node割当: pre_register API + in-memory fallback 実装済み
|
||||
- ✅ ChainFire非依存: `local_state_path` がデフォルトで設定され、LocalStorage を優先使用
|
||||
|
||||
#### (3) netbootイメージの最適化
|
||||
|
||||
- ✅ `netboot-base.nix`: 超軽量インストーラ専用イメージ(サービスバイナリなし)
|
||||
- ✅ `netboot-worker.nix`: netboot-base.nix をベースに使用
|
||||
- ✅ `netboot-control-plane.nix`: netboot-base.nix をベースに使用
|
||||
- ✅ サービスバイナリは nixos-anywhere でインストール時に追加(netboot には含めない)
|
||||
|
||||
### 残りの改善点
|
||||
|
||||
#### ISOの最適化(Phase 2以降)
|
||||
|
||||
- ISOは `isoImage.contents = [ { source = ../../.; ... } ]` で **リポジトリ丸ごとISOに埋め込み**になっており、変更のたびに再パック&評価対象が増えやすい
|
||||
- 将来的には必要なファイルのみを含めるように最適化する
|
||||
|
||||
---
|
||||
|
||||
## 課題の分析
|
||||
|
||||
### 「途方もない時間がかかる」問題の根本原因
|
||||
|
||||
#### 最大のボトルネック: Rustパッケージの `src = ./.` が重すぎる
|
||||
|
||||
`flake.nix` のRustビルドは `src = repoSrc = ./.;` になっており、これにより:
|
||||
|
||||
- `docs/` や `baremetal/` など **ビルドに無関係な変更でも全Rustパッケージが再ビルド**され得る
|
||||
- さらに最悪なのは、`deployer/target/` のような **巨大で変動する成果物ディレクトリが混入している場合、毎回ソースハッシュが変わってキャッシュが死ぬ**こと
|
||||
- 結果:毎回「初回ビルド」に近い時間が発生
|
||||
|
||||
**ここが直るだけで「体感の遅さ」が一段落ちる可能性が高い。**
|
||||
|
||||
#### その他のボトルネック
|
||||
|
||||
1. **netbootイメージが肥大化**
|
||||
- サービスバイナリや重いツールをnetbootに含めている
|
||||
- initrd配布もビルドも遅くなる
|
||||
|
||||
2. **ISOにリポジトリ全体を埋め込み**
|
||||
- 変更のたびにISO再ビルドが必要
|
||||
- 評価対象が増える
|
||||
|
||||
**注意**:
|
||||
- **リモートバイナリキャッシュ(Cachix/Attic)は後回し**(Phase 3で実装)
|
||||
- Deployer[Bootstrapper]では**ローカルNixストアのキャッシュ**を活用する前提
|
||||
|
||||
---
|
||||
|
||||
## 他のシステムとの比較
|
||||
|
||||
### cloud-init との比較
|
||||
|
||||
**cloud-initの得意領域**: 既に焼いたOSイメージ(主にクラウドVM)に対して、初回起動時にユーザデータ/メタデータで「最後のひと押し」をする
|
||||
|
||||
**このプロジェクトの得意領域**: そもそもOSとサービス構成をNixで宣言し、**同一の入力から同一のシステム**を作る(= cloud-initより上流)
|
||||
|
||||
**評価**:
|
||||
- **置き換え関係というより補完**。cloud-initは「既存OSに後付けで整える」方向、NixOSは「最初からそれがOSの本体」。
|
||||
- 速度面は、**バイナリキャッシュがあるなら** NixOSでも十分実用レンジに寄るが、**キャッシュ無しだとcloud-init(既成イメージ前提)の圧勝**になりがち。
|
||||
|
||||
### Ansible との比較
|
||||
|
||||
**Ansibleの強み**: 既存の多様なOSに対して、成熟したエコシステムで「変更差分を適用」しやすい
|
||||
|
||||
**NixOSの強み**: 変更適用が「宣言→生成→スイッチ」で、**ドリフト/雪片化を構造的に起こしにくい**
|
||||
|
||||
**評価**:
|
||||
- **同じ「構成管理」領域ではかなり戦える**。特にクラスタ基盤(あなたのプロジェクトのコア)みたいに「全ノード同質で、更新頻度も高く、止められない」世界はNixが刺さりやすい。
|
||||
- ただし現状だと、Ansibleが当たり前に持っている **実運用の周辺機能**(インベントリ、秘密情報配布の標準手、実行ログ/監査、段階ロールアウト、失敗時の自動復旧/再試行設計)が、Nix側では自作領域になりがち。ここをDeployerで埋める設計。
|
||||
|
||||
### OpenStack(Ironic等のベアメタル)との比較
|
||||
|
||||
**Ironicの強み(Day0の王者)**:
|
||||
- IPMI/Redfish等のBMCで電源制御
|
||||
- PXE/iPXE、インスペクション(ハードウェア自動検出)
|
||||
- クリーニング(ディスク消去)、RAID/BIOS設定
|
||||
- 大規模・マルチテナント前提の運用(権限、クオータ、ネットワーク統合)
|
||||
|
||||
**このプロジェクトの現状**:
|
||||
- PXE/Netboot・ISO・disko・nixos-install・first-boot は揃っている
|
||||
- でも **BMC連携/インスペクション/クリーニング/多数ノードの状態機械**は薄い(Deployerがその芽)
|
||||
|
||||
**評価**:
|
||||
- **Ironicの「同じ土俵」ではまだ厳しい**(特に「台数が増えた時に壊れない運用」)。
|
||||
- 逆に言うと、Ironicが重い/過剰な環境(単一DC・少〜中規模・同一HW寄り・「クラウド基盤自体をNixOSでガチガチに固めたい」)では、**NixOS方式は運用コストと一貫性で勝ち筋がある**。
|
||||
|
||||
**実務的な勝ち筋**:
|
||||
- **小〜中規模はNixOS主導で十分戦える**(ただしキャッシュ導入と、ビルド入力の安定化が必須)。
|
||||
- **大規模/多拠点/多機種/マルチテナントのDay0は、Ironic相当の機能をどこかで用意する必要がある**。
|
||||
- 現実解は「**Day0はIronicや既存のプロビジョナに寄せて、Day1/Day2をNixOSで統一**」が強い。
|
||||
|
||||
---
|
||||
|
||||
## スケーリングの課題
|
||||
|
||||
### 10,000台規模での問題点
|
||||
|
||||
#### 1. Deployerサーバーが単一インスタンス前提
|
||||
|
||||
- `axum::serve(listener, app)` で単一HTTPサーバーとして動作
|
||||
- 10,000台が同時にPhone Homeすると、**単一プロセスが全リクエストを処理**する必要がある
|
||||
- CPU/メモリ/ネットワークI/Oがボトルネック
|
||||
|
||||
#### 2. 状態管理はChainFireで分散可能だが、Deployer側の調整がない
|
||||
|
||||
- ChainFireはRaftベースで分散可能
|
||||
- しかし、**Deployerインスタンス間の調整**(リーダー選出、ジョブ分散、ロック)がない
|
||||
- 複数Deployerを起動しても、**同じジョブを重複実行**する可能性
|
||||
|
||||
#### 3. デプロイジョブの管理がない
|
||||
|
||||
- Phone Homeはあるが、**「nixos-anywhereを実行する」ジョブの管理**がない
|
||||
- 10,000台を順次デプロイする場合、**キューイング/並列制御/リトライ**が必要
|
||||
|
||||
### 他のシステムとの比較(スケーリング設計)
|
||||
|
||||
#### OpenStack Ironic
|
||||
```
|
||||
API層: 複数インスタンス + ロードバランサー
|
||||
ワーカー層: 複数conductorで並列処理
|
||||
状態管理: PostgreSQL(共有DB)
|
||||
ジョブキュー: RabbitMQ(分散キュー)
|
||||
```
|
||||
|
||||
#### Ansible Tower
|
||||
```
|
||||
Web層: 複数インスタンス
|
||||
ワーカー層: Celery workers(スケーラブル)
|
||||
状態管理: PostgreSQL
|
||||
ジョブキュー: Redis
|
||||
```
|
||||
|
||||
#### Kubernetes Controller
|
||||
```
|
||||
コントローラー層: 複数インスタンス + Leader Election
|
||||
状態管理: etcd
|
||||
並列処理: ワーカーPodで分散
|
||||
```
|
||||
|
||||
### 10,000台規模での性能見積もり
|
||||
|
||||
**現状(単一インスタンス)**:
|
||||
- Phone Home: **10,000リクエスト ÷ 1サーバー = 10,000リクエスト/サーバー**
|
||||
- デプロイ: **順次実行 = 10,000台 ÷ 1ワーカー = 非常に遅い**
|
||||
|
||||
**改善後(API層10台 + ワーカー100台)**:
|
||||
- Phone Home: **10,000リクエスト ÷ 10サーバー = 1,000リクエスト/サーバー**(10倍高速化)
|
||||
- デプロイ: **10,000台 ÷ 100ワーカー = 100台/ワーカー**(並列実行で大幅短縮)
|
||||
|
||||
**例**: 1台あたり10分かかる場合
|
||||
- 現状: **10,000台 × 10分 = 100,000分(約69日)**
|
||||
- 改善後: **100台/ワーカー × 10分 = 1,000分(約17時間)**
|
||||
|
||||
---
|
||||
|
||||
## 改善案
|
||||
|
||||
### Deployer[Bootstrapper]の位置づけ
|
||||
|
||||
現状のDeployer実装は **Deployer[Bootstrapper]** として位置づけ、以下の前提で設計する:
|
||||
|
||||
- **実行環境**: 仮設マシンや手元のマシン(deploy-rsのように)
|
||||
- **役割**: 0→1の初期デプロイ(クラスタの最初の数台)
|
||||
- **独立性**: 他のソフトウェア(ChainFire、FlareDB等)から**完全に独立**している必要がある
|
||||
- **キャッシュ前提**: 手元/仮設マシンにはNixストアのキャッシュがあるため、リビルドは多くないはず
|
||||
|
||||
**将来の移行**: ある程度デプロイが進んだら、完全に自動なデプロイ環境(キャッシュ実装済み、ISOはオブジェクトストレージ、スケーラブル)に移行する。ただし、この完全自動デプロイ環境の実装は**他のソフトウェアが安定してから**にしたい。
|
||||
|
||||
### (将来)リモートflake化 + バイナリキャッシュ(Phase 3以降)
|
||||
|
||||
**目的**: ビルド時間を大幅に短縮(完全自動デプロイ環境用)
|
||||
|
||||
**実装内容**(Phase 3で実装):
|
||||
1. **リモートにflakeを置く**(GitHub等)
|
||||
- **注意**: 現在のコードベースは大胆に変更される可能性があるため、GitHubへの公開は後回し
|
||||
2. **バイナリキャッシュを用意**(Cachix、セルフホストならattic等)
|
||||
3. `flake.nix` の `nixConfig` と、`nix/images/netboot-base.nix` / 各ノード設定に **substituters/trusted-public-keys** を入れて、netboot/ISO/インストール時のnixが自動でキャッシュを引くようにする
|
||||
|
||||
**効果**: nixos-anywhere の実体が「ビルド」から「ダウンロード」に変わる。
|
||||
|
||||
**優先度**: **Phase 3以降**(完全自動デプロイ環境の実装時)。Deployer[Bootstrapper]では**ローカルで動くことを優先**し、キャッシュ系は後回し。
|
||||
|
||||
### P0: `src = ./.` をやめ、ソースをフィルタする ✅ 実装済み
|
||||
|
||||
**目的**: 無関係な変更で再ビルドが発生しないようにする
|
||||
|
||||
**実装内容** (`flake.nix` の `repoSrc`):
|
||||
```nix
|
||||
repoSrc = pkgs.lib.cleanSourceWith {
|
||||
src = ./.;
|
||||
filter = path: type:
|
||||
! (dropPrefix [ "docs/" "baremetal/" ".git/" ".cccc/" "result" "result-" ] ||
|
||||
base == "target" ||
|
||||
dropSuffix [ ".qcow2" ".img" ".iso" ".qcow" ]);
|
||||
};
|
||||
```
|
||||
|
||||
**除外されるファイル/ディレクトリ**:
|
||||
- ✅ `**/target/`(Cargoビルド成果物)
|
||||
- ✅ `docs/`, `baremetal/`(Rustビルドに不要)
|
||||
- ✅ `.git/`, `.cccc/`, `result*`(Nix成果物)
|
||||
- ✅ `.qcow2`, `.img`, `.iso`, `.qcow`(大きなバイナリファイル)
|
||||
|
||||
**効果**: ソース変更がなければNixのキャッシュが効き、再ビルドを回避。
|
||||
|
||||
### P1: netbootは「最小のインストーラ」に寄せる ✅ 実装済み
|
||||
|
||||
**目的**: netbootイメージのサイズとビルド時間を削減
|
||||
|
||||
**実装内容** (`nix/images/netboot-base.nix`):
|
||||
- ✅ `netboot-base.nix`: 最小限のインストーラツールのみ(disko, parted, curl, jq等)
|
||||
- ✅ サービスバイナリや仮想化ツールは含めない
|
||||
- 役割:netbootは「SSHで入れてnixos-anywhereできる」だけに絞る
|
||||
- サービスは **インストール後のNixOS構成**で入れる方が速く・安全
|
||||
|
||||
**効果**: initrd配布もビルドも速くなる。
|
||||
|
||||
### P1: トポロジ生成とfirst-bootの接続を完成させる ✅ 実装済み
|
||||
|
||||
**目的**: 構成管理の運用ループを完成させる
|
||||
|
||||
**実装内容**:
|
||||
- ✅ `plasmacloud-cluster.nix`: クラスタトポロジ定義と `cluster-config.json` の自動生成
|
||||
- ✅ `first-boot-automation.nix`: cluster-config.json を読み込んでChainfire/FlareDB/IAMへの自動接続
|
||||
- ✅ `environment.etc."nixos/secrets/cluster-config.json"` でファイル配置
|
||||
|
||||
**効果**: 「構成管理」が「運用の自動化」に直結する。
|
||||
|
||||
### P2: ISOルートは「本番のゼロタッチ」に必要な要件を埋める(Phase 2以降)
|
||||
|
||||
**目的**: ISOベースの自動デプロイを本番対応にする
|
||||
|
||||
**実装内容**:
|
||||
- ✅ Deployerの鍵・証明書生成は実装済み(`LocalStorage.get_or_generate_*`)
|
||||
- TODO: ISO内で disko を同梱してローカル実行に寄せる(現状はネットワーク依存)
|
||||
|
||||
### P1: Deployer[Bootstrapper]の独立性確保 ✅ 実装済み
|
||||
|
||||
**目的**: 他のソフトウェア(ChainFire、FlareDB等)に依存しない独立したデプロイツールにする
|
||||
|
||||
**実装内容** (`deployer/crates/deployer-server/`):
|
||||
- ✅ `LocalStorage`: ローカルファイルベースのストレージ(ChainFire不要)
|
||||
- ✅ `config.local_state_path`: デフォルトで `/var/lib/deployer/state` に設定
|
||||
- ✅ `state.init_storage()`: `local_state_path` があれば LocalStorage を優先使用
|
||||
- ✅ Phone Home API: 簡易HTTPサーバーとして動作(ChainFire不要)
|
||||
- ✅ SSH host key / TLS証明書: LocalStorage で永続化
|
||||
|
||||
**効果**: ChainFire等が動いていなくても、Deployer[Bootstrapper]だけでデプロイが可能。
|
||||
|
||||
**将来**: Phase 3 で ChainFire との統合を実装(大規模デプロイ用)。
|
||||
|
||||
### (将来)完全自動デプロイ環境の設計
|
||||
|
||||
**目的**: 大規模デプロイ(10,000台規模)に対応した、完全に自動化されたデプロイ環境
|
||||
|
||||
**実装内容**(Phase 3で実装):
|
||||
- **API層のStateless化**: Phone Homeリクエストを複数APIサーバーで分散処理
|
||||
- **ワーカー層の追加**: デプロイジョブを並列実行(ChainFireベースのジョブキュー)
|
||||
- **ISOのオブジェクトストレージ配布**: LightningStor等にISOを保存し、高速配布
|
||||
- **バイナリキャッシュの完全実装**: すべてのビルド成果物をキャッシュ
|
||||
|
||||
**効果**: マシンをいくら増やしても高速でデプロイできる。
|
||||
|
||||
**前提条件**: 他のソフトウェア(ChainFire、FlareDB、LightningStor等)が安定してから実装する。
|
||||
|
||||
---
|
||||
|
||||
## 優先度とロードマップ
|
||||
|
||||
### Phase 1: Deployer[Bootstrapper]の改善 ✅ 完了
|
||||
|
||||
**目標**: 0→1の初期デプロイを高速化・安定化(**ローカルで動くことを優先**)
|
||||
|
||||
1. ✅ **`src` フィルタリング**(`target/` や `docs/` を除外)
|
||||
- `flake.nix` の `repoSrc` で実装済み
|
||||
- ソース変更がなければ、Nixのキャッシュが効き、Cargoの再ビルドも避けられる
|
||||
2. ✅ **Deployer[Bootstrapper]の独立性確保**
|
||||
- `LocalStorage` でChainFire非依存
|
||||
- `local_state_path` がデフォルトで設定
|
||||
3. ✅ **netbootイメージの最小化**(サービスバイナリを除外)
|
||||
- `netboot-base.nix` を最適化
|
||||
- `netboot-worker.nix`, `netboot-control-plane.nix` が netboot-base をベースに使用
|
||||
4. ✅ **トポロジ→first-boot接続**
|
||||
- `plasmacloud-cluster.nix` でクラスタトポロジ定義と cluster-config.json を自動生成
|
||||
- `first-boot-automation.nix` でサービス間の自動接続
|
||||
5. ✅ **SSH/TLS鍵生成**
|
||||
- `phone_home.rs` で ED25519 鍵と自己署名証明書を生成・永続化
|
||||
|
||||
**達成効果**:
|
||||
- Deployer[Bootstrapper]が他のソフトウェアから独立し、安定して動作
|
||||
- ソース変更がなければ、ビルド時間が大幅に短縮
|
||||
- Cachix/Attic連携なしでもローカルで動作
|
||||
|
||||
**実行環境**: 手元/仮設マシン(Nixストアのキャッシュがある前提)
|
||||
|
||||
### Phase 2: 他のソフトウェアの安定化(数ヶ月)
|
||||
|
||||
**目標**: ChainFire、FlareDB、IAM等のコアサービスの安定化
|
||||
|
||||
1. **コアサービスの機能完成**
|
||||
2. **クラスタ運用の安定化**
|
||||
3. **監視・ログ・バックアップ等の運用基盤の整備**
|
||||
|
||||
**期待効果**: 完全自動デプロイ環境を構築する基盤が整う
|
||||
|
||||
### Phase 3: 完全自動デプロイ環境の実装(将来、Phase 2完了後)
|
||||
|
||||
**目標**: 大規模デプロイ(10,000台規模)に対応した、完全に自動化されたデプロイ環境
|
||||
|
||||
1. **リモートflake化** + **バイナリキャッシュ導入**(Cachix/attic)
|
||||
- GitHub等への公開(コードベースが安定してから)
|
||||
- Cachix/Attic連携によるバイナリキャッシュ
|
||||
2. **API層のStateless化** + **ワーカー層の追加**
|
||||
3. **ジョブキューの実装**(ChainFireベース)
|
||||
4. **ISOのオブジェクトストレージ配布**(LightningStor等)
|
||||
5. **Deployerの鍵・証明書・インベントリ管理の実装**
|
||||
|
||||
**期待効果**:
|
||||
- マシンをいくら増やしても高速でデプロイできる
|
||||
- 完全に自動化されたゼロタッチデプロイが可能
|
||||
|
||||
**前提条件**:
|
||||
- Phase 2(他のソフトウェアの安定化)が完了していること
|
||||
- コードベースが安定し、GitHub等への公開が可能になったこと
|
||||
|
||||
---
|
||||
|
||||
## まとめ
|
||||
|
||||
### 現状の評価
|
||||
|
||||
このプロジェクトは、**NixOSベースのベアメタル配備に必要な部品がすべて揃い、Phase 1が完了**している:
|
||||
|
||||
#### ✅ Phase 1 完了項目
|
||||
|
||||
1. **Deployer[Bootstrapper]の独立性**: LocalStorage でChainFire非依存
|
||||
2. **キャッシュ効率化**: `repoSrc` フィルタリングで不要ファイルを除外
|
||||
3. **netboot最小化**: `netboot-base.nix` でインストーラ専用イメージ
|
||||
4. **トポロジ→first-boot接続**: cluster-config.json 自動生成
|
||||
5. **SSH/TLS鍵生成**: ED25519 鍵と自己署名証明書の生成・永続化
|
||||
|
||||
#### 残りの課題
|
||||
|
||||
1. **完全自動デプロイ環境の未実装**: 大規模デプロイに対応するための基盤(Phase 3で実装)
|
||||
|
||||
### 段階的なアプローチ
|
||||
|
||||
**Phase 1 ✅ 完了**: Deployer[Bootstrapper]の改善
|
||||
- 0→1の初期デプロイを高速化・安定化
|
||||
- 他のソフトウェアから独立
|
||||
- 手元/仮設マシンで実行可能
|
||||
|
||||
**Phase 2(現在)**: 他のソフトウェアの安定化
|
||||
- ChainFire、FlareDB、IAM等のコアサービスの安定化
|
||||
- クラスタ運用の確立
|
||||
|
||||
**Phase 3(将来)**: 完全自動デプロイ環境の実装
|
||||
- 大規模デプロイ(10,000台規模)に対応
|
||||
- 完全に自動化されたゼロタッチデプロイ
|
||||
- Phase 2完了後に実装
|
||||
|
||||
### 達成済みの成功条件
|
||||
|
||||
1. ✅ **Deployer[Bootstrapper]の独立性**: 他のソフトウェアが動いていなくても、デプロイが可能
|
||||
2. ✅ **ローカルでの動作優先**: Cachix/Attic連携なしでも、ローカルNixストアのキャッシュで動作
|
||||
3. ✅ **キャッシュの効率化**: `src` フィルタリングで、ソース変更がなければNixのキャッシュが効く
|
||||
4. ✅ **トポロジ→first-boot接続**: plasmacloud-cluster.nix からの設定生成が機能
|
||||
5. ✅ **SSH/TLS鍵の永続化**: LocalStorage で鍵を永続化
|
||||
|
||||
### 次のステップ
|
||||
|
||||
1. ~~Phase 1を最優先で実装~~ ✅ **完了**
|
||||
2. **Phase 2で他のソフトウェアを安定化**(基盤の確立)
|
||||
3. **Phase 3で完全自動デプロイ環境を実装**(大規模対応)
|
||||
- コードベースが安定してから、リモートflake化とバイナリキャッシュを実装
|
||||
|
||||
**Phase 1が完了し、0→1のデプロイが可能になった。次はPhase 2でコアサービスの安定化を進める。**
|
||||
|
||||
---
|
||||
|
||||
## 参考資料
|
||||
|
||||
- [NixOS Netboot](https://nixos.wiki/wiki/Netboot)
|
||||
- [nixos-anywhere](https://github.com/nix-community/nixos-anywhere)
|
||||
- [disko](https://github.com/nix-community/disko)
|
||||
- [Cachix](https://www.cachix.org/)
|
||||
- [attic](https://github.com/zhaofengli/attic)
|
||||
43
docs/ops/integration-matrix.md
Normal file
43
docs/ops/integration-matrix.md
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
# Integration Matrix Gate
|
||||
|
||||
Release gate that exercises the PROJECT.md matrix (chainfire → flaredb → plasmavmc → creditservice → nightlight).
|
||||
|
||||
## Release hook
|
||||
- Run this matrix **before any release cut** (tag/publish). Command: `nix develop -c ./scripts/integration-matrix.sh`.
|
||||
- After a green run, copy logs from `.cccc/work/integration-matrix/<timestamp>/` to `docs/evidence/integration-matrix-<timestamp>/` and reference the path in release notes.
|
||||
- If KVM is unavailable, use `SKIP_PLASMA=1` only as a temporary measure; restore full run once nested KVM is enabled.
|
||||
- Defaults: script now auto-creates a tiny qcow2 in `LOG_DIR` and picks `qemu-system-x86_64` from PATH; set `PLASMA_E2E=1` to run PlasmaVMC ignored e2e once qcow/QEMU is available.
|
||||
|
||||
## Prerequisites
|
||||
- Cluster services reachable (ChainFire, FlareDB, PlasmaVMC, CreditService, NightLight).
|
||||
- Nested KVM available for PlasmaVMC tests; run `sudo scripts/nested-kvm-check.sh` on hosts.
|
||||
- `cargo` toolchain present on the runner.
|
||||
- For PlasmaVMC e2e (once qcow is provided): set `PLASMAVMC_QEMU_PATH` and `PLASMAVMC_QCOW2_PATH` to enable QEMU-backed tests; the script will set best-effort defaults if unset.
|
||||
|
||||
## How to run
|
||||
```
|
||||
# Dry run (prints commands, no tests)
|
||||
DRY_RUN=1 scripts/integration-matrix.sh
|
||||
|
||||
# Full run (all legs)
|
||||
scripts/integration-matrix.sh
|
||||
|
||||
# Skip PlasmaVMC leg if KVM unavailable
|
||||
SKIP_PLASMA=1 scripts/integration-matrix.sh
|
||||
|
||||
# PlasmaVMC ignored e2e (requires QEMU + qcow; defaults auto-provisioned if available)
|
||||
PLASMA_E2E=1 scripts/integration-matrix.sh
|
||||
```
|
||||
|
||||
Logs are written to `.cccc/work/integration-matrix/<timestamp>/` by default; override with `LOG_DIR=...` if needed.
|
||||
|
||||
## What it covers
|
||||
1) chainfire → flaredb: Raft+Gossip cluster write/read with failover path (cargo tests).
|
||||
2) flaredb → plasmavmc: VM metadata durability across leader switch (cargo tests).
|
||||
3) plasmavmc → creditservice: Admission Control CAS/rollback under contention (cargo tests).
|
||||
4) creditservice → nightlight: Metrics feeding billing/alerts (cargo tests).
|
||||
5) end-to-end (future harness): tenant loop with FiberLB/FlashDNS once approved; runs will emit junit/json artifacts to `.cccc/work/results/`.
|
||||
|
||||
## Notes
|
||||
- Use `DRY_RUN=1` on CI to verify wiring without requiring KVM.
|
||||
- If nested KVM is disabled, enable via NixOS (`boot.extraModprobeConfig = "options kvm-intel nested=1";` or kvm-amd) and reboot once. Refer to `scripts/nested-kvm-check.sh` for the exact snippet.
|
||||
38
docs/ops/nested-kvm-setup.md
Normal file
38
docs/ops/nested-kvm-setup.md
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
# PlasmaVMC Nested KVM & App Validation (Draft)
|
||||
|
||||
## Nested KVM quick check
|
||||
1) On host: `cat /sys/module/kvm_intel/parameters/nested` (or `kvm_amd`). Expect `Y` for enabled, `N` for disabled.
|
||||
2) If disabled (Intel example):
|
||||
```
|
||||
boot.kernelModules = [ "kvm-intel" ];
|
||||
boot.extraModprobeConfig = ''
|
||||
options kvm-intel nested=1
|
||||
'';
|
||||
```
|
||||
For AMD, use `kvm-amd` and `options kvm-amd nested=1`.
|
||||
3) Reboot once, verify again.
|
||||
4) Inside a guest VM: prove nesting with a minimal KVM launch:
|
||||
```
|
||||
qemu-system-x86_64 -accel kvm -cpu host -m 512 -nographic \
|
||||
-kernel /run/current-system/kernel -append "console=ttyS0" < /dev/null
|
||||
```
|
||||
If it boots to kernel console, nesting works.
|
||||
|
||||
## App scenario (lightweight)
|
||||
- Topology: 2x app VMs on PrismNET, FiberLB front, FlashDNS record -> LB VIP.
|
||||
- Data: FlareDB SQL (guestbook-style) for metadata; ChainFire backs control-plane metadata.
|
||||
- Controls: CreditService Admission Control enforced on VM create (low quota); NightLight metrics exported.
|
||||
|
||||
### Steps
|
||||
1) Provision: create 2 VMs via PlasmaVMC API; attach PrismNET network; ensure watcher persists VM metadata to FlareDB.
|
||||
2) Configure: deploy small web app on each VM that writes/reads FlareDB SQL; register DNS record in FlashDNS pointing to FiberLB listener.
|
||||
3) Gate: set low wallet balance; attempt VM create/update to confirm CAS-based debit and rollback on failure.
|
||||
4) Observe: ensure NightLight scrapes app + system metrics; add alerts for latency > target and billing failures.
|
||||
5) Failover drills:
|
||||
- Kill one app VM: FiberLB should reroute; CreditService must not double-charge retries.
|
||||
- Restart PlasmaVMC node: watcher should replay state from FlareDB/ChainFire; VM lifecycle ops continue.
|
||||
6) Exit criteria: all above steps pass 5x in a row; NightLight shows zero SLO violations; CreditService balances consistent before/after drills.
|
||||
|
||||
## Notes
|
||||
- Full disk HA not covered; for disk replication we’d need distributed block (future).
|
||||
- Keep tests env-gated (ignored by default) so CI doesn’t require nested virt.
|
||||
26
docs/ops/qcow2-artifact-plan.md
Normal file
26
docs/ops/qcow2-artifact-plan.md
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
## PlasmaVMC qcow artifact plan (for integration gate e2e)
|
||||
|
||||
- Goal: provide a reproducible qcow2 image + env wiring so plasmavmc e2e (QEMU-backed) can run in the integration matrix without manual prep.
|
||||
- Constraints: small (<150MB), no network during gate run, works under nix develop; use virtio drivers; avoid licensing issues.
|
||||
|
||||
### Candidate image
|
||||
- Alpine cloud image (latest stable) is small and permissively licensed; includes virtio modules.
|
||||
- Fallback: Build a 1G qcow2 via `qemu-img create -f qcow2 plasma-mini.qcow2 1G` + `virt-make-fs` on a tiny rootfs (busybox/alpine base).
|
||||
|
||||
### Provisioning steps (once, cacheable)
|
||||
1) In nix shell (has qemu-img): `qemu-img convert -f qcow2 -O qcow2 alpine-cloudimg-amd64.qcow2 plasma-mini.qcow2` or `qemu-img create -f qcow2 plasma-mini.qcow2 1G`.
|
||||
2) Inject default user+ssh key (optional) via cloud-init seed ISO or `virt-make-fs` (avoid during gate).
|
||||
3) Store artifact under `.cccc/work/artifacts/plasma-mini.qcow2` (or cache bucket if available).
|
||||
4) Record SHA256 to detect drift.
|
||||
|
||||
### Gate wiring
|
||||
- Env vars: `PLASMAVMC_QEMU_PATH` (e.g., `/run/current-system/sw/bin/qemu-system-x86_64` in nix shell), `PLASMAVMC_QCOW2_PATH` (absolute path to plasma-mini.qcow2).
|
||||
- Update `scripts/integration-matrix.sh` docs to mention envs; optionally add `just integration-matrix [--skip-plasma]` wrapper that injects defaults when present.
|
||||
|
||||
### Time/budget
|
||||
- Download + convert: ~2-3 minutes once; gate runs reuse artifact (no network).
|
||||
- If artifact absent, plasmavmc e2e remain ignored; matrix still green on unit/integration subsets.
|
||||
|
||||
### Open questions
|
||||
- Where to store the qcow2 artifact for CI (git LFS? remote cache?) to avoid repo bloat.
|
||||
- Is cloud-init desirable for tests (SSH into VM) or is raw boot enough for current e2e?
|
||||
89
docs/plans/chainfire_architecture_redefinition.md
Normal file
89
docs/plans/chainfire_architecture_redefinition.md
Normal file
|
|
@ -0,0 +1,89 @@
|
|||
# Chainfire アーキテクチャ再定義案: 分散システム構築基盤への転換
|
||||
|
||||
`Chainfire` を単一の KV ストアサービスから、プロジェクト全体の「分散システム構築フレームワーク」へと位置づけ直すための設計案です。
|
||||
|
||||
## 1. アーキテクチャ概要
|
||||
|
||||
階層構造を整理し、低レイヤーのプリミティブから高レイヤーのマネージドサービスまでを明確に分離します。
|
||||
|
||||
```mermaid
|
||||
graph TD
|
||||
subgraph Application_Layer
|
||||
FlareDB[FlareDB / Distributed DB]
|
||||
LightningStor[lightningstor / Object Storage]
|
||||
IAM[IAM / Control Plane]
|
||||
end
|
||||
|
||||
subgraph L2_Service_Layer_Sidecar
|
||||
CFServer[Chainfire Server]
|
||||
CFServer -- gRPC Streaming --> IAM
|
||||
end
|
||||
|
||||
subgraph L1_Framework_Layer
|
||||
CFCore[chainfire-core]
|
||||
CFCore -- Library Embed --> FlareDB
|
||||
CFCore -- Library Embed --> LightningStor
|
||||
|
||||
MultiRaft[Multi-Raft Orchestrator]
|
||||
CFCore --> MultiRaft
|
||||
end
|
||||
|
||||
subgraph L0_Primitive_Layer
|
||||
Gossip[chainfire-gossip]
|
||||
Raft[chainfire-raft]
|
||||
Storage[chainfire-storage]
|
||||
|
||||
CFCore --> Gossip
|
||||
CFCore --> Raft
|
||||
Raft --> Storage
|
||||
end
|
||||
|
||||
CFServer --> CFCore
|
||||
```
|
||||
|
||||
## 2. 各レイヤーの責務定義
|
||||
|
||||
### L0 Core (Library): primitives
|
||||
- **chainfire-gossip**:
|
||||
- SWIM プロトコルに基づくメンバーシップ管理。
|
||||
- 特定のサービスに依存せず、任意の `NodeMetadata` を伝搬可能にする。
|
||||
- **chainfire-raft**:
|
||||
- 単一 Raft グループのコンセンサスロジック。
|
||||
- `StateMachine` を Trait 化し、任意のビジネスロジックを注入可能にする。
|
||||
- `RaftNetwork` を抽象化し、gRPC 以外(UDS, In-memory)のトランスポートをサポート。
|
||||
- **chainfire-storage**:
|
||||
- Raft ログおよび StateMachine のための永続化レイヤー。
|
||||
|
||||
### L1 Framework: chainfire-core
|
||||
- **Multi-Raft Orchestrator**:
|
||||
- 複数の Raft インスタンス(シャード)を同一プロセス内で効率的に管理。
|
||||
- ネットワーク接続やスレッドプール等のリソース共有を最適化。
|
||||
- **Cluster Manager**:
|
||||
- Gossip のメンバーシップイベントを監視し、Raft グループへのノード追加・削除を自動化。
|
||||
- 「ノード発見(Gossip)」から「合意形成参加(Raft)」への橋渡しを行う。
|
||||
|
||||
### L2 Service: chainfire-server (Standard Implementation)
|
||||
- **Shared Infrastructure**:
|
||||
- KV ストア、分散ロック、リース管理を gRPC API として提供。
|
||||
- 独自に Raft を組む必要のない「軽量サービス」向けの共通基盤。
|
||||
- **Sidecar Mode Support**:
|
||||
- gRPC Streaming による `ClusterEvents` の提供。
|
||||
- リーダー交代やメンバーシップ変更を外部プロセスにリアルタイム通知。
|
||||
|
||||
## 3. 分散サービスでの再利用シナリオ (例: FlareDB)
|
||||
|
||||
FlareDB が Chainfire 基盤をどのように利用して Multi-Raft を構成するかの具体例です。
|
||||
|
||||
1. **ライブラリとして組み込み**: `FlareDB` プロセスが `chainfire-core` をリンク。
|
||||
2. **独自の StateMachine 実装**: FlareDB のデータ操作ロジックを `StateMachine` Trait として実装。
|
||||
3. **シャード管理**:
|
||||
- データのレンジごとに `RaftGroup` インスタンスを作成。
|
||||
- 各 `RaftGroup` に FlareDB 独自の `StateMachine` を登録。
|
||||
4. **ノード管理の委譲**:
|
||||
- Gossip によるノード発見を `chainfire-core` に任せ、FlareDB 側では個別のノードリスト管理を行わない。
|
||||
|
||||
## 4. メリットの整理
|
||||
|
||||
- **開発効率の向上**: Gossip や Raft といった複雑な分散プロトコルの再実装が不要になる。
|
||||
- **観測性の一貫性**: プロジェクト全体の全ノードが共通の Gossip 基盤に乗ることで、システム全体のトポロジー可視化が容易になる。
|
||||
- **柔軟な配置**: 同一のロジックを、ライブラリとして(高パフォーマンス)、あるいはサイドカーとして(疎結合)のどちらでも利用可能。
|
||||
45
docs/plans/metadata_unification.md
Normal file
45
docs/plans/metadata_unification.md
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
# メタデータ管理の Chainfire 一本化に関する調査報告と構成案
|
||||
|
||||
## 1. 調査結果サマリー
|
||||
プロジェクト内の各コンポーネントにおけるメタデータ(設定、リソース定義、状態)の管理状況を調査した結果、現状は `Chainfire` (etcd-like) と `FlareDB` (TiKV-like) が混在しており、メンテナンスコストとシステム複雑性を増大させていることが判明しました。
|
||||
|
||||
### コンポーネント別の現状
|
||||
- **移行が必要**: `k8shost` (現在 FlareDB に強く密結合)
|
||||
- **設定・実装の統一が必要**: `lightningstor`, `flashdns`, `prismnet`, `fiberlb` (既に Chainfire 対応コードを持つが、独自に抽象化を実装)
|
||||
- **対応済み**: `iam`, `creditservice` (既に Chainfire を主に使用)
|
||||
|
||||
## 2. 技術的判断
|
||||
メタデータ実装を **Chainfire に一本化することは妥当かつ推奨される** と判断します。
|
||||
|
||||
### 妥当性の理由
|
||||
- **運用性の向上**: 運用・監視・バックアップの対象を Raft ベースの `Chainfire` 1つに集約できる。
|
||||
- **一貫した連携基盤**: `Chainfire` の `Watch` 機能を共通のイベント基盤として、コンポーネント間(例:Podの変更をネットワーク層が検知)のリアクティブな連携が容易になる。
|
||||
- **コードの健全化**: 依存ライブラリを整理し、各コンポーネントで重複しているストレージ抽象化ロジックを排除できる。
|
||||
|
||||
### リスクへの対策
|
||||
`Chainfire` は全ノード複製型のため、大規模環境での書き込み性能がボトルネックになる懸念があります。これに対し、本案では**共通抽象化インターフェース (Trait)** を導入することで、将来的に特定リソースのみ高性能バックエンドへ再分離できる柔軟性を確保します。
|
||||
|
||||
## 3. 構成案
|
||||
|
||||
### A. 共通モジュール `chainfire-client::metadata` の新設
|
||||
各サービスからストレージ固有の実装を分離し、共通の `MetadataClient` Trait を提供します。
|
||||
|
||||
```rust
|
||||
#[async_trait]
|
||||
pub trait MetadataClient: Send + Sync {
|
||||
async fn get(&self, key: &str) -> Result<Option<Vec<u8>>>;
|
||||
async fn put(&self, key: &str, value: Vec<u8>) -> Result<()>;
|
||||
async fn delete(&self, key: &str) -> Result<bool>;
|
||||
async fn list_prefix(&self, prefix: &str) -> Result<Vec<(String, Vec<u8>)>>;
|
||||
async fn watch(&self, prefix: &str) -> BoxStream<WatchEvent>;
|
||||
async fn compare_and_swap(&self, key: &str, expected_rev: u64, value: Vec<u8>) -> Result<CasOutcome>;
|
||||
}
|
||||
```
|
||||
|
||||
### B. 移行ロードマップ
|
||||
1. **共通基盤の構築**: `chainfire-client::metadata` を実装。`Chainfire` ブリッジとテスト用の `InMemory` バックエンドを提供。
|
||||
2. **k8shost のリファクタリング**: `storage.rs` を `MetadataClient` 経由に書き換え、`flaredb-client` 依存を削除。
|
||||
3. **他コンポーネントの追随**: `lightningstor` 等の独自ストレージ選択ロジックを `chainfire-client::metadata` に置換。
|
||||
|
||||
## 4. 結論
|
||||
本提案により、現状の `FlareDB` マルチテナント実装の複雑さから解放され、開発効率とシステムの一貫性が劇的に向上します。将来的なスケーラビリティ要求に対しても、抽象化レイヤーの導入により十分対応可能です。
|
||||
17
examples/mtls-agent-config.toml
Normal file
17
examples/mtls-agent-config.toml
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
[service]
|
||||
name = "api-server"
|
||||
app_addr = "127.0.0.1:8080"
|
||||
mesh_bind_addr = "0.0.0.0:18080"
|
||||
|
||||
[cluster]
|
||||
cluster_id = "test-cluster-01"
|
||||
environment = "dev"
|
||||
chainfire_endpoint = "http://127.0.0.1:2379"
|
||||
|
||||
[mtls]
|
||||
mode = "auto" # auto/mtls/tls/plain
|
||||
# ca_cert_path = "/etc/photoncloud/ca.crt"
|
||||
# cert_path = "/etc/photoncloud/server.crt"
|
||||
# key_path = "/etc/photoncloud/server.key"
|
||||
|
||||
|
||||
79
examples/photoncloud-test-cluster.json
Normal file
79
examples/photoncloud-test-cluster.json
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
{
|
||||
"cluster": {
|
||||
"cluster_id": "test-cluster-01",
|
||||
"environment": "dev"
|
||||
},
|
||||
"nodes": [
|
||||
{
|
||||
"node_id": "node-01",
|
||||
"hostname": "photon-node-01",
|
||||
"ip": "192.168.100.10",
|
||||
"roles": ["worker"],
|
||||
"labels": {
|
||||
"zone": "zone-a"
|
||||
}
|
||||
},
|
||||
{
|
||||
"node_id": "node-02",
|
||||
"hostname": "photon-node-02",
|
||||
"ip": "192.168.100.11",
|
||||
"roles": ["worker"],
|
||||
"labels": {
|
||||
"zone": "zone-b"
|
||||
}
|
||||
}
|
||||
],
|
||||
"services": [
|
||||
{
|
||||
"name": "api-server",
|
||||
"ports": {
|
||||
"http": 8080,
|
||||
"grpc": 9090
|
||||
},
|
||||
"protocol": "http",
|
||||
"mtls_required": false,
|
||||
"mesh_mode": "agent"
|
||||
},
|
||||
{
|
||||
"name": "worker-service",
|
||||
"ports": {
|
||||
"http": 8081
|
||||
},
|
||||
"protocol": "http",
|
||||
"mtls_required": false,
|
||||
"mesh_mode": "agent"
|
||||
}
|
||||
],
|
||||
"instances": [
|
||||
{
|
||||
"instance_id": "api-server-01",
|
||||
"service": "api-server",
|
||||
"node_id": "node-01",
|
||||
"ip": "192.168.100.10",
|
||||
"port": 8080,
|
||||
"mesh_port": 18080,
|
||||
"version": "v1.0.0"
|
||||
},
|
||||
{
|
||||
"instance_id": "worker-01",
|
||||
"service": "worker-service",
|
||||
"node_id": "node-02",
|
||||
"ip": "192.168.100.11",
|
||||
"port": 8081,
|
||||
"mesh_port": 18081,
|
||||
"version": "v1.0.0"
|
||||
}
|
||||
],
|
||||
"mtls_policies": [
|
||||
{
|
||||
"policy_id": "default-dev",
|
||||
"environment": "dev",
|
||||
"source_service": "*",
|
||||
"target_service": "*",
|
||||
"mtls_required": false,
|
||||
"mode": "plain"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
10
fiberlb/crates/fiberlb-server/build.rs
Normal file
10
fiberlb/crates/fiberlb-server/build.rs
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let protoc_path = protoc_bin_vendored::protoc_bin_path()?;
|
||||
std::env::set_var("PROTOC", protoc_path);
|
||||
|
||||
tonic_build::configure()
|
||||
.build_server(false)
|
||||
.build_client(true)
|
||||
.compile(&["proto/api/gobgp.proto"], &["proto"])?;
|
||||
Ok(())
|
||||
}
|
||||
584
fiberlb/crates/fiberlb-server/proto/api/attribute.proto
Normal file
584
fiberlb/crates/fiberlb-server/proto/api/attribute.proto
Normal file
|
|
@ -0,0 +1,584 @@
|
|||
// Copyright (C) 2018 Nippon Telegraph and Telephone Corporation.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person
|
||||
// obtaining a copy of this software and associated documentation files
|
||||
// (the "Software"), to deal in the Software without restriction,
|
||||
// including without limitation the rights to use, copy, modify, merge,
|
||||
// publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
// and to permit persons to whom the Software is furnished to do so,
|
||||
// subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be
|
||||
// included in all copies or substantial portions of the Software.
|
||||
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
syntax = "proto3";
|
||||
|
||||
package api;
|
||||
|
||||
import "api/common.proto";
|
||||
import "api/extcom.proto";
|
||||
import "api/nlri.proto";
|
||||
|
||||
option go_package = "github.com/osrg/gobgp/v4/api;api";
|
||||
|
||||
message Attribute {
|
||||
oneof attr {
|
||||
UnknownAttribute unknown = 1;
|
||||
OriginAttribute origin = 2;
|
||||
AsPathAttribute as_path = 3;
|
||||
NextHopAttribute next_hop = 4;
|
||||
MultiExitDiscAttribute multi_exit_disc = 5;
|
||||
LocalPrefAttribute local_pref = 6;
|
||||
AtomicAggregateAttribute atomic_aggregate = 7;
|
||||
AggregatorAttribute aggregator = 8;
|
||||
CommunitiesAttribute communities = 9;
|
||||
OriginatorIdAttribute originator_id = 10;
|
||||
ClusterListAttribute cluster_list = 11;
|
||||
MpReachNLRIAttribute mp_reach = 12;
|
||||
MpUnreachNLRIAttribute mp_unreach = 13;
|
||||
ExtendedCommunitiesAttribute extended_communities = 14;
|
||||
As4PathAttribute as4_path = 15;
|
||||
As4AggregatorAttribute as4_aggregator = 16;
|
||||
PmsiTunnelAttribute pmsi_tunnel = 17;
|
||||
TunnelEncapAttribute tunnel_encap = 18;
|
||||
IP6ExtendedCommunitiesAttribute ip6_extended_communities = 19;
|
||||
AigpAttribute aigp = 20;
|
||||
LargeCommunitiesAttribute large_communities = 21;
|
||||
LsAttribute ls = 22;
|
||||
PrefixSID prefix_sid = 23;
|
||||
}
|
||||
}
|
||||
|
||||
message OriginAttribute {
|
||||
uint32 origin = 1;
|
||||
}
|
||||
|
||||
message AsSegment {
|
||||
enum Type {
|
||||
TYPE_UNSPECIFIED = 0;
|
||||
TYPE_AS_SET = 1;
|
||||
TYPE_AS_SEQUENCE = 2;
|
||||
TYPE_AS_CONFED_SEQUENCE = 3;
|
||||
TYPE_AS_CONFED_SET = 4;
|
||||
}
|
||||
Type type = 1;
|
||||
repeated uint32 numbers = 2;
|
||||
}
|
||||
|
||||
message AsPathAttribute {
|
||||
repeated AsSegment segments = 1;
|
||||
}
|
||||
|
||||
message NextHopAttribute {
|
||||
string next_hop = 1;
|
||||
}
|
||||
|
||||
message MultiExitDiscAttribute {
|
||||
uint32 med = 1;
|
||||
}
|
||||
|
||||
message LocalPrefAttribute {
|
||||
uint32 local_pref = 1;
|
||||
}
|
||||
|
||||
message AtomicAggregateAttribute {}
|
||||
|
||||
message AggregatorAttribute {
|
||||
uint32 asn = 1;
|
||||
string address = 2;
|
||||
}
|
||||
|
||||
message CommunitiesAttribute {
|
||||
repeated uint32 communities = 1;
|
||||
}
|
||||
|
||||
message OriginatorIdAttribute {
|
||||
string id = 1;
|
||||
}
|
||||
|
||||
message ClusterListAttribute {
|
||||
repeated string ids = 1;
|
||||
}
|
||||
|
||||
message MpReachNLRIAttribute {
|
||||
Family family = 1;
|
||||
repeated string next_hops = 2;
|
||||
repeated NLRI nlris = 3;
|
||||
}
|
||||
|
||||
message MpUnreachNLRIAttribute {
|
||||
api.Family family = 1;
|
||||
// The same as NLRI field of MpReachNLRIAttribute
|
||||
repeated NLRI nlris = 3;
|
||||
}
|
||||
|
||||
message ExtendedCommunitiesAttribute {
|
||||
repeated ExtendedCommunity communities = 1;
|
||||
}
|
||||
|
||||
message As4PathAttribute {
|
||||
repeated AsSegment segments = 1;
|
||||
}
|
||||
|
||||
message As4AggregatorAttribute {
|
||||
uint32 asn = 2;
|
||||
string address = 3;
|
||||
}
|
||||
|
||||
message PmsiTunnelAttribute {
|
||||
uint32 flags = 1;
|
||||
uint32 type = 2;
|
||||
uint32 label = 3;
|
||||
bytes id = 4;
|
||||
}
|
||||
|
||||
message TunnelEncapSubTLVEncapsulation {
|
||||
uint32 key = 1;
|
||||
bytes cookie = 2;
|
||||
}
|
||||
|
||||
message TunnelEncapSubTLVProtocol {
|
||||
uint32 protocol = 1;
|
||||
}
|
||||
|
||||
message TunnelEncapSubTLVColor {
|
||||
uint32 color = 1;
|
||||
}
|
||||
|
||||
message TunnelEncapSubTLVSRPreference {
|
||||
uint32 flags = 1;
|
||||
uint32 preference = 2;
|
||||
}
|
||||
|
||||
message TunnelEncapSubTLVSRCandidatePathName {
|
||||
string candidate_path_name = 1;
|
||||
}
|
||||
|
||||
message TunnelEncapSubTLVSRPriority {
|
||||
uint32 priority = 1;
|
||||
}
|
||||
|
||||
message TunnelEncapSubTLVSRBindingSID {
|
||||
oneof bsid {
|
||||
SRBindingSID sr_binding_sid = 1;
|
||||
SRv6BindingSID srv6_binding_sid = 2;
|
||||
}
|
||||
}
|
||||
|
||||
message SRBindingSID {
|
||||
bool s_flag = 1;
|
||||
bool i_flag = 2;
|
||||
bytes sid = 3;
|
||||
}
|
||||
|
||||
enum SRV6Behavior {
|
||||
SRV6_BEHAVIOR_UNSPECIFIED = 0;
|
||||
SRV6_BEHAVIOR_END = 1;
|
||||
SRV6_BEHAVIOR_END_WITH_PSP = 2;
|
||||
SRV6_BEHAVIOR_END_WITH_USP = 3;
|
||||
SRV6_BEHAVIOR_END_WITH_PSP_USP = 4;
|
||||
SRV6_BEHAVIOR_ENDX = 5;
|
||||
SRV6_BEHAVIOR_ENDX_WITH_PSP = 6;
|
||||
SRV6_BEHAVIOR_ENDX_WITH_USP = 7;
|
||||
SRV6_BEHAVIOR_ENDX_WITH_PSP_USP = 8;
|
||||
SRV6_BEHAVIOR_ENDT = 9;
|
||||
SRV6_BEHAVIOR_ENDT_WITH_PSP = 10;
|
||||
SRV6_BEHAVIOR_ENDT_WITH_USP = 11;
|
||||
SRV6_BEHAVIOR_ENDT_WITH_PSP_USP = 12;
|
||||
SRV6_BEHAVIOR_END_B6_ENCAPS = 14;
|
||||
SRV6_BEHAVIOR_END_BM = 15;
|
||||
SRV6_BEHAVIOR_END_DX6 = 16;
|
||||
SRV6_BEHAVIOR_END_DX4 = 17;
|
||||
SRV6_BEHAVIOR_END_DT6 = 18;
|
||||
SRV6_BEHAVIOR_END_DT4 = 19;
|
||||
SRV6_BEHAVIOR_END_DT46 = 20;
|
||||
SRV6_BEHAVIOR_END_DX2 = 21;
|
||||
SRV6_BEHAVIOR_END_DX2V = 22;
|
||||
SRV6_BEHAVIOR_END_DT2U = 23;
|
||||
SRV6_BEHAVIOR_END_DT2M = 24;
|
||||
SRV6_BEHAVIOR_END_B6_ENCAPS_RED = 27;
|
||||
SRV6_BEHAVIOR_END_WITH_USD = 28;
|
||||
SRV6_BEHAVIOR_END_WITH_PSP_USD = 29;
|
||||
SRV6_BEHAVIOR_END_WITH_USP_USD = 30;
|
||||
SRV6_BEHAVIOR_END_WITH_PSP_USP_USD = 31;
|
||||
SRV6_BEHAVIOR_ENDX_WITH_USD = 32;
|
||||
SRV6_BEHAVIOR_ENDX_WITH_PSP_USD = 33;
|
||||
SRV6_BEHAVIOR_ENDX_WITH_USP_USD = 34;
|
||||
SRV6_BEHAVIOR_ENDX_WITH_PSP_USP_USD = 35;
|
||||
SRV6_BEHAVIOR_ENDT_WITH_USD = 36;
|
||||
SRV6_BEHAVIOR_ENDT_WITH_PSP_USD = 37;
|
||||
SRV6_BEHAVIOR_ENDT_WITH_USP_USD = 38;
|
||||
SRV6_BEHAVIOR_ENDT_WITH_PSP_USP_USD = 39;
|
||||
SRV6_BEHAVIOR_ENDM_GTP6D = 69; // 0x0045
|
||||
SRV6_BEHAVIOR_ENDM_GTP6DI = 70; // 0x0046
|
||||
SRV6_BEHAVIOR_ENDM_GTP6E = 71; // 0x0047
|
||||
SRV6_BEHAVIOR_ENDM_GTP4E = 72; // 0x0048
|
||||
}
|
||||
|
||||
message SRv6EndPointBehavior {
|
||||
SRV6Behavior behavior = 1;
|
||||
uint32 block_len = 2;
|
||||
uint32 node_len = 3;
|
||||
uint32 func_len = 4;
|
||||
uint32 arg_len = 5;
|
||||
}
|
||||
|
||||
message SRv6BindingSID {
|
||||
bool s_flag = 1;
|
||||
bool i_flag = 2;
|
||||
bool b_flag = 3;
|
||||
bytes sid = 4;
|
||||
SRv6EndPointBehavior endpoint_behavior_structure = 5;
|
||||
}
|
||||
|
||||
enum ENLPType {
|
||||
ENLP_TYPE_UNSPECIFIED = 0;
|
||||
ENLP_TYPE_TYPE1 = 1;
|
||||
ENLP_TYPE_TYPE2 = 2;
|
||||
ENLP_TYPE_TYPE3 = 3;
|
||||
ENLP_TYPE_TYPE4 = 4;
|
||||
}
|
||||
|
||||
message TunnelEncapSubTLVSRENLP {
|
||||
uint32 flags = 1;
|
||||
ENLPType enlp = 2;
|
||||
}
|
||||
|
||||
message SRWeight {
|
||||
uint32 flags = 1;
|
||||
uint32 weight = 2;
|
||||
}
|
||||
|
||||
message SegmentFlags {
|
||||
bool v_flag = 1;
|
||||
bool a_flag = 2;
|
||||
bool s_flag = 3;
|
||||
bool b_flag = 4;
|
||||
}
|
||||
|
||||
message SegmentTypeA {
|
||||
SegmentFlags flags = 1;
|
||||
uint32 label = 2;
|
||||
}
|
||||
|
||||
message SegmentTypeB {
|
||||
SegmentFlags flags = 1;
|
||||
bytes sid = 2;
|
||||
SRv6EndPointBehavior endpoint_behavior_structure = 3;
|
||||
}
|
||||
|
||||
message TunnelEncapSubTLVSRSegmentList {
|
||||
SRWeight weight = 1;
|
||||
|
||||
message Segment {
|
||||
oneof segment {
|
||||
SegmentTypeA a = 1;
|
||||
SegmentTypeB b = 2;
|
||||
}
|
||||
}
|
||||
repeated Segment segments = 2;
|
||||
}
|
||||
|
||||
message TunnelEncapSubTLVEgressEndpoint {
|
||||
string address = 1;
|
||||
}
|
||||
|
||||
message TunnelEncapSubTLVUDPDestPort {
|
||||
uint32 port = 1;
|
||||
}
|
||||
|
||||
message TunnelEncapSubTLVUnknown {
|
||||
uint32 type = 1;
|
||||
bytes value = 2;
|
||||
}
|
||||
|
||||
message TunnelEncapTLV {
|
||||
uint32 type = 1;
|
||||
message TLV {
|
||||
oneof tlv {
|
||||
TunnelEncapSubTLVUnknown unknown = 1;
|
||||
TunnelEncapSubTLVEncapsulation encapsulation = 2;
|
||||
TunnelEncapSubTLVProtocol protocol = 3;
|
||||
TunnelEncapSubTLVColor color = 4;
|
||||
TunnelEncapSubTLVEgressEndpoint egress_endpoint = 5;
|
||||
TunnelEncapSubTLVUDPDestPort udp_dest_port = 6;
|
||||
TunnelEncapSubTLVSRPreference sr_preference = 7;
|
||||
TunnelEncapSubTLVSRPriority sr_priority = 8;
|
||||
TunnelEncapSubTLVSRCandidatePathName sr_candidate_path_name = 9;
|
||||
TunnelEncapSubTLVSRENLP sr_enlp = 10;
|
||||
TunnelEncapSubTLVSRBindingSID sr_binding_sid = 11;
|
||||
TunnelEncapSubTLVSRSegmentList sr_segment_list = 12;
|
||||
}
|
||||
}
|
||||
repeated TLV tlvs = 2;
|
||||
}
|
||||
|
||||
message TunnelEncapAttribute {
|
||||
repeated TunnelEncapTLV tlvs = 1;
|
||||
}
|
||||
|
||||
message IPv6AddressSpecificExtended {
|
||||
bool is_transitive = 1;
|
||||
uint32 sub_type = 2;
|
||||
string address = 3;
|
||||
uint32 local_admin = 4;
|
||||
}
|
||||
|
||||
message RedirectIPv6AddressSpecificExtended {
|
||||
string address = 1;
|
||||
uint32 local_admin = 2;
|
||||
}
|
||||
|
||||
message IP6ExtendedCommunitiesAttribute {
|
||||
message Community {
|
||||
oneof extcom {
|
||||
IPv6AddressSpecificExtended ipv6_address_specific = 1;
|
||||
RedirectIPv6AddressSpecificExtended redirect_ipv6_address_specific = 2;
|
||||
}
|
||||
}
|
||||
repeated Community communities = 1;
|
||||
}
|
||||
|
||||
message AigpTLVIGPMetric {
|
||||
uint64 metric = 1;
|
||||
}
|
||||
|
||||
message AigpTLVUnknown {
|
||||
uint32 type = 1;
|
||||
bytes value = 2;
|
||||
}
|
||||
|
||||
message AigpAttribute {
|
||||
message TLV {
|
||||
oneof tlv {
|
||||
AigpTLVUnknown unknown = 1;
|
||||
AigpTLVIGPMetric igp_metric = 2;
|
||||
}
|
||||
}
|
||||
repeated TLV tlvs = 1;
|
||||
}
|
||||
|
||||
message LargeCommunity {
|
||||
uint32 global_admin = 1;
|
||||
uint32 local_data1 = 2;
|
||||
uint32 local_data2 = 3;
|
||||
}
|
||||
|
||||
message LargeCommunitiesAttribute {
|
||||
repeated LargeCommunity communities = 1;
|
||||
}
|
||||
|
||||
message LsNodeFlags {
|
||||
bool overload = 1;
|
||||
bool attached = 2;
|
||||
bool external = 3;
|
||||
bool abr = 4;
|
||||
bool router = 5;
|
||||
bool v6 = 6;
|
||||
}
|
||||
|
||||
message LsIGPFlags {
|
||||
bool down = 1;
|
||||
bool no_unicast = 2;
|
||||
bool local_address = 3;
|
||||
bool propagate_nssa = 4;
|
||||
}
|
||||
|
||||
message LsSrRange {
|
||||
uint32 begin = 1;
|
||||
uint32 end = 2;
|
||||
}
|
||||
|
||||
message LsSrCapabilities {
|
||||
bool ipv4_supported = 1;
|
||||
bool ipv6_supported = 2;
|
||||
repeated LsSrRange ranges = 3;
|
||||
}
|
||||
|
||||
message LsSrLocalBlock {
|
||||
repeated LsSrRange ranges = 1;
|
||||
}
|
||||
|
||||
message LsAttributeNode {
|
||||
string name = 1;
|
||||
LsNodeFlags flags = 2;
|
||||
string local_router_id = 3;
|
||||
string local_router_id_v6 = 4;
|
||||
bytes isis_area = 5;
|
||||
bytes opaque = 6;
|
||||
|
||||
LsSrCapabilities sr_capabilities = 7;
|
||||
bytes sr_algorithms = 8;
|
||||
LsSrLocalBlock sr_local_block = 9;
|
||||
}
|
||||
|
||||
message LsAttributeLink {
|
||||
string name = 1;
|
||||
string local_router_id = 2;
|
||||
string local_router_id_v6 = 3;
|
||||
string remote_router_id = 4;
|
||||
string remote_router_id_v6 = 5;
|
||||
uint32 admin_group = 6;
|
||||
uint32 default_te_metric = 7;
|
||||
uint32 igp_metric = 8;
|
||||
bytes opaque = 9;
|
||||
|
||||
float bandwidth = 10;
|
||||
float reservable_bandwidth = 11;
|
||||
repeated float unreserved_bandwidth = 12;
|
||||
|
||||
uint32 sr_adjacency_sid = 13;
|
||||
repeated uint32 srlgs = 14;
|
||||
LsSrv6EndXSID srv6_end_x_sid = 15;
|
||||
}
|
||||
|
||||
message LsAttributePrefix {
|
||||
LsIGPFlags igp_flags = 1;
|
||||
bytes opaque = 2;
|
||||
|
||||
uint32 sr_prefix_sid = 3;
|
||||
}
|
||||
|
||||
message LsBgpPeerSegmentSIDFlags {
|
||||
bool value = 1;
|
||||
bool local = 2;
|
||||
bool backup = 3;
|
||||
bool persistent = 4;
|
||||
}
|
||||
|
||||
message LsBgpPeerSegmentSID {
|
||||
LsBgpPeerSegmentSIDFlags flags = 1;
|
||||
uint32 weight = 2;
|
||||
uint32 sid = 3;
|
||||
}
|
||||
|
||||
message LsAttributeBgpPeerSegment {
|
||||
LsBgpPeerSegmentSID bgp_peer_node_sid = 1;
|
||||
LsBgpPeerSegmentSID bgp_peer_adjacency_sid = 2;
|
||||
LsBgpPeerSegmentSID bgp_peer_set_sid = 3;
|
||||
}
|
||||
|
||||
message LsSrv6EndXSID {
|
||||
uint32 endpoint_behavior = 1;
|
||||
uint32 flags = 2;
|
||||
uint32 algorithm = 3;
|
||||
uint32 weight = 4;
|
||||
uint32 reserved = 5;
|
||||
repeated string sids = 6;
|
||||
LsSrv6SIDStructure srv6_sid_structure = 7;
|
||||
}
|
||||
|
||||
message LsSrv6SIDStructure {
|
||||
uint32 local_block = 1;
|
||||
uint32 local_node = 2;
|
||||
uint32 local_func = 3;
|
||||
uint32 local_arg = 4;
|
||||
}
|
||||
|
||||
message LsSrv6EndpointBehavior {
|
||||
uint32 endpoint_behavior = 1;
|
||||
uint32 flags = 2;
|
||||
uint32 algorithm = 3;
|
||||
}
|
||||
|
||||
message LsSrv6BgpPeerNodeSID {
|
||||
uint32 flags = 1;
|
||||
uint32 weight = 2;
|
||||
uint32 peer_as = 3;
|
||||
string peer_bgp_id = 4;
|
||||
}
|
||||
|
||||
message LsAttributeSrv6SID {
|
||||
LsSrv6SIDStructure srv6_sid_structure = 1;
|
||||
LsSrv6EndpointBehavior srv6_endpoint_behavior = 2;
|
||||
LsSrv6BgpPeerNodeSID srv6_bgp_peer_node_sid = 3;
|
||||
}
|
||||
|
||||
message LsAttribute {
|
||||
LsAttributeNode node = 1;
|
||||
LsAttributeLink link = 2;
|
||||
LsAttributePrefix prefix = 3;
|
||||
LsAttributeBgpPeerSegment bgp_peer_segment = 4;
|
||||
LsAttributeSrv6SID srv6_sid = 5;
|
||||
}
|
||||
|
||||
message UnknownAttribute {
|
||||
uint32 flags = 1;
|
||||
uint32 type = 2;
|
||||
bytes value = 3;
|
||||
}
|
||||
|
||||
// https://www.rfc-editor.org/rfc/rfc9252.html#section-3.2.1
|
||||
message SRv6StructureSubSubTLV {
|
||||
uint32 locator_block_length = 1;
|
||||
uint32 locator_node_length = 2;
|
||||
uint32 function_length = 3;
|
||||
uint32 argument_length = 4;
|
||||
uint32 transposition_length = 5;
|
||||
uint32 transposition_offset = 6;
|
||||
}
|
||||
|
||||
message SRv6SubSubTLV {
|
||||
oneof tlv {
|
||||
SRv6StructureSubSubTLV structure = 1;
|
||||
}
|
||||
}
|
||||
|
||||
message SRv6SubSubTLVs {
|
||||
repeated SRv6SubSubTLV tlvs = 1;
|
||||
}
|
||||
|
||||
message SRv6SIDFlags {
|
||||
// Placeholder for future sid flags
|
||||
bool flag_1 = 1;
|
||||
}
|
||||
|
||||
// https://tools.ietf.org/html/draft-dawra-bess-srv6-services-02#section-2.1.1
|
||||
message SRv6InformationSubTLV {
|
||||
bytes sid = 1;
|
||||
SRv6SIDFlags flags = 2;
|
||||
uint32 endpoint_behavior = 3;
|
||||
map<uint32, SRv6SubSubTLVs> sub_sub_tlvs = 4;
|
||||
}
|
||||
|
||||
message SRv6SubTLV {
|
||||
oneof tlv {
|
||||
SRv6InformationSubTLV information = 1;
|
||||
}
|
||||
}
|
||||
|
||||
message SRv6SubTLVs {
|
||||
repeated SRv6SubTLV tlvs = 1;
|
||||
}
|
||||
|
||||
// https://www.rfc-editor.org/rfc/rfc9252.html#section-2
|
||||
message SRv6L3ServiceTLV {
|
||||
map<uint32, SRv6SubTLVs> sub_tlvs = 1;
|
||||
}
|
||||
|
||||
// https://www.rfc-editor.org/rfc/rfc9252.html#section-2
|
||||
message SRv6L2ServiceTLV {
|
||||
map<uint32, SRv6SubTLVs> sub_tlvs = 1;
|
||||
}
|
||||
|
||||
// https://tools.ietf.org/html/rfc8669
|
||||
message PrefixSID {
|
||||
// tlv is one of:
|
||||
message TLV {
|
||||
oneof tlv {
|
||||
// IndexLabelTLV Type 1 (not yet implemented)
|
||||
// OriginatorSRGBTLV Type 3 (not yet implemented)
|
||||
SRv6L3ServiceTLV l3_service = 3;
|
||||
SRv6L2ServiceTLV l2_service = 4;
|
||||
}
|
||||
}
|
||||
repeated TLV tlvs = 1;
|
||||
}
|
||||
124
fiberlb/crates/fiberlb-server/proto/api/capability.proto
Normal file
124
fiberlb/crates/fiberlb-server/proto/api/capability.proto
Normal file
|
|
@ -0,0 +1,124 @@
|
|||
// Copyright (C) 2018 Nippon Telegraph and Telephone Corporation.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person
|
||||
// obtaining a copy of this software and associated documentation files
|
||||
// (the "Software"), to deal in the Software without restriction,
|
||||
// including without limitation the rights to use, copy, modify, merge,
|
||||
// publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
// and to permit persons to whom the Software is furnished to do so,
|
||||
// subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be
|
||||
// included in all copies or substantial portions of the Software.
|
||||
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
syntax = "proto3";
|
||||
|
||||
package api;
|
||||
|
||||
import "api/common.proto";
|
||||
|
||||
option go_package = "github.com/osrg/gobgp/v4/api;api";
|
||||
|
||||
message Capability {
|
||||
oneof cap {
|
||||
UnknownCapability unknown = 1;
|
||||
MultiProtocolCapability multi_protocol = 2;
|
||||
RouteRefreshCapability route_refresh = 3;
|
||||
CarryingLabelInfoCapability carrying_label_info = 4;
|
||||
ExtendedNexthopCapability extended_nexthop = 5;
|
||||
GracefulRestartCapability graceful_restart = 6;
|
||||
FourOctetASNCapability four_octet_asn = 7;
|
||||
AddPathCapability add_path = 8;
|
||||
EnhancedRouteRefreshCapability enhanced_route_refresh = 9;
|
||||
LongLivedGracefulRestartCapability long_lived_graceful_restart = 10;
|
||||
RouteRefreshCiscoCapability route_refresh_cisco = 11;
|
||||
FqdnCapability fqdn = 12;
|
||||
SoftwareVersionCapability software_version = 13;
|
||||
}
|
||||
}
|
||||
|
||||
message MultiProtocolCapability {
|
||||
api.Family family = 1;
|
||||
}
|
||||
|
||||
message RouteRefreshCapability {}
|
||||
|
||||
message CarryingLabelInfoCapability {}
|
||||
|
||||
message ExtendedNexthopCapabilityTuple {
|
||||
api.Family nlri_family = 1;
|
||||
// Nexthop AFI must be either
|
||||
// gobgp.IPv4 or
|
||||
// gobgp.IPv6.
|
||||
api.Family nexthop_family = 2;
|
||||
}
|
||||
|
||||
message ExtendedNexthopCapability {
|
||||
repeated ExtendedNexthopCapabilityTuple tuples = 1;
|
||||
}
|
||||
|
||||
message GracefulRestartCapabilityTuple {
|
||||
api.Family family = 1;
|
||||
uint32 flags = 2;
|
||||
}
|
||||
|
||||
message GracefulRestartCapability {
|
||||
uint32 flags = 1;
|
||||
uint32 time = 2;
|
||||
repeated GracefulRestartCapabilityTuple tuples = 3;
|
||||
}
|
||||
|
||||
message FourOctetASNCapability {
|
||||
uint32 asn = 1;
|
||||
}
|
||||
|
||||
message AddPathCapabilityTuple {
|
||||
api.Family family = 1;
|
||||
enum Mode {
|
||||
MODE_UNSPECIFIED = 0; // NONE
|
||||
MODE_RECEIVE = 1;
|
||||
MODE_SEND = 2;
|
||||
MODE_BOTH = 3;
|
||||
}
|
||||
Mode mode = 2;
|
||||
}
|
||||
|
||||
message AddPathCapability {
|
||||
repeated AddPathCapabilityTuple tuples = 1;
|
||||
}
|
||||
|
||||
message EnhancedRouteRefreshCapability {}
|
||||
|
||||
message LongLivedGracefulRestartCapabilityTuple {
|
||||
api.Family family = 1;
|
||||
uint32 flags = 2;
|
||||
uint32 time = 3;
|
||||
}
|
||||
|
||||
message LongLivedGracefulRestartCapability {
|
||||
repeated LongLivedGracefulRestartCapabilityTuple tuples = 1;
|
||||
}
|
||||
|
||||
message RouteRefreshCiscoCapability {}
|
||||
|
||||
message FqdnCapability {
|
||||
string host_name = 1;
|
||||
string domain_name = 2;
|
||||
}
|
||||
|
||||
message SoftwareVersionCapability {
|
||||
string software_version = 1;
|
||||
}
|
||||
|
||||
message UnknownCapability {
|
||||
uint32 code = 1;
|
||||
bytes value = 2;
|
||||
}
|
||||
63
fiberlb/crates/fiberlb-server/proto/api/common.proto
Normal file
63
fiberlb/crates/fiberlb-server/proto/api/common.proto
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
syntax = "proto3";
|
||||
|
||||
package api;
|
||||
|
||||
option go_package = "github.com/osrg/gobgp/v4/api;api";
|
||||
|
||||
// Common types for pretty much everywhere
|
||||
|
||||
message Family {
|
||||
enum Afi {
|
||||
AFI_UNSPECIFIED = 0;
|
||||
AFI_IP = 1;
|
||||
AFI_IP6 = 2;
|
||||
AFI_L2VPN = 25;
|
||||
AFI_LS = 16388;
|
||||
AFI_OPAQUE = 16397;
|
||||
}
|
||||
|
||||
enum Safi {
|
||||
SAFI_UNSPECIFIED = 0;
|
||||
SAFI_UNICAST = 1;
|
||||
SAFI_MULTICAST = 2;
|
||||
SAFI_MPLS_LABEL = 4;
|
||||
SAFI_ENCAPSULATION = 7;
|
||||
SAFI_VPLS = 65;
|
||||
SAFI_EVPN = 70;
|
||||
SAFI_LS = 71;
|
||||
SAFI_SR_POLICY = 73;
|
||||
SAFI_MUP = 85;
|
||||
SAFI_MPLS_VPN = 128;
|
||||
SAFI_MPLS_VPN_MULTICAST = 129;
|
||||
SAFI_ROUTE_TARGET_CONSTRAINTS = 132;
|
||||
SAFI_FLOW_SPEC_UNICAST = 133;
|
||||
SAFI_FLOW_SPEC_VPN = 134;
|
||||
SAFI_KEY_VALUE = 241;
|
||||
}
|
||||
|
||||
Afi afi = 1;
|
||||
Safi safi = 2;
|
||||
}
|
||||
|
||||
message RouteDistinguisherTwoOctetASN {
|
||||
uint32 admin = 1;
|
||||
uint32 assigned = 2;
|
||||
}
|
||||
|
||||
message RouteDistinguisherIPAddress {
|
||||
string admin = 1;
|
||||
uint32 assigned = 2;
|
||||
}
|
||||
|
||||
message RouteDistinguisherFourOctetASN {
|
||||
uint32 admin = 1;
|
||||
uint32 assigned = 2;
|
||||
}
|
||||
|
||||
message RouteDistinguisher {
|
||||
oneof rd {
|
||||
RouteDistinguisherTwoOctetASN two_octet_asn = 1;
|
||||
RouteDistinguisherIPAddress ip_address = 2;
|
||||
RouteDistinguisherFourOctetASN four_octet_asn = 3;
|
||||
}
|
||||
}
|
||||
162
fiberlb/crates/fiberlb-server/proto/api/extcom.proto
Normal file
162
fiberlb/crates/fiberlb-server/proto/api/extcom.proto
Normal file
|
|
@ -0,0 +1,162 @@
|
|||
syntax = "proto3";
|
||||
|
||||
package api;
|
||||
|
||||
option go_package = "github.com/osrg/gobgp/v4/api;api";
|
||||
|
||||
// BGP Extended communities
|
||||
|
||||
message TwoOctetAsSpecificExtended {
|
||||
bool is_transitive = 1;
|
||||
uint32 sub_type = 2;
|
||||
uint32 asn = 3;
|
||||
uint32 local_admin = 4;
|
||||
}
|
||||
|
||||
message IPv4AddressSpecificExtended {
|
||||
bool is_transitive = 1;
|
||||
uint32 sub_type = 2;
|
||||
string address = 3;
|
||||
uint32 local_admin = 4;
|
||||
}
|
||||
|
||||
message FourOctetAsSpecificExtended {
|
||||
bool is_transitive = 1;
|
||||
uint32 sub_type = 2;
|
||||
uint32 asn = 3;
|
||||
uint32 local_admin = 4;
|
||||
}
|
||||
|
||||
message LinkBandwidthExtended {
|
||||
uint32 asn = 1;
|
||||
float bandwidth = 2;
|
||||
}
|
||||
|
||||
message ValidationExtended {
|
||||
uint32 state = 1;
|
||||
}
|
||||
|
||||
message ColorExtended {
|
||||
uint32 color = 1;
|
||||
}
|
||||
|
||||
message EncapExtended {
|
||||
uint32 tunnel_type = 1;
|
||||
}
|
||||
|
||||
message DefaultGatewayExtended {}
|
||||
|
||||
message OpaqueExtended {
|
||||
bool is_transitive = 1;
|
||||
bytes value = 3;
|
||||
}
|
||||
|
||||
message ESILabelExtended {
|
||||
bool is_single_active = 1;
|
||||
uint32 label = 2;
|
||||
}
|
||||
|
||||
message ESImportRouteTarget {
|
||||
string es_import = 1;
|
||||
}
|
||||
|
||||
message MacMobilityExtended {
|
||||
bool is_sticky = 1;
|
||||
uint32 sequence_num = 2;
|
||||
}
|
||||
|
||||
message RouterMacExtended {
|
||||
string mac = 1;
|
||||
}
|
||||
|
||||
message TrafficRateExtended {
|
||||
uint32 asn = 1;
|
||||
float rate = 2;
|
||||
}
|
||||
|
||||
message TrafficActionExtended {
|
||||
bool terminal = 1;
|
||||
bool sample = 2;
|
||||
}
|
||||
|
||||
message RedirectTwoOctetAsSpecificExtended {
|
||||
uint32 asn = 1;
|
||||
uint32 local_admin = 2;
|
||||
}
|
||||
|
||||
message RedirectIPv4AddressSpecificExtended {
|
||||
string address = 1;
|
||||
uint32 local_admin = 2;
|
||||
}
|
||||
|
||||
message RedirectFourOctetAsSpecificExtended {
|
||||
uint32 asn = 1;
|
||||
uint32 local_admin = 2;
|
||||
}
|
||||
|
||||
message TrafficRemarkExtended {
|
||||
uint32 dscp = 1;
|
||||
}
|
||||
|
||||
message MUPExtended {
|
||||
uint32 sub_type = 1;
|
||||
uint32 segment_id2 = 2;
|
||||
uint32 segment_id4 = 3;
|
||||
}
|
||||
|
||||
message VPLSExtended {
|
||||
uint32 control_flags = 1;
|
||||
uint32 mtu = 2;
|
||||
}
|
||||
|
||||
message ETreeExtended {
|
||||
bool is_leaf = 1;
|
||||
uint32 label = 2;
|
||||
}
|
||||
|
||||
message MulticastFlagsExtended {
|
||||
bool is_igmp_proxy = 1;
|
||||
bool is_mld_proxy = 2;
|
||||
}
|
||||
|
||||
message UnknownExtended {
|
||||
uint32 type = 1;
|
||||
bytes value = 2;
|
||||
}
|
||||
|
||||
message ExtendedCommunity {
|
||||
oneof extcom {
|
||||
UnknownExtended unknown = 1;
|
||||
TwoOctetAsSpecificExtended two_octet_as_specific = 2;
|
||||
IPv4AddressSpecificExtended ipv4_address_specific = 3;
|
||||
FourOctetAsSpecificExtended four_octet_as_specific = 4;
|
||||
LinkBandwidthExtended link_bandwidth = 5;
|
||||
ValidationExtended validation = 6;
|
||||
ColorExtended color = 7;
|
||||
EncapExtended encap = 8;
|
||||
DefaultGatewayExtended default_gateway = 9;
|
||||
OpaqueExtended opaque = 10;
|
||||
ESILabelExtended esi_label = 11;
|
||||
ESImportRouteTarget es_import = 12;
|
||||
MacMobilityExtended mac_mobility = 13;
|
||||
RouterMacExtended router_mac = 14;
|
||||
TrafficRateExtended traffic_rate = 15;
|
||||
TrafficActionExtended traffic_action = 16;
|
||||
RedirectTwoOctetAsSpecificExtended redirect_two_octet_as_specific = 17;
|
||||
RedirectIPv4AddressSpecificExtended redirect_ipv4_address_specific = 18;
|
||||
RedirectFourOctetAsSpecificExtended redirect_four_octet_as_specific = 19;
|
||||
TrafficRemarkExtended traffic_remark = 20;
|
||||
MUPExtended mup = 21;
|
||||
VPLSExtended vpls = 22;
|
||||
ETreeExtended etree = 23;
|
||||
MulticastFlagsExtended multicast_flags = 24;
|
||||
}
|
||||
}
|
||||
|
||||
message RouteTarget {
|
||||
oneof rt {
|
||||
TwoOctetAsSpecificExtended two_octet_as_specific = 1;
|
||||
IPv4AddressSpecificExtended ipv4_address_specific = 2;
|
||||
FourOctetAsSpecificExtended four_octet_as_specific = 3;
|
||||
}
|
||||
}
|
||||
1379
fiberlb/crates/fiberlb-server/proto/api/gobgp.proto
Normal file
1379
fiberlb/crates/fiberlb-server/proto/api/gobgp.proto
Normal file
File diff suppressed because it is too large
Load diff
361
fiberlb/crates/fiberlb-server/proto/api/nlri.proto
Normal file
361
fiberlb/crates/fiberlb-server/proto/api/nlri.proto
Normal file
|
|
@ -0,0 +1,361 @@
|
|||
syntax = "proto3";
|
||||
|
||||
package api;
|
||||
|
||||
import "api/common.proto";
|
||||
import "api/extcom.proto";
|
||||
|
||||
option go_package = "github.com/osrg/gobgp/v4/api;api";
|
||||
|
||||
// Main NLRI type
|
||||
|
||||
message NLRI {
|
||||
oneof nlri {
|
||||
IPAddressPrefix prefix = 1;
|
||||
LabeledIPAddressPrefix labeled_prefix = 2;
|
||||
EncapsulationNLRI encapsulation = 3;
|
||||
VPLSNLRI vpls = 4;
|
||||
EVPNEthernetAutoDiscoveryRoute evpn_ethernet_ad = 5;
|
||||
EVPNMACIPAdvertisementRoute evpn_macadv = 6;
|
||||
EVPNInclusiveMulticastEthernetTagRoute evpn_multicast = 7;
|
||||
EVPNEthernetSegmentRoute evpn_ethernet_segment = 8;
|
||||
EVPNIPPrefixRoute evpn_ip_prefix = 9;
|
||||
EVPNIPMSIRoute evpn_i_pmsi = 10;
|
||||
LabeledVPNIPAddressPrefix labeled_vpn_ip_prefix = 11;
|
||||
RouteTargetMembershipNLRI route_target_membership = 12;
|
||||
FlowSpecNLRI flow_spec = 13;
|
||||
VPNFlowSpecNLRI vpn_flow_spec = 14;
|
||||
OpaqueNLRI opaque = 15;
|
||||
LsAddrPrefix ls_addr_prefix = 16;
|
||||
SRPolicyNLRI sr_policy = 17;
|
||||
MUPInterworkSegmentDiscoveryRoute mup_interwork_segment_discovery = 18;
|
||||
MUPDirectSegmentDiscoveryRoute mup_direct_segment_discovery = 19;
|
||||
MUPType1SessionTransformedRoute mup_type_1_session_transformed = 20;
|
||||
MUPType2SessionTransformedRoute mup_type_2_session_transformed = 21;
|
||||
}
|
||||
}
|
||||
|
||||
// IPAddressPrefix represents the NLRI for:
|
||||
// - AFI=1, SAFI=1
|
||||
// - AFI=2, SAFI=1
|
||||
message IPAddressPrefix {
|
||||
uint32 prefix_len = 1;
|
||||
string prefix = 2;
|
||||
}
|
||||
|
||||
// LabeledIPAddressPrefix represents the NLRI for:
|
||||
// - AFI=1, SAFI=4
|
||||
// - AFI=2, SAFI=4
|
||||
message LabeledIPAddressPrefix {
|
||||
repeated uint32 labels = 1;
|
||||
uint32 prefix_len = 2;
|
||||
string prefix = 3;
|
||||
}
|
||||
|
||||
// EncapsulationNLRI represents the NLRI for:
|
||||
// - AFI=1, SAFI=7
|
||||
// - AFI=2, SAFI=7
|
||||
message EncapsulationNLRI {
|
||||
string address = 1;
|
||||
}
|
||||
|
||||
// VPLSNLRI represents the NLRI for:
|
||||
// - AFI=25, SAFI=65
|
||||
message VPLSNLRI {
|
||||
RouteDistinguisher rd = 1;
|
||||
uint32 ve_id = 2;
|
||||
uint32 ve_block_offset = 3;
|
||||
uint32 ve_block_size = 4;
|
||||
uint32 label_block_base = 5;
|
||||
}
|
||||
|
||||
message EthernetSegmentIdentifier {
|
||||
uint32 type = 1;
|
||||
bytes value = 2;
|
||||
}
|
||||
|
||||
// EVPNEthernetAutoDiscoveryRoute represents the NLRI for:
|
||||
// - AFI=25, SAFI=70, RouteType=1
|
||||
message EVPNEthernetAutoDiscoveryRoute {
|
||||
RouteDistinguisher rd = 1;
|
||||
EthernetSegmentIdentifier esi = 2;
|
||||
uint32 ethernet_tag = 3;
|
||||
uint32 label = 4;
|
||||
}
|
||||
|
||||
// EVPNMACIPAdvertisementRoute represents the NLRI for:
|
||||
// - AFI=25, SAFI=70, RouteType=2
|
||||
message EVPNMACIPAdvertisementRoute {
|
||||
RouteDistinguisher rd = 1;
|
||||
EthernetSegmentIdentifier esi = 2;
|
||||
uint32 ethernet_tag = 3;
|
||||
string mac_address = 4;
|
||||
string ip_address = 5;
|
||||
repeated uint32 labels = 6;
|
||||
}
|
||||
|
||||
// EVPNInclusiveMulticastEthernetTagRoute represents the NLRI for:
|
||||
// - AFI=25, SAFI=70, RouteType=3
|
||||
message EVPNInclusiveMulticastEthernetTagRoute {
|
||||
RouteDistinguisher rd = 1;
|
||||
uint32 ethernet_tag = 2;
|
||||
string ip_address = 3;
|
||||
}
|
||||
|
||||
// EVPNEthernetSegmentRoute represents the NLRI for:
|
||||
// - AFI=25, SAFI=70, RouteType=4
|
||||
message EVPNEthernetSegmentRoute {
|
||||
RouteDistinguisher rd = 1;
|
||||
EthernetSegmentIdentifier esi = 2;
|
||||
string ip_address = 3;
|
||||
}
|
||||
|
||||
// EVPNIPPrefixRoute represents the NLRI for:
|
||||
// - AFI=25, SAFI=70, RouteType=5
|
||||
message EVPNIPPrefixRoute {
|
||||
RouteDistinguisher rd = 1;
|
||||
EthernetSegmentIdentifier esi = 2;
|
||||
uint32 ethernet_tag = 3;
|
||||
string ip_prefix = 4;
|
||||
uint32 ip_prefix_len = 5;
|
||||
string gw_address = 6;
|
||||
uint32 label = 7;
|
||||
}
|
||||
|
||||
// EVPNIPMSIRoute represents the NLRI for:
|
||||
// - AFI=25, SAFI=70, RouteType=9
|
||||
message EVPNIPMSIRoute {
|
||||
RouteDistinguisher rd = 1;
|
||||
uint32 ethernet_tag = 2;
|
||||
RouteTarget rt = 3;
|
||||
}
|
||||
|
||||
// SRPolicyNLRI represents the NLRI for:
|
||||
// - AFI=1, SAFI=73
|
||||
// - AFI=2, SAFI=73
|
||||
message SRPolicyNLRI {
|
||||
// length field carries the length of NLRI portion expressed in bits
|
||||
uint32 length = 1;
|
||||
// distinguisher field carries 4-octet value uniquely identifying the policy
|
||||
// in the context of <color, endpoint> tuple.
|
||||
uint32 distinguisher = 2;
|
||||
// color field carries 4-octet value identifying (with the endpoint) the
|
||||
// policy. The color is used to match the color of the destination
|
||||
// prefixes to steer traffic into the SR Policy
|
||||
uint32 color = 3;
|
||||
// endpoint field identifies the endpoint of a policy. The Endpoint may
|
||||
// represent a single node or a set of nodes (e.g., an anycast
|
||||
// address). The Endpoint is an IPv4 (4-octet) address or an IPv6
|
||||
// (16-octet) address according to the AFI of the NLRI.
|
||||
bytes endpoint = 4;
|
||||
}
|
||||
|
||||
// LabeledVPNIPAddressPrefix represents the NLRI for:
|
||||
// - AFI=1, SAFI=128
|
||||
// - AFI=2, SAFI=128
|
||||
message LabeledVPNIPAddressPrefix {
|
||||
repeated uint32 labels = 1;
|
||||
RouteDistinguisher rd = 2;
|
||||
uint32 prefix_len = 3;
|
||||
string prefix = 4;
|
||||
}
|
||||
|
||||
// RouteTargetMembershipNLRI represents the NLRI for:
|
||||
// - AFI=1, SAFI=132
|
||||
message RouteTargetMembershipNLRI {
|
||||
uint32 asn = 1;
|
||||
RouteTarget rt = 2;
|
||||
}
|
||||
|
||||
message FlowSpecIPPrefix {
|
||||
uint32 type = 1;
|
||||
uint32 prefix_len = 2;
|
||||
string prefix = 3;
|
||||
// IPv6 only
|
||||
uint32 offset = 4;
|
||||
}
|
||||
|
||||
message FlowSpecMAC {
|
||||
uint32 type = 1;
|
||||
string address = 2;
|
||||
}
|
||||
|
||||
message FlowSpecComponentItem {
|
||||
// Operator for Numeric type, Operand for Bitmask type
|
||||
uint32 op = 1;
|
||||
uint64 value = 2;
|
||||
}
|
||||
|
||||
message FlowSpecComponent {
|
||||
uint32 type = 1;
|
||||
repeated FlowSpecComponentItem items = 2;
|
||||
}
|
||||
|
||||
message FlowSpecRule {
|
||||
oneof rule {
|
||||
FlowSpecIPPrefix ip_prefix = 1;
|
||||
FlowSpecMAC mac = 2;
|
||||
FlowSpecComponent component = 3;
|
||||
}
|
||||
}
|
||||
|
||||
// FlowSpecNLRI represents the NLRI for:
|
||||
// - AFI=1, SAFI=133
|
||||
// - AFI=2, SAFI=133
|
||||
message FlowSpecNLRI {
|
||||
repeated FlowSpecRule rules = 1;
|
||||
}
|
||||
|
||||
// VPNFlowSpecNLRI represents the NLRI for:
|
||||
// - AFI=1, SAFI=134
|
||||
// - AFI=2, SAFI=134
|
||||
// - AFI=25, SAFI=134
|
||||
message VPNFlowSpecNLRI {
|
||||
RouteDistinguisher rd = 1;
|
||||
repeated FlowSpecRule rules = 2;
|
||||
}
|
||||
|
||||
// OpaqueNLRI represents the NLRI for:
|
||||
// - AFI=16397, SAFI=241
|
||||
message OpaqueNLRI {
|
||||
bytes key = 1;
|
||||
bytes value = 2;
|
||||
}
|
||||
|
||||
// Based om RFC 7752, Table 1.
|
||||
enum LsNLRIType {
|
||||
LS_NLRI_TYPE_UNSPECIFIED = 0;
|
||||
LS_NLRI_TYPE_NODE = 1;
|
||||
LS_NLRI_TYPE_LINK = 2;
|
||||
LS_NLRI_TYPE_PREFIX_V4 = 3;
|
||||
LS_NLRI_TYPE_PREFIX_V6 = 4;
|
||||
LS_NLRI_TYPE_SRV6_SID = 6;
|
||||
}
|
||||
|
||||
enum LsProtocolID {
|
||||
LS_PROTOCOL_ID_UNSPECIFIED = 0;
|
||||
LS_PROTOCOL_ID_ISIS_L1 = 1;
|
||||
LS_PROTOCOL_ID_ISIS_L2 = 2;
|
||||
LS_PROTOCOL_ID_OSPF_V2 = 3;
|
||||
LS_PROTOCOL_ID_DIRECT = 4;
|
||||
LS_PROTOCOL_ID_STATIC = 5;
|
||||
LS_PROTOCOL_ID_OSPF_V3 = 6;
|
||||
}
|
||||
|
||||
message LsNodeDescriptor {
|
||||
uint32 asn = 1;
|
||||
uint32 bgp_ls_id = 2;
|
||||
uint32 ospf_area_id = 3;
|
||||
bool pseudonode = 4;
|
||||
string igp_router_id = 5;
|
||||
string bgp_router_id = 6;
|
||||
uint32 bgp_confederation_member = 7;
|
||||
}
|
||||
|
||||
message LsLinkDescriptor {
|
||||
uint32 link_local_id = 1;
|
||||
uint32 link_remote_id = 2;
|
||||
string interface_addr_ipv4 = 3;
|
||||
string neighbor_addr_ipv4 = 4;
|
||||
string interface_addr_ipv6 = 5;
|
||||
string neighbor_addr_ipv6 = 6;
|
||||
}
|
||||
|
||||
enum LsOspfRouteType {
|
||||
LS_OSPF_ROUTE_TYPE_UNSPECIFIED = 0;
|
||||
LS_OSPF_ROUTE_TYPE_INTRA_AREA = 1;
|
||||
LS_OSPF_ROUTE_TYPE_INTER_AREA = 2;
|
||||
LS_OSPF_ROUTE_TYPE_EXTERNAL1 = 3;
|
||||
LS_OSPF_ROUTE_TYPE_EXTERNAL2 = 4;
|
||||
LS_OSPF_ROUTE_TYPE_NSSA1 = 5;
|
||||
LS_OSPF_ROUTE_TYPE_NSSA2 = 6;
|
||||
}
|
||||
|
||||
message LsPrefixDescriptor {
|
||||
repeated string ip_reachability = 1;
|
||||
LsOspfRouteType ospf_route_type = 2;
|
||||
}
|
||||
|
||||
message LsNodeNLRI {
|
||||
LsNodeDescriptor local_node = 1;
|
||||
}
|
||||
|
||||
message LsLinkNLRI {
|
||||
LsNodeDescriptor local_node = 1;
|
||||
LsNodeDescriptor remote_node = 2;
|
||||
LsLinkDescriptor link_descriptor = 3;
|
||||
}
|
||||
|
||||
message LsPrefixV4NLRI {
|
||||
LsNodeDescriptor local_node = 1;
|
||||
LsPrefixDescriptor prefix_descriptor = 2;
|
||||
}
|
||||
|
||||
message LsPrefixV6NLRI {
|
||||
LsNodeDescriptor local_node = 1;
|
||||
LsPrefixDescriptor prefix_descriptor = 2;
|
||||
}
|
||||
|
||||
// https://tools.ietf.org/html/rfc9552
|
||||
message LsSrv6SIDInformation {
|
||||
repeated string sids = 1;
|
||||
}
|
||||
|
||||
message LsMultiTopologyIdentifier {
|
||||
repeated uint32 multi_topo_ids = 1;
|
||||
}
|
||||
|
||||
// TODO: LsSrPolicyiCandidatePathNLRI
|
||||
message LsSrv6SIDNLRI {
|
||||
LsNodeDescriptor local_node = 1;
|
||||
LsSrv6SIDInformation srv6_sid_information = 2;
|
||||
LsMultiTopologyIdentifier multi_topo_id = 3;
|
||||
}
|
||||
|
||||
// LsAddrPrefix represents the NLRI for:
|
||||
// - AFI=16388, SAFI=71
|
||||
message LsAddrPrefix {
|
||||
LsNLRIType type = 1;
|
||||
message LsNLRI {
|
||||
oneof nlri {
|
||||
LsNodeNLRI node = 1;
|
||||
LsLinkNLRI link = 2;
|
||||
LsPrefixV4NLRI prefix_v4 = 3;
|
||||
LsPrefixV6NLRI prefix_v6 = 4;
|
||||
LsSrv6SIDNLRI srv6_sid = 5;
|
||||
}
|
||||
}
|
||||
LsNLRI nlri = 2;
|
||||
uint32 length = 3;
|
||||
LsProtocolID protocol_id = 4;
|
||||
uint64 identifier = 5;
|
||||
}
|
||||
|
||||
message MUPInterworkSegmentDiscoveryRoute {
|
||||
RouteDistinguisher rd = 1;
|
||||
string prefix = 2;
|
||||
}
|
||||
|
||||
message MUPDirectSegmentDiscoveryRoute {
|
||||
RouteDistinguisher rd = 1;
|
||||
string address = 2;
|
||||
}
|
||||
|
||||
message MUPType1SessionTransformedRoute {
|
||||
RouteDistinguisher rd = 1;
|
||||
uint32 prefix_length = 2 [deprecated = true];
|
||||
string prefix = 3;
|
||||
uint32 teid = 4;
|
||||
uint32 qfi = 5;
|
||||
uint32 endpoint_address_length = 6;
|
||||
string endpoint_address = 7;
|
||||
uint32 source_address_length = 8;
|
||||
string source_address = 9;
|
||||
}
|
||||
|
||||
message MUPType2SessionTransformedRoute {
|
||||
RouteDistinguisher rd = 1;
|
||||
uint32 endpoint_address_length = 2;
|
||||
string endpoint_address = 3;
|
||||
uint32 teid = 4;
|
||||
}
|
||||
3
fiberlb/crates/fiberlb-server/src/gobgp.rs
Normal file
3
fiberlb/crates/fiberlb-server/src/gobgp.rs
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
pub mod api {
|
||||
tonic::include_proto!("api");
|
||||
}
|
||||
16
flaredb/crates/flaredb-client/examples/basic.rs
Normal file
16
flaredb/crates/flaredb-client/examples/basic.rs
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
use flaredb_client::RdbClient;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// Connect via PD (retry/backoff enabled by default).
|
||||
let mut client = RdbClient::builder("127.0.0.1:2379")
|
||||
.namespace("default")
|
||||
.build()
|
||||
.await?;
|
||||
|
||||
client.raw_put(b"example".to_vec(), b"value".to_vec()).await?;
|
||||
let val = client.raw_get(b"example".to_vec()).await?;
|
||||
println!("Got: {:?}", val);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
224
flashdns/crates/flashdns-server/src/reverse_zone_service.rs
Normal file
224
flashdns/crates/flashdns-server/src/reverse_zone_service.rs
Normal file
|
|
@ -0,0 +1,224 @@
|
|||
//! ReverseZoneService gRPC implementation
|
||||
|
||||
use std::net::IpAddr;
|
||||
use std::sync::Arc;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
use crate::dns::ptr_patterns::apply_pattern;
|
||||
use crate::metadata::DnsMetadataStore;
|
||||
use flashdns_api::proto::{
|
||||
CreateReverseZoneRequest, DeleteReverseZoneRequest, DeleteReverseZoneResponse,
|
||||
GetReverseZoneRequest, ListReverseZonesRequest, ListReverseZonesResponse,
|
||||
ResolvePtrForIpRequest, ResolvePtrForIpResponse, ReverseZone as ProtoReverseZone,
|
||||
};
|
||||
use flashdns_api::ReverseZoneService;
|
||||
use flashdns_types::ReverseZone;
|
||||
use ipnet::IpNet;
|
||||
use tonic::{Request, Response, Status};
|
||||
use uuid::Uuid;
|
||||
|
||||
/// ReverseZoneService implementation
|
||||
pub struct ReverseZoneServiceImpl {
|
||||
metadata: Arc<DnsMetadataStore>,
|
||||
}
|
||||
|
||||
impl ReverseZoneServiceImpl {
|
||||
/// Create a new ReverseZoneService with metadata store
|
||||
pub fn new(metadata: Arc<DnsMetadataStore>) -> Self {
|
||||
Self { metadata }
|
||||
}
|
||||
}
|
||||
|
||||
fn reverse_zone_to_proto(zone: &ReverseZone) -> ProtoReverseZone {
|
||||
ProtoReverseZone {
|
||||
id: zone.id.clone(),
|
||||
org_id: zone.org_id.clone(),
|
||||
project_id: zone.project_id.clone(),
|
||||
cidr: zone.cidr.clone(),
|
||||
arpa_zone: zone.arpa_zone.clone(),
|
||||
ptr_pattern: zone.ptr_pattern.clone(),
|
||||
ttl: zone.ttl,
|
||||
created_at: zone.created_at,
|
||||
updated_at: zone.updated_at,
|
||||
}
|
||||
}
|
||||
|
||||
fn now_epoch() -> u64 {
|
||||
SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.unwrap_or_default()
|
||||
.as_secs()
|
||||
}
|
||||
|
||||
fn find_reverse_zone_for_ip(zones: &[ReverseZone], ip: IpAddr) -> Option<ReverseZone> {
|
||||
let mut best_match: Option<ReverseZone> = None;
|
||||
let mut best_prefix_len = 0;
|
||||
|
||||
for zone in zones {
|
||||
if let Ok(cidr) = zone.cidr.parse::<IpNet>() {
|
||||
if cidr.contains(&ip) {
|
||||
let prefix_len = cidr.prefix_len();
|
||||
if prefix_len > best_prefix_len {
|
||||
best_prefix_len = prefix_len;
|
||||
best_match = Some(zone.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
best_match
|
||||
}
|
||||
|
||||
#[tonic::async_trait]
|
||||
impl ReverseZoneService for ReverseZoneServiceImpl {
|
||||
async fn create_reverse_zone(
|
||||
&self,
|
||||
request: Request<CreateReverseZoneRequest>,
|
||||
) -> Result<Response<ProtoReverseZone>, Status> {
|
||||
let req = request.into_inner();
|
||||
|
||||
if req.org_id.is_empty() {
|
||||
return Err(Status::invalid_argument("org_id is required"));
|
||||
}
|
||||
if req.cidr.is_empty() {
|
||||
return Err(Status::invalid_argument("cidr is required"));
|
||||
}
|
||||
if req.ptr_pattern.is_empty() {
|
||||
return Err(Status::invalid_argument("ptr_pattern is required"));
|
||||
}
|
||||
|
||||
let existing = self
|
||||
.metadata
|
||||
.list_reverse_zones(&req.org_id, req.project_id.as_deref())
|
||||
.await
|
||||
.map_err(|e| Status::internal(format!("metadata error: {}", e)))?;
|
||||
|
||||
if existing.iter().any(|zone| zone.cidr == req.cidr) {
|
||||
return Err(Status::already_exists("reverse zone already exists"));
|
||||
}
|
||||
|
||||
let now = now_epoch();
|
||||
let mut zone = ReverseZone {
|
||||
id: Uuid::new_v4().to_string(),
|
||||
org_id: req.org_id,
|
||||
project_id: req.project_id,
|
||||
cidr: req.cidr,
|
||||
arpa_zone: String::new(),
|
||||
ptr_pattern: req.ptr_pattern,
|
||||
ttl: if req.ttl == 0 { 3600 } else { req.ttl },
|
||||
created_at: now,
|
||||
updated_at: now,
|
||||
};
|
||||
|
||||
zone = self
|
||||
.metadata
|
||||
.create_reverse_zone(zone)
|
||||
.await
|
||||
.map_err(|e| Status::internal(format!("failed to save reverse zone: {}", e)))?;
|
||||
|
||||
Ok(Response::new(reverse_zone_to_proto(&zone)))
|
||||
}
|
||||
|
||||
async fn get_reverse_zone(
|
||||
&self,
|
||||
request: Request<GetReverseZoneRequest>,
|
||||
) -> Result<Response<ProtoReverseZone>, Status> {
|
||||
let req = request.into_inner();
|
||||
|
||||
if req.zone_id.is_empty() {
|
||||
return Err(Status::invalid_argument("zone_id is required"));
|
||||
}
|
||||
|
||||
let zone = self
|
||||
.metadata
|
||||
.get_reverse_zone(&req.zone_id)
|
||||
.await
|
||||
.map_err(|e| Status::internal(format!("metadata error: {}", e)))?
|
||||
.ok_or_else(|| Status::not_found("reverse zone not found"))?;
|
||||
|
||||
Ok(Response::new(reverse_zone_to_proto(&zone)))
|
||||
}
|
||||
|
||||
async fn delete_reverse_zone(
|
||||
&self,
|
||||
request: Request<DeleteReverseZoneRequest>,
|
||||
) -> Result<Response<DeleteReverseZoneResponse>, Status> {
|
||||
let req = request.into_inner();
|
||||
|
||||
if req.zone_id.is_empty() {
|
||||
return Err(Status::invalid_argument("zone_id is required"));
|
||||
}
|
||||
|
||||
let zone = self
|
||||
.metadata
|
||||
.get_reverse_zone(&req.zone_id)
|
||||
.await
|
||||
.map_err(|e| Status::internal(format!("metadata error: {}", e)))?
|
||||
.ok_or_else(|| Status::not_found("reverse zone not found"))?;
|
||||
|
||||
self.metadata
|
||||
.delete_reverse_zone(&zone)
|
||||
.await
|
||||
.map_err(|e| Status::internal(format!("failed to delete reverse zone: {}", e)))?;
|
||||
|
||||
Ok(Response::new(DeleteReverseZoneResponse { success: true }))
|
||||
}
|
||||
|
||||
async fn list_reverse_zones(
|
||||
&self,
|
||||
request: Request<ListReverseZonesRequest>,
|
||||
) -> Result<Response<ListReverseZonesResponse>, Status> {
|
||||
let req = request.into_inner();
|
||||
|
||||
if req.org_id.is_empty() {
|
||||
return Err(Status::invalid_argument("org_id is required"));
|
||||
}
|
||||
|
||||
let zones = self
|
||||
.metadata
|
||||
.list_reverse_zones(&req.org_id, req.project_id.as_deref())
|
||||
.await
|
||||
.map_err(|e| Status::internal(format!("metadata error: {}", e)))?;
|
||||
|
||||
let proto_zones = zones.iter().map(reverse_zone_to_proto).collect();
|
||||
|
||||
Ok(Response::new(ListReverseZonesResponse { zones: proto_zones }))
|
||||
}
|
||||
|
||||
async fn resolve_ptr_for_ip(
|
||||
&self,
|
||||
request: Request<ResolvePtrForIpRequest>,
|
||||
) -> Result<Response<ResolvePtrForIpResponse>, Status> {
|
||||
let req = request.into_inner();
|
||||
|
||||
if req.ip_address.is_empty() {
|
||||
return Err(Status::invalid_argument("ip_address is required"));
|
||||
}
|
||||
|
||||
let ip: IpAddr = req
|
||||
.ip_address
|
||||
.parse()
|
||||
.map_err(|_| Status::invalid_argument("invalid ip_address"))?;
|
||||
|
||||
let zones = self
|
||||
.metadata
|
||||
.list_all_reverse_zones()
|
||||
.await
|
||||
.map_err(|e| Status::internal(format!("metadata error: {}", e)))?;
|
||||
|
||||
if let Some(zone) = find_reverse_zone_for_ip(&zones, ip) {
|
||||
let ptr_value = apply_pattern(&zone.ptr_pattern, ip);
|
||||
return Ok(Response::new(ResolvePtrForIpResponse {
|
||||
ptr_record: Some(ptr_value),
|
||||
reverse_zone_id: Some(zone.id),
|
||||
found: true,
|
||||
}));
|
||||
}
|
||||
|
||||
Ok(Response::new(ResolvePtrForIpResponse {
|
||||
ptr_record: None,
|
||||
reverse_zone_id: None,
|
||||
found: false,
|
||||
}))
|
||||
}
|
||||
}
|
||||
365
iam/crates/iam-api/src/credential_service.rs
Normal file
365
iam/crates/iam-api/src/credential_service.rs
Normal file
|
|
@ -0,0 +1,365 @@
|
|||
use std::sync::Arc;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
use aes_gcm::{aead::Aead, Aes256Gcm, Key, KeyInit, Nonce};
|
||||
use argon2::{password_hash::{PasswordHasher, SaltString}, Argon2};
|
||||
use base64::{engine::general_purpose::STANDARD, Engine};
|
||||
use rand_core::{OsRng, RngCore};
|
||||
use tonic::{Request, Response, Status};
|
||||
|
||||
use iam_store::CredentialStore;
|
||||
use iam_types::{Argon2Params, CredentialRecord};
|
||||
|
||||
use crate::proto::{
|
||||
iam_credential_server::IamCredential, CreateS3CredentialRequest,
|
||||
CreateS3CredentialResponse, Credential, GetSecretKeyRequest, GetSecretKeyResponse,
|
||||
ListCredentialsRequest, ListCredentialsResponse, RevokeCredentialRequest,
|
||||
RevokeCredentialResponse,
|
||||
};
|
||||
|
||||
fn now_ts() -> u64 {
|
||||
SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.unwrap_or_default()
|
||||
.as_secs()
|
||||
}
|
||||
|
||||
pub struct IamCredentialService {
|
||||
store: Arc<CredentialStore>,
|
||||
cipher: Aes256Gcm,
|
||||
key_id: String,
|
||||
}
|
||||
|
||||
impl IamCredentialService {
|
||||
pub fn new(store: Arc<CredentialStore>, master_key: &[u8], key_id: &str) -> Result<Self, Status> {
|
||||
if master_key.len() != 32 {
|
||||
return Err(Status::failed_precondition(
|
||||
"IAM_CRED_MASTER_KEY must be 32 bytes",
|
||||
));
|
||||
}
|
||||
let cipher = Aes256Gcm::new(Key::<Aes256Gcm>::from_slice(master_key));
|
||||
Ok(Self {
|
||||
store,
|
||||
cipher,
|
||||
key_id: key_id.to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
fn generate_secret() -> (String, Vec<u8>) {
|
||||
let raw = uuid::Uuid::new_v4().as_bytes().to_vec();
|
||||
let secret_b64 = STANDARD.encode(&raw);
|
||||
(secret_b64, raw)
|
||||
}
|
||||
|
||||
fn hash_secret(raw: &[u8]) -> (String, Argon2Params) {
|
||||
let salt = SaltString::generate(&mut OsRng);
|
||||
let argon2 = Argon2::default();
|
||||
let hash = argon2
|
||||
.hash_password(raw, &salt)
|
||||
.expect("argon2 hash")
|
||||
.to_string();
|
||||
let params = Argon2Params {
|
||||
m_cost_kib: argon2.params().m_cost(),
|
||||
t_cost: argon2.params().t_cost(),
|
||||
p_cost: argon2.params().p_cost(),
|
||||
salt_b64: salt.to_string(),
|
||||
};
|
||||
(hash, params)
|
||||
}
|
||||
|
||||
fn encrypt_secret(&self, raw: &[u8]) -> Result<String, Status> {
|
||||
let mut nonce_bytes = [0u8; 12];
|
||||
OsRng.fill_bytes(&mut nonce_bytes);
|
||||
let nonce = Nonce::from_slice(&nonce_bytes);
|
||||
let ciphertext = self
|
||||
.cipher
|
||||
.encrypt(nonce, raw)
|
||||
.map_err(|e| Status::internal(format!("encrypt secret: {}", e)))?;
|
||||
let mut combined = nonce_bytes.to_vec();
|
||||
combined.extend_from_slice(&ciphertext);
|
||||
Ok(STANDARD.encode(combined))
|
||||
}
|
||||
|
||||
fn decrypt_secret(&self, enc_b64: &str) -> Result<Vec<u8>, Status> {
|
||||
let data = STANDARD
|
||||
.decode(enc_b64)
|
||||
.map_err(|e| Status::internal(format!("invalid b64: {}", e)))?;
|
||||
if data.len() < 12 {
|
||||
return Err(Status::internal("ciphertext too short"));
|
||||
}
|
||||
let (nonce_bytes, ct) = data.split_at(12);
|
||||
let nonce = Nonce::from_slice(nonce_bytes);
|
||||
self.cipher
|
||||
.decrypt(nonce, ct)
|
||||
.map_err(|e| Status::internal(format!("decrypt failed: {}", e)))
|
||||
}
|
||||
}
|
||||
|
||||
#[tonic::async_trait]
|
||||
impl IamCredential for IamCredentialService {
|
||||
async fn create_s3_credential(
|
||||
&self,
|
||||
request: Request<CreateS3CredentialRequest>,
|
||||
) -> Result<Response<CreateS3CredentialResponse>, Status> {
|
||||
let req = request.into_inner();
|
||||
let now = now_ts();
|
||||
let (secret_b64, raw_secret) = Self::generate_secret();
|
||||
let (hash, kdf) = Self::hash_secret(&raw_secret);
|
||||
let secret_enc = self.encrypt_secret(&raw_secret)?;
|
||||
|
||||
let access_key_id = format!("ak_{}", uuid::Uuid::new_v4());
|
||||
let record = CredentialRecord {
|
||||
access_key_id: access_key_id.clone(),
|
||||
principal_id: req.principal_id.clone(),
|
||||
created_at: now,
|
||||
expires_at: req.expires_at,
|
||||
revoked: false,
|
||||
description: if req.description.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(req.description)
|
||||
},
|
||||
secret_hash: hash,
|
||||
secret_enc,
|
||||
key_id: self.key_id.clone(),
|
||||
version: 1,
|
||||
kdf,
|
||||
};
|
||||
|
||||
self.store
|
||||
.put(&record)
|
||||
.await
|
||||
.map_err(|e| Status::internal(format!("store credential: {}", e)))?;
|
||||
|
||||
Ok(Response::new(CreateS3CredentialResponse {
|
||||
access_key_id,
|
||||
secret_key: secret_b64,
|
||||
created_at: now,
|
||||
expires_at: req.expires_at,
|
||||
}))
|
||||
}
|
||||
|
||||
async fn get_secret_key(
|
||||
&self,
|
||||
request: Request<GetSecretKeyRequest>,
|
||||
) -> Result<Response<GetSecretKeyResponse>, Status> {
|
||||
let req = request.into_inner();
|
||||
let record = match self.store.get(&req.access_key_id).await {
|
||||
Ok(Some((rec, _))) => rec,
|
||||
Ok(None) => return Err(Status::not_found("access key not found")),
|
||||
Err(e) => {
|
||||
return Err(Status::internal(format!(
|
||||
"failed to load credential: {}",
|
||||
e
|
||||
)))
|
||||
}
|
||||
};
|
||||
if record.revoked {
|
||||
return Err(Status::permission_denied("access key revoked"));
|
||||
}
|
||||
if let Some(exp) = record.expires_at {
|
||||
if now_ts() > exp {
|
||||
return Err(Status::permission_denied("access key expired"));
|
||||
}
|
||||
}
|
||||
let secret = self.decrypt_secret(&record.secret_enc)?;
|
||||
|
||||
Ok(Response::new(GetSecretKeyResponse {
|
||||
secret_key: STANDARD.encode(secret),
|
||||
principal_id: record.principal_id,
|
||||
expires_at: record.expires_at,
|
||||
}))
|
||||
}
|
||||
|
||||
async fn list_credentials(
|
||||
&self,
|
||||
request: Request<ListCredentialsRequest>,
|
||||
) -> Result<Response<ListCredentialsResponse>, Status> {
|
||||
let req = request.into_inner();
|
||||
let items = self
|
||||
.store
|
||||
.list_for_principal(&req.principal_id, 1000)
|
||||
.await
|
||||
.map_err(|e| Status::internal(format!("list credentials: {}", e)))?;
|
||||
let creds: Vec<Credential> = items
|
||||
.into_iter()
|
||||
.map(|c| Credential {
|
||||
access_key_id: c.access_key_id,
|
||||
principal_id: c.principal_id,
|
||||
created_at: c.created_at,
|
||||
expires_at: c.expires_at,
|
||||
revoked: c.revoked,
|
||||
description: c.description.unwrap_or_default(),
|
||||
})
|
||||
.collect();
|
||||
Ok(Response::new(ListCredentialsResponse { credentials: creds }))
|
||||
}
|
||||
|
||||
async fn revoke_credential(
|
||||
&self,
|
||||
request: Request<RevokeCredentialRequest>,
|
||||
) -> Result<Response<RevokeCredentialResponse>, Status> {
|
||||
let req = request.into_inner();
|
||||
let revoked = self
|
||||
.store
|
||||
.revoke(&req.access_key_id)
|
||||
.await
|
||||
.map_err(|e| Status::internal(format!("revoke: {}", e)))?;
|
||||
Ok(Response::new(RevokeCredentialResponse { success: revoked }))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use base64::engine::general_purpose::STANDARD;
|
||||
use iam_store::Backend;
|
||||
|
||||
fn test_service() -> IamCredentialService {
|
||||
let backend = Arc::new(Backend::memory());
|
||||
let store = Arc::new(CredentialStore::new(backend));
|
||||
let master_key = [0x42u8; 32];
|
||||
IamCredentialService::new(store, &master_key, "test-key").unwrap()
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn create_and_get_roundtrip() {
|
||||
let svc = test_service();
|
||||
let create = svc
|
||||
.create_s3_credential(Request::new(CreateS3CredentialRequest {
|
||||
principal_id: "p1".into(),
|
||||
description: "".into(),
|
||||
expires_at: None,
|
||||
}))
|
||||
.await
|
||||
.unwrap()
|
||||
.into_inner();
|
||||
|
||||
let get = svc
|
||||
.get_secret_key(Request::new(GetSecretKeyRequest {
|
||||
access_key_id: create.access_key_id.clone(),
|
||||
}))
|
||||
.await
|
||||
.unwrap()
|
||||
.into_inner();
|
||||
|
||||
let orig = STANDARD.decode(create.secret_key).unwrap();
|
||||
let fetched = STANDARD.decode(get.secret_key).unwrap();
|
||||
assert_eq!(orig, fetched);
|
||||
assert_eq!(get.principal_id, "p1");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn list_filters_by_principal() {
|
||||
let svc = test_service();
|
||||
let a = svc
|
||||
.create_s3_credential(Request::new(CreateS3CredentialRequest {
|
||||
principal_id: "pA".into(),
|
||||
description: "".into(),
|
||||
expires_at: None,
|
||||
}))
|
||||
.await
|
||||
.unwrap()
|
||||
.into_inner();
|
||||
let _b = svc
|
||||
.create_s3_credential(Request::new(CreateS3CredentialRequest {
|
||||
principal_id: "pB".into(),
|
||||
description: "".into(),
|
||||
expires_at: None,
|
||||
}))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let list_a = svc
|
||||
.list_credentials(Request::new(ListCredentialsRequest {
|
||||
principal_id: "pA".into(),
|
||||
}))
|
||||
.await
|
||||
.unwrap()
|
||||
.into_inner();
|
||||
assert_eq!(list_a.credentials.len(), 1);
|
||||
assert_eq!(list_a.credentials[0].access_key_id, a.access_key_id);
|
||||
}
|
||||
|
||||
/// Revocation succeeds once, a second revoke of the same key reports
/// `success == false` (not an error), and a revoked key's secret can no
/// longer be fetched.
#[tokio::test]
async fn revoke_blocks_get() {
    let svc = test_service();
    let created = svc
        .create_s3_credential(Request::new(CreateS3CredentialRequest {
            principal_id: "p1".into(),
            description: "".into(),
            expires_at: None,
        }))
        .await
        .unwrap()
        .into_inner();

    let revoke1 = svc
        .revoke_credential(Request::new(RevokeCredentialRequest {
            access_key_id: created.access_key_id.clone(),
            reason: "test".into(),
        }))
        .await
        .unwrap()
        .into_inner();
    assert!(revoke1.success);

    // Revocation is not idempotent at the response level: the second call
    // returns Ok with success == false.
    let revoke2 = svc
        .revoke_credential(Request::new(RevokeCredentialRequest {
            access_key_id: created.access_key_id.clone(),
            reason: "again".into(),
        }))
        .await
        .unwrap()
        .into_inner();
    assert!(!revoke2.success);

    // Fetching the secret of a revoked key must fail with PermissionDenied.
    let err = svc
        .get_secret_key(Request::new(GetSecretKeyRequest {
            access_key_id: created.access_key_id,
        }))
        .await
        .unwrap_err();
    assert_eq!(err.code(), Status::permission_denied("").code());
}
|
||||
|
||||
/// A record whose `expires_at` lies in the past must be rejected with
/// PermissionDenied even though it is not revoked.
#[tokio::test]
async fn expired_key_is_denied() {
    let svc = test_service();
    // Manually insert an expired record
    let expired = CredentialRecord {
        access_key_id: "expired-ak".into(),
        principal_id: "p1".into(),
        created_at: now_ts(),
        // Already expired 10 seconds ago.
        expires_at: Some(now_ts() - 10),
        revoked: false,
        description: None,
        // Hash/ciphertext/KDF fields are placeholders — presumably the
        // expiry check fires before any decryption is attempted (TODO
        // confirm against IamCredentialService::get_secret_key).
        secret_hash: "hash".into(),
        secret_enc: STANDARD.encode(b"dead"),
        key_id: "k".into(),
        version: 1,
        kdf: Argon2Params {
            m_cost_kib: 19456,
            t_cost: 2,
            p_cost: 1,
            salt_b64: "c2FsdA==".into(),
        },
    };
    svc.store.put(&expired).await.unwrap();
    let err = svc
        .get_secret_key(Request::new(GetSecretKeyRequest {
            access_key_id: "expired-ak".into(),
        }))
        .await
        .unwrap_err();
    assert_eq!(err.code(), Status::permission_denied("").code());
}
|
||||
|
||||
/// The service constructor must reject master keys that are not 32 bytes.
#[test]
fn master_key_length_enforced() {
    let backend = Arc::new(Backend::memory());
    let store = Arc::new(CredentialStore::new(backend));
    // `store` is moved directly: the original's `store.clone()` was a
    // redundant Arc clone since `store` is never used again afterwards.
    let bad = IamCredentialService::new(store, &[0u8; 16], "k");
    assert!(bad.is_err());
}
|
||||
}
|
||||
433
iam/crates/iam-api/src/gateway_auth_service.rs
Normal file
433
iam/crates/iam-api/src/gateway_auth_service.rs
Normal file
|
|
@ -0,0 +1,433 @@
|
|||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
use apigateway_api::proto::{AuthorizeRequest, AuthorizeResponse, Subject};
|
||||
use apigateway_api::GatewayAuthService;
|
||||
use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine};
|
||||
use iam_authz::{AuthzContext, AuthzDecision, AuthzRequest, PolicyEvaluator};
|
||||
use iam_authn::InternalTokenService;
|
||||
use iam_store::{PrincipalStore, TokenStore};
|
||||
use iam_types::{InternalTokenClaims, Principal, PrincipalRef, Resource};
|
||||
use sha2::{Digest, Sha256};
|
||||
use tonic::{Request, Response, Status};
|
||||
|
||||
/// gRPC implementation of the gateway's external-auth hook: verifies the
/// bearer token, checks revocation, loads the principal, and evaluates
/// policy before telling the gateway whether to forward the request.
pub struct GatewayAuthServiceImpl {
    // Verifies internal-token signatures and claims.
    token_service: Arc<InternalTokenService>,
    // Looks up the principal referenced by the token claims.
    principal_store: Arc<PrincipalStore>,
    // Holds per-token metadata used for revocation checks.
    token_store: Arc<TokenStore>,
    // Produces allow/deny decisions from policy bindings.
    evaluator: Arc<PolicyEvaluator>,
}
|
||||
|
||||
impl GatewayAuthServiceImpl {
    /// Assemble the service from its collaborators.
    pub fn new(
        token_service: Arc<InternalTokenService>,
        principal_store: Arc<PrincipalStore>,
        token_store: Arc<TokenStore>,
        evaluator: Arc<PolicyEvaluator>,
    ) -> Self {
        Self {
            token_service,
            principal_store,
            token_store,
            evaluator,
        }
    }

    /// Returns `Some(reason)` when the token is recorded as revoked and
    /// `None` otherwise — including when no metadata exists for the token
    /// at all (absence of a record is treated as not-revoked).
    ///
    /// Token-store failures surface as `Status::internal`.
    async fn check_token_revoked(
        &self,
        principal_id: &str,
        token: &str,
    ) -> Result<Option<String>, Status> {
        // The store is keyed by a hash-derived id, never the raw token.
        let token_id = compute_token_id(token);
        let meta = self
            .token_store
            .get(principal_id, &token_id)
            .await
            .map_err(|e| Status::internal(format!("token store error: {}", e)))?;

        if let Some((meta, _)) = meta {
            if meta.revoked {
                // Fall back to a generic reason when none was recorded.
                let reason = meta
                    .revocation_reason
                    .unwrap_or_else(|| "token revoked".to_string());
                return Ok(Some(reason));
            }
        }

        Ok(None)
    }
}
|
||||
|
||||
#[tonic::async_trait]
impl GatewayAuthService for GatewayAuthServiceImpl {
    /// Full authorization pipeline for one gateway request:
    /// token presence → signature/claims verification → revocation check →
    /// principal lookup + enabled check → policy evaluation → response.
    ///
    /// Authentication/authorization failures come back as `allow: false`
    /// responses; only infrastructure failures map to a gRPC `Status` error.
    async fn authorize(
        &self,
        request: Request<AuthorizeRequest>,
    ) -> Result<Response<AuthorizeResponse>, Status> {
        let req = request.into_inner();
        let token = req.token.trim();

        if token.is_empty() {
            return Ok(Response::new(deny_response("missing token")));
        }

        // Cryptographic + claims validation of the bearer token.
        let claims = match self.token_service.verify(token).await {
            Ok(claims) => claims,
            Err(err) => return Ok(Response::new(deny_response(err.to_string()))),
        };

        // Even a cryptographically valid token may have been revoked.
        if let Some(reason) = self.check_token_revoked(&claims.principal_id, token).await? {
            return Ok(Response::new(deny_response(reason)));
        }

        let principal_ref = PrincipalRef::new(claims.principal_kind.clone(), &claims.principal_id);
        let principal = match self.principal_store.get(&principal_ref).await {
            Ok(Some(principal)) => principal,
            Ok(None) => return Ok(Response::new(deny_response("principal not found"))),
            Err(err) => {
                return Err(Status::internal(format!(
                    "failed to read principal: {}",
                    err
                )))
            }
        };

        if !principal.enabled {
            return Ok(Response::new(deny_response("principal disabled")));
        }

        // Translate the HTTP request into an (action, resource, context)
        // triple for the policy engine.
        let (action, resource, context, org_id, project_id) =
            build_authz_request(&req, &claims, &principal);
        let authz_request =
            AuthzRequest::new(principal.clone(), action, resource).with_context(context);
        let decision = self
            .evaluator
            .evaluate(&authz_request)
            .await
            .map_err(|e| Status::internal(format!("authz evaluation failed: {}", e)))?;

        match decision {
            AuthzDecision::Allow => {}
            AuthzDecision::Deny { reason } => {
                return Ok(Response::new(deny_response(reason)));
            }
        }

        // Identity the gateway attaches to the forwarded request.
        let subject = Subject {
            subject_id: claims.principal_id.clone(),
            org_id,
            project_id,
            roles: claims.roles.clone(),
            scopes: vec![claims.scope.to_string()],
        };

        // The decision may be cached by the gateway at most until the
        // token itself expires.
        let ttl_seconds = ttl_from_claims(claims.exp);
        let mut headers = HashMap::new();
        headers.insert("x-iam-session-id".to_string(), claims.session_id.clone());
        headers.insert(
            "x-iam-principal-kind".to_string(),
            claims.principal_kind.to_string(),
        );
        headers.insert(
            "x-iam-auth-method".to_string(),
            claims.auth_method.to_string(),
        );

        Ok(Response::new(AuthorizeResponse {
            allow: true,
            reason: String::new(),
            subject: Some(subject),
            headers,
            ttl_seconds,
        }))
    }
}
|
||||
|
||||
fn deny_response(reason: impl Into<String>) -> AuthorizeResponse {
|
||||
AuthorizeResponse {
|
||||
allow: false,
|
||||
reason: reason.into(),
|
||||
subject: None,
|
||||
headers: HashMap::new(),
|
||||
ttl_seconds: 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn compute_token_id(token: &str) -> String {
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(token.as_bytes());
|
||||
let digest = hasher.finalize();
|
||||
URL_SAFE_NO_PAD.encode(digest)
|
||||
}
|
||||
|
||||
fn ttl_from_claims(exp: u64) -> u32 {
|
||||
let now = now_ts();
|
||||
let remaining = exp.saturating_sub(now);
|
||||
u32::try_from(remaining).unwrap_or(u32::MAX)
|
||||
}
|
||||
|
||||
/// Derive the (action, resource, context, org_id, project_id) tuple the
/// policy evaluator needs from the gateway request, token claims and
/// principal record.
fn build_authz_request(
    req: &AuthorizeRequest,
    claims: &InternalTokenClaims,
    principal: &Principal,
) -> (String, Resource, AuthzContext, String, String) {
    let action = action_for_request(req);
    let (org_id, project_id) = resolve_org_project(req, claims, principal);
    // The protected resource is the gateway route itself; request details
    // are attached as tags so policy conditions can match on them.
    let mut resource = Resource::new(
        "gateway_route",
        resource_id_for_request(req),
        org_id.clone(),
        project_id.clone(),
    );
    resource = resource
        .with_tag("route", req.route_name.clone())
        .with_tag("method", req.method.clone())
        .with_tag("path", req.path.clone());
    if !req.raw_query.is_empty() {
        resource = resource.with_tag("raw_query", req.raw_query.clone());
    }

    // Mirror the same request details into the evaluation context.
    let mut context = AuthzContext::new()
        .with_http_method(req.method.clone())
        .with_request_path(req.path.clone())
        .with_metadata("route", req.route_name.clone())
        .with_metadata("request_id", req.request_id.clone())
        .with_metadata("org_id", org_id.clone())
        .with_metadata("project_id", project_id.clone());
    if !req.raw_query.is_empty() {
        context = context.with_metadata("raw_query", req.raw_query.clone());
    }
    // An unparseable client IP is silently omitted rather than denied.
    if let Ok(ip) = req.client_ip.parse() {
        context = context.with_source_ip(ip);
    }

    (action, resource, context, org_id, project_id)
}
|
||||
|
||||
fn action_for_request(req: &AuthorizeRequest) -> String {
|
||||
let route = if req.route_name.trim().is_empty() {
|
||||
"gateway"
|
||||
} else {
|
||||
req.route_name.trim()
|
||||
};
|
||||
let verb = method_to_verb(&req.method);
|
||||
format!("gateway:{}:{}", normalize_action_component(route), verb)
|
||||
}
|
||||
|
||||
/// Translate an HTTP method into the coarse CRUD-style verb used in
/// action strings. Matching is case-insensitive and ignores surrounding
/// whitespace; unknown methods map to "execute".
fn method_to_verb(method: &str) -> &'static str {
    let normalized = method.trim().to_uppercase();
    match normalized.as_str() {
        "GET" | "HEAD" => "read",
        "POST" => "create",
        "PUT" | "PATCH" => "update",
        "DELETE" => "delete",
        "OPTIONS" => "list",
        _ => "execute",
    }
}
|
||||
|
||||
/// Sanitize a string for use inside an action identifier: ASCII
/// alphanumerics are lowercased, '-' and '_' pass through, and every
/// other character becomes '_'.
fn normalize_action_component(value: &str) -> String {
    let mut normalized = String::with_capacity(value.len());
    for ch in value.chars() {
        if ch.is_ascii_alphanumeric() || ch == '-' || ch == '_' {
            normalized.push(ch.to_ascii_lowercase());
        } else {
            normalized.push('_');
        }
    }
    normalized
}
|
||||
|
||||
fn resource_id_for_request(req: &AuthorizeRequest) -> String {
|
||||
if !req.route_name.trim().is_empty() {
|
||||
return req.route_name.trim().to_string();
|
||||
}
|
||||
let path = req.path.trim_matches('/');
|
||||
if path.is_empty() {
|
||||
"root".to_string()
|
||||
} else {
|
||||
path.replace('/', ":")
|
||||
}
|
||||
}
|
||||
|
||||
/// Resolve the effective org and project ids for the request.
///
/// Precedence, first present value wins: explicit claim field → token
/// scope → principal record → "x-org-id"/"x-project-id" request header →
/// the literal "system".
///
/// NOTE(review): the header fallback trusts a client-supplied org/project
/// whenever neither the claims nor the principal carry one — confirm this
/// is intended and that policy cannot be widened by spoofing the header.
fn resolve_org_project(
    req: &AuthorizeRequest,
    claims: &InternalTokenClaims,
    principal: &Principal,
) -> (String, String) {
    let org_id = claims
        .org_id
        .clone()
        .or_else(|| claims.scope.org_id().map(|value| value.to_string()))
        .or_else(|| principal.org_id.clone())
        .or_else(|| header_value(&req.headers, "x-org-id"))
        .unwrap_or_else(|| "system".to_string());

    let project_id = claims
        .project_id
        .clone()
        .or_else(|| claims.scope.project_id().map(|value| value.to_string()))
        .or_else(|| principal.project_id.clone())
        .or_else(|| header_value(&req.headers, "x-project-id"))
        .unwrap_or_else(|| "system".to_string());

    (org_id, project_id)
}
|
||||
|
||||
/// Look up `key` (lowercased, assuming header keys are stored lowercase)
/// in `headers` and return its trimmed value; `None` when the header is
/// absent or blank.
fn header_value(headers: &HashMap<String, String>, key: &str) -> Option<String> {
    let raw = headers.get(&key.to_ascii_lowercase())?;
    let trimmed = raw.trim();
    if trimmed.is_empty() {
        None
    } else {
        Some(trimmed.to_string())
    }
}
|
||||
|
||||
/// Current Unix time in whole seconds; 0 if the system clock reports a
/// time before the epoch.
fn now_ts() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use iam_authn::{InternalTokenConfig, SigningKey};
    use iam_authz::{PolicyCache, PolicyEvaluator};
    use iam_store::{Backend, BackendConfig, BindingStore, PrincipalStore, RoleStore, TokenStore};
    use iam_types::{
        Permission, PolicyBinding, Principal, PrincipalRef, Role, Scope, TokenMetadata, TokenType,
    };
    use std::time::Duration;

    /// A representative GET request against route "example" carrying `token`.
    fn make_request(token: &str) -> AuthorizeRequest {
        AuthorizeRequest {
            request_id: "req-1".into(),
            token: token.to_string(),
            method: "GET".into(),
            path: "/v1/example".into(),
            raw_query: "".into(),
            headers: HashMap::new(),
            client_ip: "127.0.0.1".into(),
            route_name: "example".into(),
        }
    }

    /// Wire up the service over in-memory stores, plus the collaborators
    /// and the pre-created principal ("user-1" in org-1/proj-1) that the
    /// tests manipulate directly.
    async fn build_service() -> (
        GatewayAuthServiceImpl,
        Arc<InternalTokenService>,
        Arc<RoleStore>,
        Arc<BindingStore>,
        Arc<TokenStore>,
        Principal,
    ) {
        let backend = Arc::new(Backend::new(BackendConfig::Memory).await.unwrap());
        let principal_store = Arc::new(PrincipalStore::new(backend.clone()));
        let role_store = Arc::new(RoleStore::new(backend.clone()));
        let binding_store = Arc::new(BindingStore::new(backend.clone()));
        let token_store = Arc::new(TokenStore::new(backend));
        // Fresh signing key per test run; issuer name is arbitrary.
        let signing_key = SigningKey::generate("test-key-1");
        let token_config = InternalTokenConfig::new(signing_key, "iam-test")
            .with_default_ttl(Duration::from_secs(3600))
            .with_max_ttl(Duration::from_secs(7200));
        let token_service = Arc::new(InternalTokenService::new(token_config));
        let mut principal = Principal::new_user("user-1", "User One");
        principal.org_id = Some("org-1".into());
        principal.project_id = Some("proj-1".into());
        principal_store.create(&principal).await.unwrap();
        let cache = Arc::new(PolicyCache::default_config());
        let evaluator = Arc::new(PolicyEvaluator::new(
            binding_store.clone(),
            role_store.clone(),
            cache,
        ));
        let service = GatewayAuthServiceImpl::new(
            token_service.clone(),
            principal_store.clone(),
            token_store.clone(),
            evaluator,
        );
        (service, token_service, role_store, binding_store, token_store, principal)
    }

    /// An empty token is denied with the dedicated "missing token" reason.
    #[tokio::test]
    async fn test_authorize_missing_token_denies() {
        let (service, _, _, _, _, _) = build_service().await;
        let response = service
            .authorize(Request::new(make_request("")))
            .await
            .unwrap()
            .into_inner();
        assert!(!response.allow);
        assert!(response.reason.contains("missing token"));
    }

    /// With a role binding granting gateway:example:read and a valid token,
    /// authorization succeeds and the subject mirrors the token claims.
    #[tokio::test]
    async fn test_authorize_valid_token_allows() {
        let (service, token_service, role_store, binding_store, _, principal) =
            build_service().await;
        let role = Role::new(
            "GatewayReader",
            Scope::project("proj-1", "org-1"),
            vec![Permission::new("gateway:example:read", "*")],
        );
        role_store.create(&role).await.unwrap();
        let binding = PolicyBinding::new(
            "binding-1",
            PrincipalRef::new(principal.kind.clone(), principal.id.clone()),
            role.to_ref(),
            Scope::project("proj-1", "org-1"),
        );
        binding_store.create(&binding).await.unwrap();
        let issued = token_service
            .issue(&principal, vec!["role-1".into()], Scope::system(), None)
            .await
            .unwrap();
        let response = service
            .authorize(Request::new(make_request(&issued.token)))
            .await
            .unwrap()
            .into_inner();
        assert!(response.allow);
        let subject = response.subject.expect("subject");
        assert_eq!(subject.subject_id, principal.id);
        assert_eq!(subject.roles, vec!["role-1".to_string()]);
        assert_eq!(subject.scopes, vec!["system".to_string()]);
        // TTL is derived from the token's remaining lifetime.
        assert!(response.ttl_seconds > 0);
    }

    /// A token with revoked metadata in the token store is denied with the
    /// recorded revocation reason even though its signature is valid.
    #[tokio::test]
    async fn test_authorize_revoked_token_denies() {
        let (service, token_service, _, _, token_store, principal) = build_service().await;
        let issued = token_service
            .issue(&principal, vec![], Scope::system(), None)
            .await
            .unwrap();
        // Store metadata under the same hash-derived id authorize() uses.
        let token_id = compute_token_id(&issued.token);
        let meta = TokenMetadata::new(
            &token_id,
            &issued.claims.principal_id,
            TokenType::Access,
            issued.claims.iat,
            issued.claims.exp,
        );
        token_store.put(&meta).await.unwrap();
        token_store
            .revoke(
                &issued.claims.principal_id,
                &token_id,
                "test revoke",
                now_ts(),
            )
            .await
            .unwrap();

        let response = service
            .authorize(Request::new(make_request(&issued.token)))
            .await
            .unwrap()
            .into_inner();
        assert!(!response.allow);
        assert!(response.reason.contains("revoke"));
    }
}
|
||||
14
iam/crates/iam-client/examples/basic.rs
Normal file
14
iam/crates/iam-client/examples/basic.rs
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
use iam_client::IamClientBuilder;
|
||||
use photocloud_client_common::AuthConfig;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// Build IAM client with optional bearer auth.
|
||||
let client = IamClientBuilder::new("https://127.0.0.1:2443")
|
||||
.auth(AuthConfig::None)
|
||||
.build()
|
||||
.await?;
|
||||
|
||||
println!("IAM client ready");
|
||||
Ok(())
|
||||
}
|
||||
68
iam/crates/iam-store/src/credential_store.rs
Normal file
68
iam/crates/iam-store/src/credential_store.rs
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
//! Credential storage (access/secret key metadata)
|
||||
|
||||
use iam_types::{CredentialRecord, Result};
|
||||
|
||||
use crate::backend::JsonStore;
|
||||
use crate::{DynMetadataClient, MetadataClient};
|
||||
|
||||
/// Store for credentials (S3/API keys)
///
/// Records are persisted as JSON under keys of the form
/// `iam/credentials/{access_key_id}` (see `CredentialRecord::storage_key`).
pub struct CredentialStore {
    // Metadata backend used for all reads/writes, via the JsonStore helpers.
    client: DynMetadataClient,
}
|
||||
|
||||
impl JsonStore for CredentialStore {
    /// Expose the underlying metadata client so the JsonStore blanket
    /// helpers (put_json/get_json/cas_json/scan_prefix_json) work.
    fn client(&self) -> &dyn MetadataClient {
        self.client.as_ref()
    }
}
|
||||
|
||||
impl CredentialStore {
    /// Create a store over the given metadata client.
    pub fn new(client: DynMetadataClient) -> Self {
        Self { client }
    }

    /// Insert or overwrite a credential record; returns the stored version.
    pub async fn put(&self, record: &CredentialRecord) -> Result<u64> {
        let key = CredentialRecord::storage_key(&record.access_key_id);
        self.put_json(key.as_bytes(), record).await
    }

    /// Fetch a record and its version by access-key id; `None` if absent.
    pub async fn get(&self, access_key_id: &str) -> Result<Option<(CredentialRecord, u64)>> {
        let key = CredentialRecord::storage_key(access_key_id);
        self.get_json(key.as_bytes()).await
    }

    /// List the credentials belonging to `principal_id`.
    ///
    /// NOTE(review): `limit` caps the prefix scan BEFORE the principal
    /// filter is applied, so fewer than `limit` matching records may be
    /// returned even when more exist — confirm this is the intended
    /// contract for large deployments.
    pub async fn list_for_principal(
        &self,
        principal_id: &str,
        limit: u32,
    ) -> Result<Vec<CredentialRecord>> {
        // scan prefix and filter by principal_id; small cardinality expected
        let prefix = b"iam/credentials/";
        let items = self.scan_prefix_json::<CredentialRecord>(prefix, limit).await?;
        Ok(items
            .into_iter()
            .filter(|rec| rec.principal_id == principal_id)
            .collect())
    }

    /// Mark a credential revoked via compare-and-swap.
    ///
    /// Returns `Ok(false)` when the record is missing, already revoked, or
    /// the CAS lost a concurrent update (no retry is attempted); `Ok(true)`
    /// only when this call transitioned the record to revoked.
    pub async fn revoke(&self, access_key_id: &str) -> Result<bool> {
        let key = CredentialRecord::storage_key(access_key_id);
        let current = self.get_json::<CredentialRecord>(key.as_bytes()).await?;
        let (mut record, version) = match current {
            Some(v) => v,
            None => return Ok(false),
        };
        if record.revoked {
            return Ok(false);
        }
        record.revoked = true;
        // CAS on the version read above guards against concurrent writers.
        match self
            .cas_json(key.as_bytes(), version, &record)
            .await?
        {
            crate::CasResult::Success(_) => Ok(true),
            crate::CasResult::Conflict { .. } => Ok(false),
            crate::CasResult::NotFound => Ok(false),
        }
    }
}
|
||||
35
iam/crates/iam-types/src/credential.rs
Normal file
35
iam/crates/iam-types/src/credential.rs
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
//! Credential metadata for access/secret keys
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Argon2 parameters used to hash the secret key
///
/// Stored alongside each credential so old records remain verifiable after
/// the default parameters change.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Argon2Params {
    // Memory cost in KiB.
    pub m_cost_kib: u32,
    // Iteration (time) cost.
    pub t_cost: u32,
    // Parallelism (lanes).
    pub p_cost: u32,
    /// Salt in base64
    pub salt_b64: String,
}
|
||||
|
||||
/// Stored record for an IAM credential
///
/// Carries both a hash of the secret (for verification) and an encrypted
/// copy (`secret_enc`, presumably so the secret can be re-served — TODO
/// confirm against the credential service's encryption scheme).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct CredentialRecord {
    pub access_key_id: String,
    // Owning principal.
    pub principal_id: String,
    // Unix seconds.
    pub created_at: u64,
    // Unix seconds; None means the credential never expires.
    pub expires_at: Option<u64>,
    pub revoked: bool,
    pub description: Option<String>,
    // Argon2 hash of the secret key.
    pub secret_hash: String,
    // Encrypted secret key material (encoding defined by the service).
    pub secret_enc: String,
    // Identifier of the master key used to encrypt `secret_enc`.
    pub key_id: String,
    // Record schema/format version.
    pub version: u32,
    // KDF parameters used to produce `secret_hash`.
    pub kdf: Argon2Params,
}
|
||||
|
||||
impl CredentialRecord {
|
||||
pub fn storage_key(access_key_id: &str) -> String {
|
||||
format!("iam/credentials/{}", access_key_id)
|
||||
}
|
||||
}
|
||||
10
k8shost/crates/k8shost-csi/build.rs
Normal file
10
k8shost/crates/k8shost-csi/build.rs
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let protoc_path = protoc_bin_vendored::protoc_bin_path()?;
|
||||
std::env::set_var("PROTOC", protoc_path);
|
||||
|
||||
tonic_build::configure()
|
||||
.build_server(true)
|
||||
.build_client(true)
|
||||
.compile(&["proto/csi.proto"], &["proto"])?;
|
||||
Ok(())
|
||||
}
|
||||
1914
k8shost/crates/k8shost-csi/proto/csi.proto
Normal file
1914
k8shost/crates/k8shost-csi/proto/csi.proto
Normal file
File diff suppressed because it is too large
Load diff
47
lightningstor/crates/lightningstor-distributed/Cargo.toml
Normal file
47
lightningstor/crates/lightningstor-distributed/Cargo.toml
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
[package]
|
||||
name = "lightningstor-distributed"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
rust-version.workspace = true
|
||||
description = "Distributed storage backends for LightningStor (Erasure Coding & Replication)"
|
||||
|
||||
[dependencies]
|
||||
# Internal crates
|
||||
lightningstor-types = { workspace = true }
|
||||
lightningstor-storage = { workspace = true }
|
||||
lightningstor-node = { workspace = true }
|
||||
|
||||
# Async runtime
|
||||
tokio = { workspace = true }
|
||||
futures = { workspace = true }
|
||||
async-trait = { workspace = true }
|
||||
|
||||
# gRPC
|
||||
tonic = { workspace = true }
|
||||
prost = { workspace = true }
|
||||
|
||||
# Serialization
|
||||
serde = { workspace = true }
|
||||
|
||||
# Utilities
|
||||
thiserror = { workspace = true }
|
||||
anyhow = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
bytes = { workspace = true }
|
||||
dashmap = { workspace = true }
|
||||
uuid = { workspace = true }
|
||||
|
||||
# Erasure coding
|
||||
reed-solomon-erasure = "6.0"
|
||||
|
||||
# Consistent hashing
|
||||
hashring = "0.3"
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = { workspace = true }
|
||||
tokio = { workspace = true, features = ["rt-multi-thread", "macros"] }
|
||||
serde_json = { workspace = true }
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
|
@ -0,0 +1,848 @@
|
|||
//! Erasure-coded distributed storage backend
|
||||
//!
|
||||
//! Implements StorageBackend using Reed-Solomon erasure coding for
|
||||
//! storage-efficient redundancy.
|
||||
|
||||
use crate::chunk::{ChunkId, ChunkManager};
|
||||
use crate::config::DistributedConfig;
|
||||
use crate::erasure::Codec;
|
||||
use crate::node::{NodeClientTrait, NodeRegistry};
|
||||
use crate::placement::{ConsistentHashSelector, NodeSelector};
|
||||
use async_trait::async_trait;
|
||||
use bytes::Bytes;
|
||||
use lightningstor_storage::{StorageBackend, StorageError, StorageResult};
|
||||
use lightningstor_types::ObjectId;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::sync::Arc;
|
||||
use tracing::{debug, error, warn};
|
||||
|
||||
/// Metadata for an object stored with erasure coding
///
/// Persisted as a fixed 24-byte blob (see `to_bytes`/`from_bytes`) under
/// the `{object_id}_meta` key on multiple nodes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ObjectMetadata {
    /// Number of chunks the object is split into
    pub chunk_count: usize,
    /// Original size of the object in bytes
    pub original_size: u64,
    /// Size of each chunk (except possibly the last)
    pub chunk_size: usize,
}
|
||||
|
||||
impl ObjectMetadata {
|
||||
/// Create new object metadata
|
||||
pub fn new(original_size: u64, chunk_count: usize, chunk_size: usize) -> Self {
|
||||
Self {
|
||||
chunk_count,
|
||||
original_size,
|
||||
chunk_size,
|
||||
}
|
||||
}
|
||||
|
||||
/// Serialize to bytes
|
||||
pub fn to_bytes(&self) -> Vec<u8> {
|
||||
// Simple format: chunk_count (8 bytes) + original_size (8 bytes) + chunk_size (8 bytes)
|
||||
let mut bytes = Vec::with_capacity(24);
|
||||
bytes.extend_from_slice(&(self.chunk_count as u64).to_le_bytes());
|
||||
bytes.extend_from_slice(&self.original_size.to_le_bytes());
|
||||
bytes.extend_from_slice(&(self.chunk_size as u64).to_le_bytes());
|
||||
bytes
|
||||
}
|
||||
|
||||
/// Deserialize from bytes
|
||||
pub fn from_bytes(bytes: &[u8]) -> Option<Self> {
|
||||
if bytes.len() < 24 {
|
||||
return None;
|
||||
}
|
||||
let chunk_count = u64::from_le_bytes(bytes[0..8].try_into().ok()?) as usize;
|
||||
let original_size = u64::from_le_bytes(bytes[8..16].try_into().ok()?);
|
||||
let chunk_size = u64::from_le_bytes(bytes[16..24].try_into().ok()?) as usize;
|
||||
Some(Self {
|
||||
chunk_count,
|
||||
original_size,
|
||||
chunk_size,
|
||||
})
|
||||
}
|
||||
|
||||
/// Get the metadata key for an object
|
||||
pub fn metadata_key(object_id: &ObjectId) -> String {
|
||||
format!("{}_meta", object_id)
|
||||
}
|
||||
}
|
||||
|
||||
/// Erasure-coded distributed storage backend
///
/// Stores objects by:
/// 1. Splitting into chunks (for large objects)
/// 2. Encoding each chunk into data + parity shards using Reed-Solomon
/// 3. Distributing shards across storage nodes
///
/// Can tolerate loss of up to `parity_shards` nodes without data loss.
pub struct ErasureCodedBackend {
    /// Erasure coding codec
    codec: Arc<Codec>,
    /// Node registry for discovering storage nodes
    node_registry: Arc<dyn NodeRegistry>,
    /// Node selector for placement decisions
    node_selector: Arc<dyn NodeSelector>,
    /// Chunk manager for splitting/reassembling
    chunk_manager: Arc<ChunkManager>,
    /// Configuration (kept for future use)
    #[allow(dead_code)]
    config: DistributedConfig,
    /// Number of data shards
    data_shards: usize,
    /// Number of parity shards
    parity_shards: usize,
}
|
||||
|
||||
impl ErasureCodedBackend {
|
||||
/// Create a new erasure-coded backend
///
/// # Arguments
/// * `config` - Distributed storage configuration (must have ErasureCoded redundancy mode)
/// * `node_registry` - Registry for discovering storage nodes
///
/// # Errors
/// `StorageError::Backend` when the redundancy mode is not `ErasureCoded`
/// or the Reed-Solomon codec rejects the configured shard counts.
pub async fn new(
    config: DistributedConfig,
    node_registry: Arc<dyn NodeRegistry>,
) -> StorageResult<Self> {
    // Extract shard counts; any other redundancy mode is a config error.
    let (data_shards, parity_shards) = match &config.redundancy {
        crate::config::RedundancyMode::ErasureCoded {
            data_shards,
            parity_shards,
        } => (*data_shards, *parity_shards),
        _ => {
            return Err(StorageError::Backend(
                "ErasureCodedBackend requires ErasureCoded redundancy mode".into(),
            ))
        }
    };

    let codec = Arc::new(
        Codec::new(data_shards, parity_shards)
            .map_err(|e| StorageError::Backend(e.to_string()))?,
    );
    // Placement via consistent hashing; chunking per the configured size.
    let node_selector = Arc::new(ConsistentHashSelector::new());
    let chunk_manager = Arc::new(ChunkManager::new(config.chunk.clone()));

    Ok(Self {
        codec,
        node_registry,
        node_selector,
        chunk_manager,
        config,
        data_shards,
        parity_shards,
    })
}
|
||||
|
||||
/// Get the total number of shards (data + parity)
///
/// This is also the number of nodes each chunk is spread across.
pub fn total_shards(&self) -> usize {
    self.data_shards + self.parity_shards
}
|
||||
|
||||
/// Get the minimum number of shards needed to reconstruct data
///
/// For Reed-Solomon this is exactly the data shard count.
pub fn min_shards_for_read(&self) -> usize {
    self.data_shards
}
|
||||
|
||||
/// Select nodes for writing shards
///
/// Requires at least `total_shards()` healthy nodes (one node per shard);
/// otherwise fails rather than co-locating multiple shards on one node.
async fn select_nodes_for_write(&self) -> StorageResult<Vec<Arc<dyn NodeClientTrait>>> {
    let nodes = self
        .node_registry
        .get_healthy_nodes()
        .await
        .map_err(|e| StorageError::Backend(e.to_string()))?;

    let total_shards = self.total_shards();
    if nodes.len() < total_shards {
        return Err(StorageError::Backend(format!(
            "Not enough healthy nodes: need {}, have {}",
            total_shards,
            nodes.len()
        )));
    }

    // Delegate final placement to the configured selector.
    self.node_selector
        .select_nodes(&nodes, total_shards)
        .await
        .map_err(|e| StorageError::Backend(e.to_string()))
}
|
||||
|
||||
/// Store object metadata to nodes
///
/// Writes the serialized metadata blob (as shard index 0, non-parity, of
/// the `{id}_meta` key) to up to `total_shards()` healthy nodes.
///
/// NOTE(review): a single successful write is accepted — weaker than the
/// data path's `data_shards + 1` requirement; confirm this durability gap
/// is intended.
async fn write_metadata(
    &self,
    object_id: &ObjectId,
    metadata: &ObjectMetadata,
) -> StorageResult<()> {
    let meta_key = ObjectMetadata::metadata_key(object_id);
    let meta_bytes = Bytes::from(metadata.to_bytes());

    let nodes = self
        .node_registry
        .get_healthy_nodes()
        .await
        .map_err(|e| StorageError::Backend(e.to_string()))?;

    // Store metadata on multiple nodes for redundancy
    let mut write_futures = Vec::new();
    for node in nodes.iter().take(self.total_shards()) {
        let node = node.clone();
        let key = meta_key.clone();
        let data = meta_bytes.clone();
        write_futures.push(async move {
            node.put_chunk(&key, 0, false, data).await
        });
    }

    // Writes run in parallel; count how many landed.
    let results = futures::future::join_all(write_futures).await;
    let success_count = results.iter().filter(|r| r.is_ok()).count();

    // Need at least one successful write
    if success_count == 0 {
        return Err(StorageError::Backend(
            "Failed to write object metadata to any node".into(),
        ));
    }

    debug!(
        object_id = %object_id,
        success_count,
        "Stored object metadata"
    );

    Ok(())
}
|
||||
|
||||
/// Read object metadata from nodes
///
/// Queries every registered node (not only healthy ones) sequentially and
/// returns the first blob that parses; `NotFound` when no node yields a
/// readable copy.
async fn read_metadata(&self, object_id: &ObjectId) -> StorageResult<ObjectMetadata> {
    let meta_key = ObjectMetadata::metadata_key(object_id);

    let nodes = self
        .node_registry
        .get_all_nodes()
        .await
        .map_err(|e| StorageError::Backend(e.to_string()))?;

    // Try to read metadata from any node
    for node in &nodes {
        if let Ok(data) = node.get_chunk(&meta_key, 0, false).await {
            if let Some(metadata) = ObjectMetadata::from_bytes(&data) {
                return Ok(metadata);
            }
        }
    }

    Err(StorageError::NotFound(*object_id))
}
|
||||
|
||||
/// Delete object metadata from all nodes
///
/// Best-effort: registry failures and per-node delete errors are ignored
/// (the method returns `()`), so callers cannot observe partial deletion.
async fn delete_metadata(&self, object_id: &ObjectId) {
    let meta_key = ObjectMetadata::metadata_key(object_id);

    let nodes = match self.node_registry.get_all_nodes().await {
        Ok(nodes) => nodes,
        Err(_) => return,
    };

    // Fan the deletes out in parallel; errors are deliberately swallowed.
    let mut delete_futures = Vec::new();
    for node in &nodes {
        let node = node.clone();
        let key = meta_key.clone();
        delete_futures.push(async move {
            let _ = node.delete_chunk(&key).await;
        });
    }

    futures::future::join_all(delete_futures).await;
}
|
||||
|
||||
/// Write a single chunk with erasure coding
///
/// Encodes `chunk_data` into data + parity shards and writes one shard per
/// selected node in parallel. Succeeds when at least `data_shards + 1`
/// writes land — one shard of slack beyond the reconstruction minimum.
async fn write_chunk(
    &self,
    object_id: &ObjectId,
    chunk_index: usize,
    chunk_data: &[u8],
) -> StorageResult<()> {
    // Encode the chunk
    let shards = self
        .codec
        .encode(chunk_data)
        .map_err(|e| StorageError::Backend(e.to_string()))?;

    // Select nodes for each shard
    let nodes = self.select_nodes_for_write().await?;

    // Write shards in parallel
    let mut write_futures = Vec::with_capacity(self.total_shards());
    for (shard_idx, (shard_data, node)) in shards.into_iter().zip(nodes.iter()).enumerate() {
        // Shards at index >= data_shards are parity shards.
        let is_parity = shard_idx >= self.data_shards;
        let chunk_id = ChunkId::new(object_id, chunk_index, shard_idx, is_parity);
        let node = node.clone();
        let shard_bytes = Bytes::from(shard_data);

        write_futures.push(async move {
            node.put_chunk(&chunk_id.to_key(), shard_idx as u32, is_parity, shard_bytes)
                .await
        });
    }

    // Wait for all writes
    let results = futures::future::join_all(write_futures).await;
    let success_count = results.iter().filter(|r| r.is_ok()).count();
    let error_count = results.len() - success_count;

    debug!(
        object_id = %object_id,
        chunk_index,
        success_count,
        error_count,
        "Wrote erasure-coded chunk"
    );

    // Need at least data_shards + 1 for durability
    let min_required = self.data_shards + 1;
    if success_count < min_required {
        let errors: Vec<_> = results
            .into_iter()
            .filter_map(|r| r.err())
            .collect();
        error!(
            success_count,
            min_required,
            errors = ?errors,
            "Failed to write enough shards"
        );
        return Err(StorageError::Backend(format!(
            "Failed to write enough shards: {} of {} required succeeded",
            success_count, min_required
        )));
    }

    Ok(())
}
|
||||
|
||||
    /// Read a single chunk with erasure decoding
    ///
    /// Fetches all shards of the chunk in parallel (each shard tries its
    /// placement-preferred node first, then falls back to every other node),
    /// then decodes as long as at least `data_shards` shards were recovered.
    /// `original_chunk_size` is the unpadded length the codec should return.
    async fn read_chunk(
        &self,
        object_id: &ObjectId,
        chunk_index: usize,
        original_chunk_size: usize,
    ) -> StorageResult<Vec<u8>> {
        // Use all nodes for reads - unhealthy nodes might still have data we need
        // The erasure coding handles actual failures gracefully
        let nodes = self
            .node_registry
            .get_all_nodes()
            .await
            .map_err(|e| StorageError::Backend(e.to_string()))?;

        // Try to read all shards in parallel
        let mut shard_futures = Vec::with_capacity(self.total_shards());
        for shard_idx in 0..self.total_shards() {
            let is_parity = shard_idx >= self.data_shards;
            let chunk_id = ChunkId::new(object_id, chunk_index, shard_idx, is_parity);
            let nodes = nodes.clone();
            let node_selector = self.node_selector.clone();
            let chunk_key = chunk_id.to_key();

            // Each future resolves to Some(shard bytes) or None if the shard
            // could not be read from any node; the decoder tolerates gaps.
            shard_futures.push(async move {
                // Try to read from the preferred node first
                if let Ok(node) = node_selector.select_for_read(&nodes, &chunk_key).await {
                    if let Ok(data) = node
                        .get_chunk(&chunk_key, shard_idx as u32, is_parity)
                        .await
                    {
                        return Some(data);
                    }
                }

                // Try other nodes if preferred fails
                // NOTE(review): this retries the preferred node a second time
                // since it is also in `nodes` — harmless but redundant.
                for node in &nodes {
                    if let Ok(data) = node
                        .get_chunk(&chunk_key, shard_idx as u32, is_parity)
                        .await
                    {
                        return Some(data);
                    }
                }

                None
            });
        }

        // join_all preserves order, so index i of the results is shard i.
        let shard_results: Vec<Option<Vec<u8>>> = futures::future::join_all(shard_futures).await;

        // Count available shards
        let available_count = shard_results.iter().filter(|s| s.is_some()).count();

        debug!(
            object_id = %object_id,
            chunk_index,
            available_count,
            required = self.data_shards,
            "Read shards for decoding"
        );

        // Any data_shards of the total shards are enough to reconstruct.
        if available_count < self.data_shards {
            return Err(StorageError::Backend(format!(
                "Not enough shards for decoding: have {}, need {}",
                available_count, self.data_shards
            )));
        }

        // Decode
        self.codec
            .decode(shard_results, original_chunk_size)
            .map_err(|e| StorageError::Backend(e.to_string()))
    }
|
||||
}
|
||||
|
||||
impl ChunkId {
|
||||
fn new(object_id: &ObjectId, chunk_index: usize, shard_index: usize, is_parity: bool) -> Self {
|
||||
if is_parity {
|
||||
Self::parity_shard(object_id.to_string(), chunk_index, shard_index)
|
||||
} else {
|
||||
Self::data_shard(object_id.to_string(), chunk_index, shard_index)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
impl StorageBackend for ErasureCodedBackend {
    /// Store an object: split into fixed-size chunks, erasure-code each chunk
    /// across the node set, then persist a metadata record describing the
    /// original size and chunk layout.
    async fn put_object(&self, object_id: &ObjectId, data: Bytes) -> StorageResult<()> {
        let original_size = data.len() as u64;
        debug!(object_id = %object_id, size = original_size, "Putting object with erasure coding");

        // Split data into chunks
        let chunks = self.chunk_manager.split(&data);
        let chunk_count = chunks.len();
        let chunk_size = self.chunk_manager.chunk_size();

        // Write each chunk; any chunk failing durability aborts the whole put.
        for (chunk_idx, chunk_data) in chunks.into_iter().enumerate() {
            self.write_chunk(object_id, chunk_idx, &chunk_data).await?;
        }

        // Store metadata last, so a readable metadata record implies the
        // chunks above were already written.
        let metadata = ObjectMetadata::new(original_size, chunk_count, chunk_size);
        self.write_metadata(object_id, &metadata).await?;

        debug!(
            object_id = %object_id,
            chunk_count,
            original_size,
            "Successfully stored object with erasure coding"
        );

        Ok(())
    }

    /// Retrieve an object by decoding each chunk and reassembling, then
    /// trimming any codec padding back to the recorded original size.
    async fn get_object(&self, object_id: &ObjectId) -> StorageResult<Bytes> {
        debug!(object_id = %object_id, "Getting object with erasure decoding");

        // Read metadata to get chunk count and original size
        let metadata = self.read_metadata(object_id).await?;

        debug!(
            object_id = %object_id,
            chunk_count = metadata.chunk_count,
            original_size = metadata.original_size,
            "Read object metadata"
        );

        // Read all chunks and reassemble
        let mut all_data = Vec::with_capacity(metadata.original_size as usize);

        for chunk_idx in 0..metadata.chunk_count {
            // Calculate the expected size for this chunk (last chunk may be short)
            let remaining = metadata.original_size as usize - all_data.len();
            let expected_chunk_size = remaining.min(metadata.chunk_size);

            let chunk_data = self
                .read_chunk(object_id, chunk_idx, expected_chunk_size)
                .await?;

            // Only take what we need (handles padding)
            let take_size = remaining.min(chunk_data.len());
            all_data.extend_from_slice(&chunk_data[..take_size]);
        }

        // Truncate to original size in case of any padding
        all_data.truncate(metadata.original_size as usize);

        debug!(
            object_id = %object_id,
            retrieved_size = all_data.len(),
            "Successfully retrieved object"
        );

        Ok(Bytes::from(all_data))
    }

    /// Best-effort deletion of every shard of every chunk from every node,
    /// followed by the metadata record. Individual shard failures are logged
    /// but never fail the call.
    async fn delete_object(&self, object_id: &ObjectId) -> StorageResult<()> {
        debug!(object_id = %object_id, "Deleting object shards");

        // Try to read metadata to know how many chunks to delete
        let chunk_count = match self.read_metadata(object_id).await {
            Ok(metadata) => metadata.chunk_count,
            Err(_) => 1, // Fall back to single chunk if metadata not found
        };

        let nodes = self
            .node_registry
            .get_all_nodes()
            .await
            .map_err(|e| StorageError::Backend(e.to_string()))?;

        // Delete all shards for all chunks from all nodes (best effort)
        let mut delete_futures = Vec::new();
        for chunk_idx in 0..chunk_count {
            for shard_idx in 0..self.total_shards() {
                let is_parity = shard_idx >= self.data_shards;
                let chunk_id = ChunkId::new(object_id, chunk_idx, shard_idx, is_parity);
                let chunk_key = chunk_id.to_key();

                // Every node is asked to delete every shard key, since we do
                // not track which node actually holds which shard.
                for node in &nodes {
                    let node = node.clone();
                    let key = chunk_key.clone();
                    delete_futures.push(async move {
                        if let Err(e) = node.delete_chunk(&key).await {
                            // Log but don't fail - best effort deletion
                            warn!(node_id = node.node_id(), chunk_key = key, error = ?e, "Failed to delete shard");
                        }
                    });
                }
            }
        }

        futures::future::join_all(delete_futures).await;

        // Delete metadata
        self.delete_metadata(object_id).await;

        debug!(object_id = %object_id, chunk_count, "Deleted object");
        Ok(())
    }

    /// An object exists iff its metadata record can be read and parsed.
    async fn object_exists(&self, object_id: &ObjectId) -> StorageResult<bool> {
        // Check if metadata exists
        Ok(self.read_metadata(object_id).await.is_ok())
    }

    /// Return the object's original (pre-padding) size from its metadata.
    async fn object_size(&self, object_id: &ObjectId) -> StorageResult<u64> {
        // Read metadata to get original size
        let metadata = self.read_metadata(object_id).await?;
        Ok(metadata.original_size)
    }

    /// Store a multipart-upload part, erasure-coded under a key derived from
    /// `upload_id` and `part_number`. Requires `data_shards + 1` successful
    /// shard writes, same durability bar as `write_chunk`.
    async fn put_part(
        &self,
        upload_id: &str,
        part_number: u32,
        data: Bytes,
    ) -> StorageResult<()> {
        // Use a deterministic part key based on upload_id and part_number
        let part_key = format!("part_{}_{}", upload_id, part_number);
        let nodes = self.select_nodes_for_write().await?;

        // Encode and store the part data
        let shards = self
            .codec
            .encode(&data)
            .map_err(|e| StorageError::Backend(e.to_string()))?;

        let mut write_futures = Vec::with_capacity(self.total_shards());
        for (shard_idx, (shard_data, node)) in shards.into_iter().zip(nodes.iter()).enumerate() {
            let is_parity = shard_idx >= self.data_shards;
            // Key suffix encodes shard index and data/parity kind.
            let key = format!("{}_{}_{}", part_key, shard_idx, if is_parity { "p" } else { "d" });
            let node = node.clone();
            let shard_bytes = Bytes::from(shard_data);

            write_futures.push(async move {
                node.put_chunk(&key, shard_idx as u32, is_parity, shard_bytes).await
            });
        }

        let results = futures::future::join_all(write_futures).await;
        let success_count = results.iter().filter(|r| r.is_ok()).count();

        if success_count < self.data_shards + 1 {
            return Err(StorageError::Backend(format!(
                "Failed to write part shards: {} of {} required",
                success_count, self.data_shards + 1
            )));
        }

        Ok(())
    }

    /// Read back a multipart part by gathering and decoding its shards.
    ///
    /// NOTE(review): the decode size is `data_shards * shard_size`, which
    /// includes codec padding — unlike `get_object`, no original part size is
    /// tracked, so callers may receive trailing padding bytes; confirm part
    /// sizes are reconciled elsewhere. Also reads only healthy nodes, unlike
    /// `read_chunk` which deliberately tries all nodes.
    async fn get_part(&self, upload_id: &str, part_number: u32) -> StorageResult<Bytes> {
        let part_key = format!("part_{}_{}", upload_id, part_number);
        let nodes = self
            .node_registry
            .get_healthy_nodes()
            .await
            .map_err(|e| StorageError::Backend(e.to_string()))?;

        // Try to read shards
        let mut shard_futures = Vec::with_capacity(self.total_shards());
        for shard_idx in 0..self.total_shards() {
            let is_parity = shard_idx >= self.data_shards;
            let key = format!("{}_{}_{}", part_key, shard_idx, if is_parity { "p" } else { "d" });
            let nodes = nodes.clone();

            // First node that returns the shard wins; None marks a lost shard.
            shard_futures.push(async move {
                for node in &nodes {
                    if let Ok(data) = node.get_chunk(&key, shard_idx as u32, is_parity).await {
                        return Some(data);
                    }
                }
                None
            });
        }

        let shard_results: Vec<Option<Vec<u8>>> = futures::future::join_all(shard_futures).await;
        let available = shard_results.iter().filter(|s| s.is_some()).count();

        if available < self.data_shards {
            return Err(StorageError::Backend(format!(
                "Part {}:{} not found (insufficient shards)",
                upload_id, part_number
            )));
        }

        // Get shard size from first available shard
        let shard_size = shard_results
            .iter()
            .find_map(|s| s.as_ref().map(|v| v.len()))
            .unwrap_or(0);
        let original_size = self.data_shards * shard_size;

        let data = self
            .codec
            .decode(shard_results, original_size)
            .map_err(|e| StorageError::Backend(e.to_string()))?;

        Ok(Bytes::from(data))
    }

    /// Best-effort deletion of every shard of one multipart part from all
    /// nodes; always reports success.
    async fn delete_part(&self, upload_id: &str, part_number: u32) -> StorageResult<()> {
        let part_key = format!("part_{}_{}", upload_id, part_number);
        let nodes = self
            .node_registry
            .get_all_nodes()
            .await
            .map_err(|e| StorageError::Backend(e.to_string()))?;

        let mut delete_futures = Vec::new();
        for shard_idx in 0..self.total_shards() {
            let is_parity = shard_idx >= self.data_shards;
            let key = format!("{}_{}_{}", part_key, shard_idx, if is_parity { "p" } else { "d" });

            for node in &nodes {
                let node = node.clone();
                let key = key.clone();
                delete_futures.push(async move {
                    let _ = node.delete_chunk(&key).await;
                });
            }
        }

        futures::future::join_all(delete_futures).await;
        Ok(())
    }

    /// No-op: per-upload part enumeration is not tracked, so nothing can be
    /// deleted here. Orphaned part shards remain until addressed elsewhere.
    async fn delete_upload_parts(&self, _upload_id: &str) -> StorageResult<()> {
        // Would need to track part numbers in metadata
        Ok(())
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::{ChunkConfig, RedundancyMode};
    use crate::node::MockNodeRegistry;

    /// Build a DistributedConfig using erasure coding with the given shard
    /// counts and the default chunk size.
    fn create_ec_config(data_shards: usize, parity_shards: usize) -> DistributedConfig {
        DistributedConfig {
            redundancy: RedundancyMode::ErasureCoded {
                data_shards,
                parity_shards,
            },
            chunk: ChunkConfig::default(),
            ..Default::default()
        }
    }

    /// Constructing the backend exposes the configured shard geometry.
    #[tokio::test]
    async fn test_ec_backend_creation() {
        let config = create_ec_config(4, 2);
        let registry = Arc::new(MockNodeRegistry::with_nodes(6));

        let backend = ErasureCodedBackend::new(config, registry).await.unwrap();

        assert_eq!(backend.total_shards(), 6);
        assert_eq!(backend.min_shards_for_read(), 4);
    }

    /// Round-trip: put then get returns the original bytes (possibly with
    /// trailing codec padding beyond the original length).
    #[tokio::test]
    async fn test_ec_backend_put_get() {
        let config = create_ec_config(4, 2);
        let registry = Arc::new(MockNodeRegistry::with_nodes(6));

        let backend = ErasureCodedBackend::new(config, registry.clone()).await.unwrap();

        let object_id = ObjectId::new();
        let data = Bytes::from(vec![42u8; 1024]);

        backend.put_object(&object_id, data.clone()).await.unwrap();

        // Verify shards were written to nodes
        let nodes = registry.all_mock_nodes();
        let total_chunks: usize = nodes.iter().map(|n| n.chunk_count()).sum();
        assert!(total_chunks >= 4); // At least data shards should be stored

        // Get the object back
        let retrieved = backend.get_object(&object_id).await.unwrap();

        // The retrieved data might have padding, but should contain original data
        assert!(retrieved.len() >= data.len());
        assert_eq!(&retrieved[..data.len()], &data[..]);
    }

    /// Reads must still succeed with up to `parity_shards` nodes down.
    #[tokio::test]
    async fn test_ec_backend_tolerates_failures() {
        let config = create_ec_config(4, 2);
        let registry = Arc::new(MockNodeRegistry::with_nodes(6));

        let backend = ErasureCodedBackend::new(config, registry.clone()).await.unwrap();

        let object_id = ObjectId::new();
        let data = Bytes::from(vec![42u8; 512]);

        backend.put_object(&object_id, data.clone()).await.unwrap();

        // Fail 2 nodes (within parity tolerance)
        let nodes = registry.all_mock_nodes();
        nodes[0].set_healthy(false);
        nodes[1].set_healthy(false);

        // Should still be able to read
        let retrieved = backend.get_object(&object_id).await.unwrap();
        assert!(retrieved.len() >= data.len());
        assert_eq!(&retrieved[..data.len()], &data[..]);
    }

    /// After delete_object, existence (driven by metadata) must be false.
    #[tokio::test]
    async fn test_ec_backend_delete() {
        let config = create_ec_config(4, 2);
        let registry = Arc::new(MockNodeRegistry::with_nodes(6));

        let backend = ErasureCodedBackend::new(config, registry.clone()).await.unwrap();

        let object_id = ObjectId::new();
        let data = Bytes::from(vec![42u8; 256]);

        backend.put_object(&object_id, data).await.unwrap();
        assert!(backend.object_exists(&object_id).await.unwrap());

        backend.delete_object(&object_id).await.unwrap();

        // After deletion, object should not exist
        // (All shards should be deleted)
        assert!(!backend.object_exists(&object_id).await.unwrap());
    }

    /// Writes must fail when fewer nodes exist than shards required.
    #[tokio::test]
    async fn test_ec_backend_not_enough_nodes() {
        let config = create_ec_config(4, 2);
        // Only 3 nodes, but need 6 for 4+2 EC
        let registry = Arc::new(MockNodeRegistry::with_nodes(3));

        let backend = ErasureCodedBackend::new(config, registry).await.unwrap();

        let object_id = ObjectId::new();
        let data = Bytes::from(vec![42u8; 256]);

        let result = backend.put_object(&object_id, data).await;
        assert!(result.is_err());
    }

    /// Full round-trip (put/exists/size/get/delete) across multiple chunks.
    #[tokio::test]
    async fn test_ec_backend_multi_chunk() {
        // Create config with small chunk size to force multiple chunks
        let config = DistributedConfig {
            redundancy: RedundancyMode::ErasureCoded {
                data_shards: 4,
                parity_shards: 2,
            },
            chunk: ChunkConfig::new(1024), // 1 KB chunks
            ..Default::default()
        };
        let registry = Arc::new(MockNodeRegistry::with_nodes(6));

        let backend = ErasureCodedBackend::new(config, registry.clone()).await.unwrap();

        let object_id = ObjectId::new();
        // Create data larger than chunk size (3 KB = 3 chunks)
        let data: Vec<u8> = (0..3072).map(|i| (i % 256) as u8).collect();
        let data = Bytes::from(data);

        // Store the object
        backend.put_object(&object_id, data.clone()).await.unwrap();

        // Verify it exists
        assert!(backend.object_exists(&object_id).await.unwrap());

        // Get object size
        let size = backend.object_size(&object_id).await.unwrap();
        assert_eq!(size, 3072);

        // Retrieve the object
        let retrieved = backend.get_object(&object_id).await.unwrap();

        // Verify data integrity
        assert_eq!(retrieved.len(), data.len());
        assert_eq!(retrieved, data);

        // Delete the object
        backend.delete_object(&object_id).await.unwrap();

        // Verify it's deleted
        assert!(!backend.object_exists(&object_id).await.unwrap());
    }

    /// Multi-chunk objects must survive node failures within parity tolerance.
    #[tokio::test]
    async fn test_ec_backend_multi_chunk_with_failures() {
        // Create config with small chunk size
        let config = DistributedConfig {
            redundancy: RedundancyMode::ErasureCoded {
                data_shards: 4,
                parity_shards: 2,
            },
            chunk: ChunkConfig::new(512), // 512 byte chunks
            ..Default::default()
        };
        let registry = Arc::new(MockNodeRegistry::with_nodes(6));

        let backend = ErasureCodedBackend::new(config, registry.clone()).await.unwrap();

        let object_id = ObjectId::new();
        // 2 KB = 4 chunks with 512 byte chunk size
        let data: Vec<u8> = (0..2048).map(|i| (i % 256) as u8).collect();
        let data = Bytes::from(data);

        // Store the object
        backend.put_object(&object_id, data.clone()).await.unwrap();

        // Fail 2 nodes (within parity tolerance)
        let nodes = registry.all_mock_nodes();
        nodes[0].set_healthy(false);
        nodes[1].set_healthy(false);

        // Should still be able to read
        let retrieved = backend.get_object(&object_id).await.unwrap();
        assert_eq!(retrieved.len(), data.len());
        assert_eq!(retrieved, data);
    }
}
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
//! Distributed storage backend implementations
|
||||
//!
|
||||
//! This module provides storage backends that distribute data across
|
||||
//! multiple nodes using either erasure coding or replication.
|
||||
|
||||
pub mod erasure_coded;
|
||||
pub mod replicated;
|
||||
|
||||
pub use erasure_coded::ErasureCodedBackend;
|
||||
pub use replicated::ReplicatedBackend;
|
||||
|
|
@ -0,0 +1,535 @@
|
|||
//! Replicated distributed storage backend
|
||||
//!
|
||||
//! Implements StorageBackend using N-way replication for
|
||||
//! performance-oriented redundancy with read scaling.
|
||||
|
||||
use crate::config::DistributedConfig;
|
||||
use crate::node::{NodeClientTrait, NodeRegistry};
|
||||
use crate::placement::{ConsistentHashSelector, NodeSelector};
|
||||
use async_trait::async_trait;
|
||||
use bytes::Bytes;
|
||||
use lightningstor_storage::{StorageBackend, StorageError, StorageResult};
|
||||
use lightningstor_types::ObjectId;
|
||||
use std::sync::Arc;
|
||||
use tracing::{debug, error, warn};
|
||||
|
||||
/// Replicated storage backend with N-way replication
///
/// Stores objects by replicating them to N nodes. Provides:
/// - Fast reads (any replica can serve)
/// - Simple failure handling (no reconstruction needed)
/// - Higher storage overhead than erasure coding
///
/// Replica and quorum counts are cached from the `Replicated` redundancy
/// mode of the config at construction time.
pub struct ReplicatedBackend {
    /// Node registry for discovering storage nodes
    node_registry: Arc<dyn NodeRegistry>,
    /// Node selector for placement decisions
    node_selector: Arc<dyn NodeSelector>,
    /// Configuration (kept for future use)
    #[allow(dead_code)]
    config: DistributedConfig,
    /// Number of replicas each object is written to
    replica_count: usize,
    /// Read quorum (minimum replicas for successful read)
    read_quorum: usize,
    /// Write quorum (minimum replicas for successful write)
    write_quorum: usize,
}
|
||||
|
||||
impl ReplicatedBackend {
|
||||
/// Create a new replicated backend
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `config` - Distributed storage configuration (must have Replicated redundancy mode)
|
||||
/// * `node_registry` - Registry for discovering storage nodes
|
||||
pub async fn new(
|
||||
config: DistributedConfig,
|
||||
node_registry: Arc<dyn NodeRegistry>,
|
||||
) -> StorageResult<Self> {
|
||||
let (replica_count, read_quorum, write_quorum) = match &config.redundancy {
|
||||
crate::config::RedundancyMode::Replicated {
|
||||
replica_count,
|
||||
read_quorum,
|
||||
write_quorum,
|
||||
} => (*replica_count, *read_quorum, *write_quorum),
|
||||
_ => {
|
||||
return Err(StorageError::Backend(
|
||||
"ReplicatedBackend requires Replicated redundancy mode".into(),
|
||||
))
|
||||
}
|
||||
};
|
||||
|
||||
let node_selector = Arc::new(ConsistentHashSelector::new());
|
||||
|
||||
Ok(Self {
|
||||
node_registry,
|
||||
node_selector,
|
||||
config,
|
||||
replica_count,
|
||||
read_quorum,
|
||||
write_quorum,
|
||||
})
|
||||
}
|
||||
|
||||
/// Get the number of replicas
|
||||
pub fn replica_count(&self) -> usize {
|
||||
self.replica_count
|
||||
}
|
||||
|
||||
/// Get the read quorum
|
||||
pub fn read_quorum(&self) -> usize {
|
||||
self.read_quorum
|
||||
}
|
||||
|
||||
/// Get the write quorum
|
||||
pub fn write_quorum(&self) -> usize {
|
||||
self.write_quorum
|
||||
}
|
||||
|
||||
/// Select nodes for writing replicas
|
||||
async fn select_replica_nodes(&self) -> StorageResult<Vec<Arc<dyn NodeClientTrait>>> {
|
||||
let nodes = self
|
||||
.node_registry
|
||||
.get_healthy_nodes()
|
||||
.await
|
||||
.map_err(|e| StorageError::Backend(e.to_string()))?;
|
||||
|
||||
if nodes.len() < self.replica_count {
|
||||
return Err(StorageError::Backend(format!(
|
||||
"Not enough healthy nodes: need {}, have {}",
|
||||
self.replica_count,
|
||||
nodes.len()
|
||||
)));
|
||||
}
|
||||
|
||||
self.node_selector
|
||||
.select_nodes(&nodes, self.replica_count)
|
||||
.await
|
||||
.map_err(|e| StorageError::Backend(e.to_string()))
|
||||
}
|
||||
|
||||
/// Generate the chunk key for an object
|
||||
fn object_key(object_id: &ObjectId) -> String {
|
||||
format!("obj_{}", object_id)
|
||||
}
|
||||
|
||||
/// Generate the chunk key for a part
|
||||
fn part_key(upload_id: &str, part_number: u32) -> String {
|
||||
format!("part_{}_{}", upload_id, part_number)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl StorageBackend for ReplicatedBackend {
|
||||
async fn put_object(&self, object_id: &ObjectId, data: Bytes) -> StorageResult<()> {
|
||||
debug!(
|
||||
object_id = %object_id,
|
||||
size = data.len(),
|
||||
replicas = self.replica_count,
|
||||
"Putting object with replication"
|
||||
);
|
||||
|
||||
let nodes = self.select_replica_nodes().await?;
|
||||
let chunk_key = Self::object_key(object_id);
|
||||
|
||||
// Write to all replicas in parallel
|
||||
let mut write_futures = Vec::with_capacity(self.replica_count);
|
||||
for node in nodes.iter() {
|
||||
let node = node.clone();
|
||||
let key = chunk_key.clone();
|
||||
let data = data.clone();
|
||||
|
||||
write_futures.push(async move { node.put_chunk(&key, 0, false, data).await });
|
||||
}
|
||||
|
||||
let results = futures::future::join_all(write_futures).await;
|
||||
let success_count = results.iter().filter(|r| r.is_ok()).count();
|
||||
let error_count = results.len() - success_count;
|
||||
|
||||
debug!(
|
||||
object_id = %object_id,
|
||||
success_count,
|
||||
error_count,
|
||||
write_quorum = self.write_quorum,
|
||||
"Wrote replicas"
|
||||
);
|
||||
|
||||
// Need write quorum for success
|
||||
if success_count < self.write_quorum {
|
||||
let errors: Vec<_> = results.into_iter().filter_map(|r| r.err()).collect();
|
||||
error!(
|
||||
success_count,
|
||||
write_quorum = self.write_quorum,
|
||||
errors = ?errors,
|
||||
"Failed to write quorum"
|
||||
);
|
||||
return Err(StorageError::Backend(format!(
|
||||
"Failed to write quorum: {} of {} required succeeded",
|
||||
success_count, self.write_quorum
|
||||
)));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn get_object(&self, object_id: &ObjectId) -> StorageResult<Bytes> {
|
||||
debug!(object_id = %object_id, "Getting object from replicas");
|
||||
|
||||
let nodes = self
|
||||
.node_registry
|
||||
.get_healthy_nodes()
|
||||
.await
|
||||
.map_err(|e| StorageError::Backend(e.to_string()))?;
|
||||
|
||||
let chunk_key = Self::object_key(object_id);
|
||||
|
||||
// Try to read from the preferred node first (for cache efficiency)
|
||||
if let Ok(preferred) = self.node_selector.select_for_read(&nodes, &chunk_key).await {
|
||||
match preferred.get_chunk(&chunk_key, 0, false).await {
|
||||
Ok(data) => {
|
||||
debug!(
|
||||
object_id = %object_id,
|
||||
node_id = preferred.node_id(),
|
||||
"Read from preferred node"
|
||||
);
|
||||
return Ok(Bytes::from(data));
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(
|
||||
object_id = %object_id,
|
||||
node_id = preferred.node_id(),
|
||||
error = ?e,
|
||||
"Failed to read from preferred node, trying others"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Try other nodes
|
||||
for node in nodes.iter() {
|
||||
match node.get_chunk(&chunk_key, 0, false).await {
|
||||
Ok(data) => {
|
||||
debug!(
|
||||
object_id = %object_id,
|
||||
node_id = node.node_id(),
|
||||
"Read from fallback node"
|
||||
);
|
||||
return Ok(Bytes::from(data));
|
||||
}
|
||||
Err(_) => continue,
|
||||
}
|
||||
}
|
||||
|
||||
Err(StorageError::NotFound(*object_id))
|
||||
}
|
||||
|
||||
async fn delete_object(&self, object_id: &ObjectId) -> StorageResult<()> {
|
||||
debug!(object_id = %object_id, "Deleting object from all replicas");
|
||||
|
||||
let nodes = self
|
||||
.node_registry
|
||||
.get_all_nodes()
|
||||
.await
|
||||
.map_err(|e| StorageError::Backend(e.to_string()))?;
|
||||
|
||||
let chunk_key = Self::object_key(object_id);
|
||||
|
||||
// Delete from all nodes (best effort)
|
||||
let mut delete_futures = Vec::new();
|
||||
for node in &nodes {
|
||||
let node = node.clone();
|
||||
let key = chunk_key.clone();
|
||||
delete_futures.push(async move {
|
||||
if let Err(e) = node.delete_chunk(&key).await {
|
||||
warn!(
|
||||
node_id = node.node_id(),
|
||||
chunk_key = key,
|
||||
error = ?e,
|
||||
"Failed to delete replica"
|
||||
);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
futures::future::join_all(delete_futures).await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn object_exists(&self, object_id: &ObjectId) -> StorageResult<bool> {
|
||||
let nodes = self
|
||||
.node_registry
|
||||
.get_healthy_nodes()
|
||||
.await
|
||||
.map_err(|e| StorageError::Backend(e.to_string()))?;
|
||||
|
||||
let chunk_key = Self::object_key(object_id);
|
||||
|
||||
for node in &nodes {
|
||||
if let Ok(true) = node.chunk_exists(&chunk_key).await {
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(false)
|
||||
}
|
||||
|
||||
async fn object_size(&self, object_id: &ObjectId) -> StorageResult<u64> {
|
||||
let nodes = self
|
||||
.node_registry
|
||||
.get_healthy_nodes()
|
||||
.await
|
||||
.map_err(|e| StorageError::Backend(e.to_string()))?;
|
||||
|
||||
let chunk_key = Self::object_key(object_id);
|
||||
|
||||
for node in &nodes {
|
||||
if let Ok(Some(size)) = node.chunk_size(&chunk_key).await {
|
||||
return Ok(size);
|
||||
}
|
||||
}
|
||||
|
||||
Err(StorageError::NotFound(*object_id))
|
||||
}
|
||||
|
||||
async fn put_part(
|
||||
&self,
|
||||
upload_id: &str,
|
||||
part_number: u32,
|
||||
data: Bytes,
|
||||
) -> StorageResult<()> {
|
||||
debug!(
|
||||
upload_id,
|
||||
part_number,
|
||||
size = data.len(),
|
||||
"Putting multipart part with replication"
|
||||
);
|
||||
|
||||
let nodes = self.select_replica_nodes().await?;
|
||||
let chunk_key = Self::part_key(upload_id, part_number);
|
||||
|
||||
// Write to all replicas in parallel
|
||||
let mut write_futures = Vec::with_capacity(self.replica_count);
|
||||
for node in nodes.iter() {
|
||||
let node = node.clone();
|
||||
let key = chunk_key.clone();
|
||||
let data = data.clone();
|
||||
|
||||
write_futures.push(async move { node.put_chunk(&key, part_number, false, data).await });
|
||||
}
|
||||
|
||||
let results = futures::future::join_all(write_futures).await;
|
||||
let success_count = results.iter().filter(|r| r.is_ok()).count();
|
||||
|
||||
if success_count < self.write_quorum {
|
||||
return Err(StorageError::Backend(format!(
|
||||
"Failed to write part quorum: {} of {} required",
|
||||
success_count, self.write_quorum
|
||||
)));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn get_part(&self, upload_id: &str, part_number: u32) -> StorageResult<Bytes> {
|
||||
let nodes = self
|
||||
.node_registry
|
||||
.get_healthy_nodes()
|
||||
.await
|
||||
.map_err(|e| StorageError::Backend(e.to_string()))?;
|
||||
|
||||
let chunk_key = Self::part_key(upload_id, part_number);
|
||||
|
||||
// Try nodes until we get a successful read
|
||||
for node in nodes.iter() {
|
||||
match node.get_chunk(&chunk_key, part_number, false).await {
|
||||
Ok(data) => return Ok(Bytes::from(data)),
|
||||
Err(_) => continue,
|
||||
}
|
||||
}
|
||||
|
||||
Err(StorageError::Backend(format!(
|
||||
"Part {}:{} not found on any node",
|
||||
upload_id, part_number
|
||||
)))
|
||||
}
|
||||
|
||||
async fn delete_part(&self, upload_id: &str, part_number: u32) -> StorageResult<()> {
|
||||
let nodes = self
|
||||
.node_registry
|
||||
.get_all_nodes()
|
||||
.await
|
||||
.map_err(|e| StorageError::Backend(e.to_string()))?;
|
||||
|
||||
let chunk_key = Self::part_key(upload_id, part_number);
|
||||
|
||||
// Delete from all nodes (best effort)
|
||||
let mut delete_futures = Vec::new();
|
||||
for node in &nodes {
|
||||
let node = node.clone();
|
||||
let key = chunk_key.clone();
|
||||
delete_futures.push(async move {
|
||||
let _ = node.delete_chunk(&key).await;
|
||||
});
|
||||
}
|
||||
|
||||
futures::future::join_all(delete_futures).await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
    /// Remove every part belonging to the given multipart upload.
    ///
    /// NOTE(review): part numbers are not tracked in metadata, so there
    /// is nothing to enumerate here — this is a logged no-op and callers
    /// always see success. Confirm this is acceptable until part
    /// tracking lands.
    async fn delete_upload_parts(&self, upload_id: &str) -> StorageResult<()> {
        // Would need to track part numbers in metadata to delete all parts
        // For now, just log and return success
        debug!(upload_id, "delete_upload_parts called (no-op without metadata tracking)");
        Ok(())
    }
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    //! Tests for the replicated backend, driven entirely by
    //! `MockNodeRegistry` (in-memory nodes, no network I/O).
    use super::*;
    use crate::config::RedundancyMode;
    use crate::node::MockNodeRegistry;

    // Helper: replication config with read_quorum = 1 and a majority
    // write quorum (replica_count = 3 -> write_quorum = 2).
    fn create_replicated_config(replica_count: usize) -> DistributedConfig {
        DistributedConfig {
            redundancy: RedundancyMode::Replicated {
                replica_count,
                read_quorum: 1,
                write_quorum: (replica_count / 2) + 1,
            },
            ..Default::default()
        }
    }

    #[tokio::test]
    async fn test_replicated_backend_creation() {
        let config = create_replicated_config(3);
        let registry = Arc::new(MockNodeRegistry::with_nodes(3));

        let backend = ReplicatedBackend::new(config, registry).await.unwrap();

        // Quorums must match what create_replicated_config computed.
        assert_eq!(backend.replica_count(), 3);
        assert_eq!(backend.read_quorum(), 1);
        assert_eq!(backend.write_quorum(), 2);
    }

    #[tokio::test]
    async fn test_replicated_backend_put_get() {
        let config = create_replicated_config(3);
        let registry = Arc::new(MockNodeRegistry::with_nodes(3));

        let backend = ReplicatedBackend::new(config, registry.clone()).await.unwrap();

        let object_id = ObjectId::new();
        let data = Bytes::from(vec![42u8; 1024]);

        backend.put_object(&object_id, data.clone()).await.unwrap();

        // Verify data was written to nodes
        let nodes = registry.all_mock_nodes();
        let total_chunks: usize = nodes.iter().map(|n| n.chunk_count()).sum();
        assert!(total_chunks >= 2); // At least write_quorum nodes

        // Get the object back
        let retrieved = backend.get_object(&object_id).await.unwrap();
        assert_eq!(retrieved, data);
    }

    #[tokio::test]
    async fn test_replicated_backend_tolerates_minority_failure() {
        let config = create_replicated_config(3);
        let registry = Arc::new(MockNodeRegistry::with_nodes(3));

        let backend = ReplicatedBackend::new(config, registry.clone()).await.unwrap();

        let object_id = ObjectId::new();
        let data = Bytes::from(vec![42u8; 512]);

        backend.put_object(&object_id, data.clone()).await.unwrap();

        // Fail 1 node (minority)
        let nodes = registry.all_mock_nodes();
        nodes[0].set_healthy(false);

        // Should still be able to read from healthy nodes
        let retrieved = backend.get_object(&object_id).await.unwrap();
        assert_eq!(retrieved, data);
    }

    #[tokio::test]
    async fn test_replicated_backend_write_quorum_failure() {
        let config = create_replicated_config(3);
        let registry = Arc::new(MockNodeRegistry::with_nodes(3));

        let backend = ReplicatedBackend::new(config, registry.clone()).await.unwrap();

        // Fail 2 nodes (below write quorum of 2)
        let nodes = registry.all_mock_nodes();
        nodes[0].set_healthy(false);
        nodes[1].set_healthy(false);

        let object_id = ObjectId::new();
        let data = Bytes::from(vec![42u8; 256]);

        // Write should fail due to insufficient healthy nodes
        let result = backend.put_object(&object_id, data).await;
        assert!(result.is_err());
    }

    #[tokio::test]
    async fn test_replicated_backend_delete() {
        let config = create_replicated_config(3);
        let registry = Arc::new(MockNodeRegistry::with_nodes(3));

        let backend = ReplicatedBackend::new(config, registry.clone()).await.unwrap();

        let object_id = ObjectId::new();
        let data = Bytes::from(vec![42u8; 256]);

        backend.put_object(&object_id, data).await.unwrap();
        assert!(backend.object_exists(&object_id).await.unwrap());

        backend.delete_object(&object_id).await.unwrap();
        assert!(!backend.object_exists(&object_id).await.unwrap());
    }

    #[tokio::test]
    async fn test_replicated_backend_object_size() {
        let config = create_replicated_config(3);
        let registry = Arc::new(MockNodeRegistry::with_nodes(3));

        let backend = ReplicatedBackend::new(config, registry).await.unwrap();

        let object_id = ObjectId::new();
        let data = Bytes::from(vec![42u8; 1234]);

        backend.put_object(&object_id, data).await.unwrap();

        // Size must be the logical object size, not size * replicas.
        let size = backend.object_size(&object_id).await.unwrap();
        assert_eq!(size, 1234);
    }

    #[tokio::test]
    async fn test_replicated_backend_multipart() {
        let config = create_replicated_config(3);
        let registry = Arc::new(MockNodeRegistry::with_nodes(3));

        let backend = ReplicatedBackend::new(config, registry).await.unwrap();

        let upload_id = "test-upload-123";
        let part1 = Bytes::from(vec![1u8; 1024]);
        let part2 = Bytes::from(vec![2u8; 1024]);

        backend.put_part(upload_id, 1, part1.clone()).await.unwrap();
        backend.put_part(upload_id, 2, part2.clone()).await.unwrap();

        let retrieved1 = backend.get_part(upload_id, 1).await.unwrap();
        let retrieved2 = backend.get_part(upload_id, 2).await.unwrap();

        assert_eq!(retrieved1, part1);
        assert_eq!(retrieved2, part2);

        // After deletion the part must be unreadable on every node.
        backend.delete_part(upload_id, 1).await.unwrap();
        let result = backend.get_part(upload_id, 1).await;
        assert!(result.is_err());
    }
}
|
||||
276
lightningstor/crates/lightningstor-distributed/src/chunk/mod.rs
Normal file
276
lightningstor/crates/lightningstor-distributed/src/chunk/mod.rs
Normal file
|
|
@ -0,0 +1,276 @@
|
|||
//! Chunk management for distributed storage
|
||||
//!
|
||||
//! This module handles splitting large objects into fixed-size chunks
|
||||
//! and reassembling them back into the original data.
|
||||
|
||||
use crate::config::ChunkConfig;
|
||||
|
||||
/// Manages chunk operations for large objects
///
/// Stateless apart from its configuration; cheap to clone.
#[derive(Debug, Clone)]
pub struct ChunkManager {
    // Chunk sizing parameters (default / min / max chunk sizes).
    config: ChunkConfig,
}
|
||||
|
||||
impl ChunkManager {
|
||||
/// Create a new chunk manager with the given configuration
|
||||
pub fn new(config: ChunkConfig) -> Self {
|
||||
Self { config }
|
||||
}
|
||||
|
||||
/// Create a new chunk manager with default configuration
|
||||
pub fn with_defaults() -> Self {
|
||||
Self::new(ChunkConfig::default())
|
||||
}
|
||||
|
||||
/// Get the chunk size in bytes
|
||||
pub fn chunk_size(&self) -> usize {
|
||||
self.config.chunk_size
|
||||
}
|
||||
|
||||
/// Split data into chunks
|
||||
///
|
||||
/// Returns a vector of chunks. Each chunk is at most `chunk_size` bytes,
|
||||
/// except the last chunk which may be smaller.
|
||||
pub fn split(&self, data: &[u8]) -> Vec<Vec<u8>> {
|
||||
if data.is_empty() {
|
||||
return vec![vec![]];
|
||||
}
|
||||
|
||||
data.chunks(self.config.chunk_size)
|
||||
.map(|c| c.to_vec())
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Reassemble chunks into original data
|
||||
///
|
||||
/// Chunks must be in order and complete.
|
||||
pub fn reassemble(&self, chunks: Vec<Vec<u8>>) -> Vec<u8> {
|
||||
chunks.into_iter().flatten().collect()
|
||||
}
|
||||
|
||||
/// Calculate the number of chunks for a given data size
|
||||
pub fn chunk_count(&self, size: usize) -> usize {
|
||||
if size == 0 {
|
||||
return 1;
|
||||
}
|
||||
(size + self.config.chunk_size - 1) / self.config.chunk_size
|
||||
}
|
||||
|
||||
/// Calculate the size of a specific chunk
|
||||
///
|
||||
/// Returns the size of the chunk at the given index for data of the given total size.
|
||||
pub fn chunk_size_at(&self, total_size: usize, chunk_index: usize) -> usize {
|
||||
let full_chunks = total_size / self.config.chunk_size;
|
||||
let remainder = total_size % self.config.chunk_size;
|
||||
|
||||
if chunk_index < full_chunks {
|
||||
self.config.chunk_size
|
||||
} else if chunk_index == full_chunks && remainder > 0 {
|
||||
remainder
|
||||
} else {
|
||||
0
|
||||
}
|
||||
}
|
||||
|
||||
/// Calculate the byte range for a specific chunk
|
||||
///
|
||||
/// Returns (start_offset, length) for the chunk at the given index.
|
||||
pub fn chunk_range(&self, total_size: usize, chunk_index: usize) -> (usize, usize) {
|
||||
let start = chunk_index * self.config.chunk_size;
|
||||
let length = self.chunk_size_at(total_size, chunk_index);
|
||||
(start, length)
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for ChunkManager {
|
||||
fn default() -> Self {
|
||||
Self::with_defaults()
|
||||
}
|
||||
}
|
||||
|
||||
/// Represents a chunk identifier
///
/// A chunk is addressed by the object it belongs to, its index within
/// that object, and (for erasure coding) which shard of the chunk it is.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ChunkId {
    /// The object ID this chunk belongs to
    pub object_id: String,
    /// The chunk index within the object
    pub chunk_index: usize,
    /// The shard index (for erasure coding)
    pub shard_index: usize,
    /// Whether this is a parity shard
    pub is_parity: bool,
}

impl ChunkId {
    /// Create a new chunk ID for a simple (non-sharded) chunk
    pub fn simple(object_id: impl Into<String>, chunk_index: usize) -> Self {
        Self {
            object_id: object_id.into(),
            chunk_index,
            shard_index: 0,
            is_parity: false,
        }
    }

    /// Create a new chunk ID for a data shard
    pub fn data_shard(
        object_id: impl Into<String>,
        chunk_index: usize,
        shard_index: usize,
    ) -> Self {
        Self {
            object_id: object_id.into(),
            chunk_index,
            shard_index,
            is_parity: false,
        }
    }

    /// Create a new chunk ID for a parity shard
    pub fn parity_shard(
        object_id: impl Into<String>,
        chunk_index: usize,
        shard_index: usize,
    ) -> Self {
        Self {
            object_id: object_id.into(),
            chunk_index,
            shard_index,
            is_parity: true,
        }
    }

    /// Convert to a string key for storage
    ///
    /// Format: `{object_id}_{chunk_index}_{shard_index}_{p|d}`. The
    /// object ID may itself contain underscores, so parsing works from
    /// the right (see [`ChunkId::from_key`]).
    pub fn to_key(&self) -> String {
        format!(
            "{}_{}_{}_{}",
            self.object_id,
            self.chunk_index,
            self.shard_index,
            if self.is_parity { "p" } else { "d" }
        )
    }

    /// Parse from a string key
    ///
    /// Returns `None` for malformed keys. Fix over the previous version:
    /// the trailing shard marker must be exactly `"p"` or `"d"`; any
    /// other suffix used to be silently accepted as a data shard and is
    /// now rejected.
    pub fn from_key(key: &str) -> Option<Self> {
        // rsplitn keeps underscores inside the object ID intact:
        // parts = [marker, shard_index, chunk_index, object_id].
        let parts: Vec<&str> = key.rsplitn(4, '_').collect();
        if parts.len() != 4 {
            return None;
        }

        let is_parity = match parts[0] {
            "p" => true,
            "d" => false,
            _ => return None,
        };
        let shard_index = parts[1].parse().ok()?;
        let chunk_index = parts[2].parse().ok()?;
        let object_id = parts[3].to_string();

        Some(Self {
            object_id,
            chunk_index,
            shard_index,
            is_parity,
        })
    }
}

impl std::fmt::Display for ChunkId {
    /// Displays the chunk ID in its storage-key form.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.to_key())
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    //! Unit tests for chunk splitting/reassembly and chunk-key handling.
    use super::*;

    #[test]
    fn test_split_empty() {
        // Empty input still produces one (empty) chunk.
        let manager = ChunkManager::with_defaults();
        let chunks = manager.split(&[]);
        assert_eq!(chunks.len(), 1);
        assert!(chunks[0].is_empty());
    }

    #[test]
    fn test_split_smaller_than_chunk() {
        let manager = ChunkManager::new(ChunkConfig::new(1024));
        let data = vec![42u8; 512];
        let chunks = manager.split(&data);
        assert_eq!(chunks.len(), 1);
        assert_eq!(chunks[0], data);
    }

    #[test]
    fn test_split_exact_chunk_boundary() {
        // Exactly two chunk sizes -> two full chunks, no empty tail.
        let manager = ChunkManager::new(ChunkConfig::new(1024));
        let data = vec![42u8; 2048];
        let chunks = manager.split(&data);
        assert_eq!(chunks.len(), 2);
        assert_eq!(chunks[0].len(), 1024);
        assert_eq!(chunks[1].len(), 1024);
    }

    #[test]
    fn test_split_partial_last_chunk() {
        let manager = ChunkManager::new(ChunkConfig::new(1024));
        let data = vec![42u8; 1500];
        let chunks = manager.split(&data);
        assert_eq!(chunks.len(), 2);
        assert_eq!(chunks[0].len(), 1024);
        assert_eq!(chunks[1].len(), 476);
    }

    #[test]
    fn test_reassemble_preserves_data() {
        // split followed by reassemble must be the identity.
        let manager = ChunkManager::new(ChunkConfig::new(1024));
        let original: Vec<u8> = (0..2500).map(|i| (i % 256) as u8).collect();
        let chunks = manager.split(&original);
        let reassembled = manager.reassemble(chunks);
        assert_eq!(original, reassembled);
    }

    #[test]
    fn test_chunk_count() {
        let manager = ChunkManager::new(ChunkConfig::new(1024));
        assert_eq!(manager.chunk_count(0), 1);
        assert_eq!(manager.chunk_count(512), 1);
        assert_eq!(manager.chunk_count(1024), 1);
        assert_eq!(manager.chunk_count(1025), 2);
        assert_eq!(manager.chunk_count(2048), 2);
        assert_eq!(manager.chunk_count(3000), 3);
    }

    #[test]
    fn test_chunk_size_at() {
        let manager = ChunkManager::new(ChunkConfig::new(1024));
        // 2500 bytes = 2 full chunks (1024) + 1 partial (452)
        assert_eq!(manager.chunk_size_at(2500, 0), 1024);
        assert_eq!(manager.chunk_size_at(2500, 1), 1024);
        assert_eq!(manager.chunk_size_at(2500, 2), 452);
        assert_eq!(manager.chunk_size_at(2500, 3), 0);
    }

    #[test]
    fn test_chunk_range() {
        let manager = ChunkManager::new(ChunkConfig::new(1024));
        assert_eq!(manager.chunk_range(2500, 0), (0, 1024));
        assert_eq!(manager.chunk_range(2500, 1), (1024, 1024));
        assert_eq!(manager.chunk_range(2500, 2), (2048, 452));
    }

    #[test]
    fn test_chunk_id_to_key() {
        let id = ChunkId::data_shard("obj123", 0, 2);
        assert_eq!(id.to_key(), "obj123_0_2_d");

        let id = ChunkId::parity_shard("obj123", 1, 4);
        assert_eq!(id.to_key(), "obj123_1_4_p");
    }

    #[test]
    fn test_chunk_id_roundtrip() {
        // Object ID contains an underscore on purpose: parsing must
        // split from the right to survive it.
        let original = ChunkId::data_shard("my-object", 5, 3);
        let key = original.to_key();
        let parsed = ChunkId::from_key(&key).unwrap();
        assert_eq!(original, parsed);
    }
}
|
||||
288
lightningstor/crates/lightningstor-distributed/src/config.rs
Normal file
288
lightningstor/crates/lightningstor-distributed/src/config.rs
Normal file
|
|
@ -0,0 +1,288 @@
|
|||
//! Configuration types for distributed storage backends
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Redundancy strategy for object storage
///
/// Serialized with an internal `"type"` tag in snake_case, e.g.
/// `{"type":"erasure_coded","data_shards":4,"parity_shards":2}`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum RedundancyMode {
    /// No redundancy (local storage only)
    None,

    /// Reed-Solomon erasure coding
    ErasureCoded {
        /// Number of data shards
        data_shards: usize,
        /// Number of parity shards
        parity_shards: usize,
    },

    /// Simple N-way replication
    Replicated {
        /// Number of replicas (including primary)
        replica_count: usize,
        /// Read quorum (minimum replicas for successful read)
        read_quorum: usize,
        /// Write quorum (minimum replicas for successful write)
        write_quorum: usize,
    },
}
|
||||
|
||||
impl Default for RedundancyMode {
|
||||
fn default() -> Self {
|
||||
// Default: 4+2 erasure coding (1.5x overhead, tolerates 2 failures)
|
||||
Self::ErasureCoded {
|
||||
data_shards: 4,
|
||||
parity_shards: 2,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl RedundancyMode {
|
||||
/// Create a new erasure coded configuration
|
||||
pub fn erasure_coded(data_shards: usize, parity_shards: usize) -> Self {
|
||||
Self::ErasureCoded {
|
||||
data_shards,
|
||||
parity_shards,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new replicated configuration with default quorums
|
||||
pub fn replicated(replica_count: usize) -> Self {
|
||||
Self::Replicated {
|
||||
replica_count,
|
||||
read_quorum: 1,
|
||||
write_quorum: (replica_count / 2) + 1,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new replicated configuration with custom quorums
|
||||
pub fn replicated_with_quorum(
|
||||
replica_count: usize,
|
||||
read_quorum: usize,
|
||||
write_quorum: usize,
|
||||
) -> Self {
|
||||
Self::Replicated {
|
||||
replica_count,
|
||||
read_quorum,
|
||||
write_quorum,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the minimum number of nodes required for this redundancy mode
|
||||
pub fn min_nodes(&self) -> usize {
|
||||
match self {
|
||||
Self::None => 1,
|
||||
Self::ErasureCoded {
|
||||
data_shards,
|
||||
parity_shards,
|
||||
} => data_shards + parity_shards,
|
||||
Self::Replicated { replica_count, .. } => *replica_count,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the storage overhead factor (1.0 = no overhead)
|
||||
pub fn overhead_factor(&self) -> f64 {
|
||||
match self {
|
||||
Self::None => 1.0,
|
||||
Self::ErasureCoded {
|
||||
data_shards,
|
||||
parity_shards,
|
||||
} => (*data_shards + *parity_shards) as f64 / *data_shards as f64,
|
||||
Self::Replicated { replica_count, .. } => *replica_count as f64,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the number of node failures that can be tolerated
|
||||
pub fn fault_tolerance(&self) -> usize {
|
||||
match self {
|
||||
Self::None => 0,
|
||||
Self::ErasureCoded { parity_shards, .. } => *parity_shards,
|
||||
Self::Replicated {
|
||||
replica_count,
|
||||
write_quorum,
|
||||
..
|
||||
} => replica_count - write_quorum,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Chunk size configuration
///
/// NOTE(review): `chunk_size` is not clamped to
/// `[min_chunk_size, max_chunk_size]` anywhere in this type — confirm
/// that callers validate the bounds.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ChunkConfig {
    /// Default chunk size in bytes (default: 8 MiB)
    #[serde(default = "ChunkConfig::default_chunk_size")]
    pub chunk_size: usize,
    /// Minimum chunk size in bytes (default: 1 MiB)
    #[serde(default = "ChunkConfig::default_min_chunk_size")]
    pub min_chunk_size: usize,
    /// Maximum chunk size in bytes (default: 64 MiB)
    #[serde(default = "ChunkConfig::default_max_chunk_size")]
    pub max_chunk_size: usize,
}
|
||||
|
||||
impl ChunkConfig {
|
||||
const fn default_chunk_size() -> usize {
|
||||
8 * 1024 * 1024 // 8 MiB
|
||||
}
|
||||
|
||||
const fn default_min_chunk_size() -> usize {
|
||||
1024 * 1024 // 1 MiB
|
||||
}
|
||||
|
||||
const fn default_max_chunk_size() -> usize {
|
||||
64 * 1024 * 1024 // 64 MiB
|
||||
}
|
||||
|
||||
/// Create a new chunk configuration with custom chunk size
|
||||
pub fn new(chunk_size: usize) -> Self {
|
||||
Self {
|
||||
chunk_size,
|
||||
min_chunk_size: Self::default_min_chunk_size(),
|
||||
max_chunk_size: Self::default_max_chunk_size(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for ChunkConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
chunk_size: Self::default_chunk_size(),
|
||||
min_chunk_size: Self::default_min_chunk_size(),
|
||||
max_chunk_size: Self::default_max_chunk_size(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Distributed storage configuration
///
/// Nodes come either from the static `node_endpoints` list or, when
/// `registry_endpoint` is set, presumably from dynamic discovery —
/// TODO confirm precedence when both are provided.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DistributedConfig {
    /// Redundancy mode
    #[serde(default)]
    pub redundancy: RedundancyMode,
    /// Chunk configuration
    #[serde(default)]
    pub chunk: ChunkConfig,
    /// Node endpoints (for static configuration)
    #[serde(default)]
    pub node_endpoints: Vec<String>,
    /// Registry endpoint (for dynamic discovery via ChainFire)
    pub registry_endpoint: Option<String>,
    /// Connection timeout in milliseconds
    #[serde(default = "DistributedConfig::default_connection_timeout")]
    pub connection_timeout_ms: u64,
    /// Request timeout in milliseconds
    #[serde(default = "DistributedConfig::default_request_timeout")]
    pub request_timeout_ms: u64,
    /// Maximum retries for failed operations
    #[serde(default = "DistributedConfig::default_max_retries")]
    pub max_retries: u32,
}
|
||||
|
||||
impl DistributedConfig {
|
||||
const fn default_connection_timeout() -> u64 {
|
||||
5000 // 5 seconds
|
||||
}
|
||||
|
||||
const fn default_request_timeout() -> u64 {
|
||||
30000 // 30 seconds
|
||||
}
|
||||
|
||||
const fn default_max_retries() -> u32 {
|
||||
3
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for DistributedConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
redundancy: RedundancyMode::default(),
|
||||
chunk: ChunkConfig::default(),
|
||||
node_endpoints: vec![],
|
||||
registry_endpoint: None,
|
||||
connection_timeout_ms: Self::default_connection_timeout(),
|
||||
request_timeout_ms: Self::default_request_timeout(),
|
||||
max_retries: Self::default_max_retries(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Bucket-level storage configuration override
///
/// Presumably a `None` field means "inherit the global
/// `DistributedConfig` value" — verify against the consumer.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct BucketStorageConfig {
    /// Override redundancy mode for this bucket
    pub redundancy: Option<RedundancyMode>,
    /// Override chunk size for this bucket
    pub chunk_size: Option<usize>,
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    //! Tests for redundancy/chunk configuration defaults, derived
    //! properties, and serde round-tripping.
    use super::*;

    #[test]
    fn test_redundancy_mode_default() {
        let mode = RedundancyMode::default();
        assert!(matches!(
            mode,
            RedundancyMode::ErasureCoded {
                data_shards: 4,
                parity_shards: 2
            }
        ));
    }

    #[test]
    fn test_redundancy_mode_min_nodes() {
        assert_eq!(RedundancyMode::None.min_nodes(), 1);
        assert_eq!(RedundancyMode::erasure_coded(4, 2).min_nodes(), 6);
        assert_eq!(RedundancyMode::replicated(3).min_nodes(), 3);
    }

    #[test]
    fn test_redundancy_mode_overhead() {
        // Exact float comparison is fine here: the values are small
        // integer ratios representable exactly in f64.
        assert!((RedundancyMode::None.overhead_factor() - 1.0).abs() < f64::EPSILON);
        assert!((RedundancyMode::erasure_coded(4, 2).overhead_factor() - 1.5).abs() < f64::EPSILON);
        assert!((RedundancyMode::replicated(3).overhead_factor() - 3.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_redundancy_mode_fault_tolerance() {
        assert_eq!(RedundancyMode::None.fault_tolerance(), 0);
        assert_eq!(RedundancyMode::erasure_coded(4, 2).fault_tolerance(), 2);
        // replica_count=3, write_quorum=2 -> can tolerate 1 failure
        assert_eq!(RedundancyMode::replicated(3).fault_tolerance(), 1);
    }

    #[test]
    fn test_chunk_config_default() {
        let config = ChunkConfig::default();
        assert_eq!(config.chunk_size, 8 * 1024 * 1024);
        assert_eq!(config.min_chunk_size, 1024 * 1024);
        assert_eq!(config.max_chunk_size, 64 * 1024 * 1024);
    }

    #[test]
    fn test_distributed_config_default() {
        let config = DistributedConfig::default();
        assert!(matches!(
            config.redundancy,
            RedundancyMode::ErasureCoded { .. }
        ));
        assert!(config.node_endpoints.is_empty());
        assert!(config.registry_endpoint.is_none());
    }

    #[test]
    fn test_redundancy_mode_serialization() {
        // Round-trip both variants through JSON (tagged representation).
        let ec = RedundancyMode::erasure_coded(4, 2);
        let json = serde_json::to_string(&ec).unwrap();
        let parsed: RedundancyMode = serde_json::from_str(&json).unwrap();
        assert_eq!(ec, parsed);

        let rep = RedundancyMode::replicated(3);
        let json = serde_json::to_string(&rep).unwrap();
        let parsed: RedundancyMode = serde_json::from_str(&json).unwrap();
        assert_eq!(rep, parsed);
    }
}
|
||||
|
|
@ -0,0 +1,381 @@
|
|||
//! Erasure coding module using Reed-Solomon
|
||||
//!
|
||||
//! This module provides a wrapper around the `reed-solomon-erasure` crate
|
||||
//! for encoding and decoding data using Reed-Solomon erasure codes.
|
||||
|
||||
use reed_solomon_erasure::galois_8::ReedSolomon;
|
||||
use thiserror::Error;
|
||||
|
||||
/// Errors that can occur during erasure coding operations
///
/// Errors from the underlying `reed-solomon-erasure` crate are carried
/// as strings in `CreateError`/`EncodeError`/`DecodeError`.
#[derive(Debug, Error)]
pub enum ErasureError {
    #[error("Failed to create Reed-Solomon encoder: {0}")]
    CreateError(String),

    #[error("Encoding failed: {0}")]
    EncodeError(String),

    #[error("Decoding failed: {0}")]
    DecodeError(String),

    #[error("Invalid shard count: expected {expected}, got {actual}")]
    InvalidShardCount { expected: usize, actual: usize },

    #[error("Not enough shards for reconstruction: need {needed}, have {available}")]
    NotEnoughShards { needed: usize, available: usize },

    #[error("Shard size mismatch: expected {expected}, got {actual}")]
    ShardSizeMismatch { expected: usize, actual: usize },
}
|
||||
|
||||
/// Result type for erasure coding operations
pub type ErasureResult<T> = Result<T, ErasureError>;
|
||||
|
||||
/// Reed-Solomon erasure coding codec
///
/// Provides encoding and decoding of data using Reed-Solomon erasure codes.
/// Data is split into `data_shards` pieces, and `parity_shards` parity pieces
/// are generated. Any `data_shards` pieces (data or parity) are sufficient
/// to reconstruct the original data.
#[derive(Debug)]
pub struct Codec {
    // Underlying Reed-Solomon implementation over GF(2^8).
    rs: ReedSolomon,
    // Shard counts this codec was constructed with.
    data_shards: usize,
    parity_shards: usize,
}
|
||||
|
||||
impl Codec {
    /// Create a new Reed-Solomon codec
    ///
    /// # Arguments
    /// * `data_shards` - Number of data shards (original data is split into this many pieces)
    /// * `parity_shards` - Number of parity shards (fault tolerance)
    ///
    /// # Errors
    /// Returns [`ErasureError::CreateError`] when the underlying library
    /// rejects the shard counts (e.g. zero shards).
    ///
    /// # Example
    /// ```
    /// use lightningstor_distributed::erasure::Codec;
    ///
    /// // 4+2 configuration: 4 data shards, 2 parity shards
    /// // Can tolerate loss of any 2 shards
    /// let codec = Codec::new(4, 2).unwrap();
    /// ```
    pub fn new(data_shards: usize, parity_shards: usize) -> ErasureResult<Self> {
        let rs = ReedSolomon::new(data_shards, parity_shards)
            .map_err(|e| ErasureError::CreateError(e.to_string()))?;

        Ok(Self {
            rs,
            data_shards,
            parity_shards,
        })
    }

    /// Get the number of data shards
    pub fn data_shards(&self) -> usize {
        self.data_shards
    }

    /// Get the number of parity shards
    pub fn parity_shards(&self) -> usize {
        self.parity_shards
    }

    /// Get the total number of shards (data + parity)
    pub fn total_shards(&self) -> usize {
        self.data_shards + self.parity_shards
    }

    /// Calculate the shard size for given data size
    ///
    /// Each shard will be this size (data is padded if necessary).
    /// Ceiling division so all bytes fit.
    pub fn shard_size(&self, data_size: usize) -> usize {
        // Round up to ensure all data fits
        (data_size + self.data_shards - 1) / self.data_shards
    }

    /// Encode data into shards
    ///
    /// Returns a vector of shards: first `data_shards` are data shards,
    /// remaining `parity_shards` are parity shards.
    ///
    /// # Arguments
    /// * `data` - The data to encode
    ///
    /// # Returns
    /// A vector of `data_shards + parity_shards` shards, each of equal size.
    pub fn encode(&self, data: &[u8]) -> ErasureResult<Vec<Vec<u8>>> {
        if data.is_empty() {
            // Handle empty data - create minimal shards
            // (1-byte zero shards; decode trims back to length 0 via
            // `original_size`).
            let shard_size = 1;
            let mut shards: Vec<Vec<u8>> = (0..self.total_shards())
                .map(|_| vec![0u8; shard_size])
                .collect();

            self.rs
                .encode(&mut shards)
                .map_err(|e| ErasureError::EncodeError(e.to_string()))?;

            return Ok(shards);
        }

        let shard_size = self.shard_size(data.len());
        let total_shards = self.total_shards();

        // Create shards with padding
        let mut shards: Vec<Vec<u8>> = Vec::with_capacity(total_shards);

        // Fill data shards
        for i in 0..self.data_shards {
            let start = i * shard_size;
            let end = std::cmp::min(start + shard_size, data.len());

            // Zero-initialized so the tail shard is zero-padded.
            let mut shard = vec![0u8; shard_size];
            if start < data.len() {
                let copy_len = end - start;
                shard[..copy_len].copy_from_slice(&data[start..end]);
            }
            shards.push(shard);
        }

        // Create empty parity shards
        for _ in 0..self.parity_shards {
            shards.push(vec![0u8; shard_size]);
        }

        // Encode (fills in parity shards)
        self.rs
            .encode(&mut shards)
            .map_err(|e| ErasureError::EncodeError(e.to_string()))?;

        Ok(shards)
    }

    /// Decode shards back into original data
    ///
    /// # Arguments
    /// * `shards` - Vector of optional shards. `None` indicates a missing shard.
    /// * `original_size` - The original data size (needed to remove padding)
    ///
    /// # Errors
    /// * [`ErasureError::InvalidShardCount`] - wrong number of entries
    /// * [`ErasureError::NotEnoughShards`] - fewer than `data_shards` present
    /// * [`ErasureError::ShardSizeMismatch`] - present shards differ in length
    /// * [`ErasureError::DecodeError`] - reconstruction failed
    ///
    /// # Returns
    /// The reconstructed original data.
    pub fn decode(&self, shards: Vec<Option<Vec<u8>>>, original_size: usize) -> ErasureResult<Vec<u8>> {
        if shards.len() != self.total_shards() {
            return Err(ErasureError::InvalidShardCount {
                expected: self.total_shards(),
                actual: shards.len(),
            });
        }

        // Count available shards
        let available = shards.iter().filter(|s| s.is_some()).count();
        if available < self.data_shards {
            return Err(ErasureError::NotEnoughShards {
                needed: self.data_shards,
                available,
            });
        }

        // Determine shard size from first available shard
        // (unwrap_or(1) is unreachable in practice: `available >=
        // data_shards >= 1` guarantees at least one Some).
        let shard_size = shards
            .iter()
            .find_map(|s| s.as_ref().map(|v| v.len()))
            .unwrap_or(1);

        // Verify all shards have same size
        for shard in shards.iter() {
            if let Some(s) = shard {
                if s.len() != shard_size {
                    return Err(ErasureError::ShardSizeMismatch {
                        expected: shard_size,
                        actual: s.len(),
                    });
                }
            }
        }

        // Convert to the format expected by reed-solomon-erasure
        let mut shard_refs: Vec<Option<Vec<u8>>> = shards;

        // Reconstruct missing shards
        self.rs
            .reconstruct(&mut shard_refs)
            .map_err(|e| ErasureError::DecodeError(e.to_string()))?;

        // Reassemble data from data shards
        // (after a successful reconstruct the first `data_shards`
        // entries are presumably all Some — the `if let` guards the
        // type, not an expected runtime case).
        let mut result = Vec::with_capacity(original_size);
        for shard in shard_refs.into_iter().take(self.data_shards) {
            if let Some(data) = shard {
                result.extend_from_slice(&data);
            }
        }

        // Trim to original size (remove padding)
        result.truncate(original_size);

        Ok(result)
    }

    /// Verify that shards are consistent
    ///
    /// Returns true if parity shards are correct for the given data shards.
    ///
    /// NOTE(review): an internal library error is mapped to `Ok(false)`
    /// via `unwrap_or(false)` rather than surfaced as an `Err` — confirm
    /// that "can't verify" and "inconsistent" should be indistinguishable
    /// to callers.
    pub fn verify(&self, shards: &[Vec<u8>]) -> ErasureResult<bool> {
        if shards.len() != self.total_shards() {
            return Err(ErasureError::InvalidShardCount {
                expected: self.total_shards(),
                actual: shards.len(),
            });
        }

        let refs: Vec<&[u8]> = shards.iter().map(|s| s.as_slice()).collect();

        Ok(self.rs.verify(&refs).unwrap_or(false))
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for the 4-data / 2-parity codec used by most tests.
    fn codec_4_2() -> Codec {
        Codec::new(4, 2).unwrap()
    }

    /// Wrap every shard in `Some`, ready to be handed to `decode`.
    fn as_options(shards: Vec<Vec<u8>>) -> Vec<Option<Vec<u8>>> {
        shards.into_iter().map(Some).collect()
    }

    #[test]
    fn test_codec_creation() {
        let ec = codec_4_2();
        assert_eq!(ec.data_shards(), 4);
        assert_eq!(ec.parity_shards(), 2);
        assert_eq!(ec.total_shards(), 6);
    }

    #[test]
    fn test_encode_decode_roundtrip() {
        let ec = codec_4_2();
        let payload = b"Hello, World! This is a test of erasure coding.";

        let shards = ec.encode(payload).unwrap();
        assert_eq!(shards.len(), 6);

        // Decoding with every shard present must reproduce the input.
        let restored = ec.decode(as_options(shards), payload.len()).unwrap();
        assert_eq!(restored, payload);
    }

    #[test]
    fn test_decode_with_missing_data_shards() {
        let ec = codec_4_2();
        let payload = b"Hello, World! This is a test of erasure coding.";

        let mut shards = as_options(ec.encode(payload).unwrap());
        // Drop the maximum tolerable number of data shards (2 of 4).
        shards[0] = None;
        shards[1] = None;

        assert_eq!(ec.decode(shards, payload.len()).unwrap(), payload);
    }

    #[test]
    fn test_decode_with_missing_parity_shards() {
        let ec = codec_4_2();
        let payload = b"Hello, World! This is a test of erasure coding.";

        let mut shards = as_options(ec.encode(payload).unwrap());
        // Losing only parity leaves all data shards intact.
        shards[4] = None;
        shards[5] = None;

        assert_eq!(ec.decode(shards, payload.len()).unwrap(), payload);
    }

    #[test]
    fn test_decode_with_mixed_missing_shards() {
        let ec = codec_4_2();
        let payload = b"Hello, World! This is a test of erasure coding.";

        let mut shards = as_options(ec.encode(payload).unwrap());
        shards[2] = None; // one data shard
        shards[5] = None; // one parity shard

        assert_eq!(ec.decode(shards, payload.len()).unwrap(), payload);
    }

    #[test]
    fn test_decode_fails_with_too_many_missing() {
        let ec = codec_4_2();
        let payload = b"Hello, World!";

        let mut shards = as_options(ec.encode(payload).unwrap());
        // Three losses exceed the two-shard parity budget.
        shards[0] = None;
        shards[1] = None;
        shards[2] = None;

        assert!(ec.decode(shards, payload.len()).is_err());
    }

    #[test]
    fn test_verify_valid_shards() {
        let ec = codec_4_2();
        let shards = ec.encode(b"Test data for verification").unwrap();
        assert!(ec.verify(&shards).unwrap());
    }

    #[test]
    fn test_verify_corrupted_shards() {
        let ec = codec_4_2();
        let mut shards = ec.encode(b"Test data for verification").unwrap();

        // Flip every bit of the first byte of the first shard.
        shards[0][0] ^= 0xFF;

        assert!(!ec.verify(&shards).unwrap());
    }

    #[test]
    fn test_encode_empty_data() {
        // Even empty input yields a full complement of (empty) shards.
        assert_eq!(codec_4_2().encode(&[]).unwrap().len(), 6);
    }

    #[test]
    fn test_encode_large_data() {
        let ec = codec_4_2();
        let payload: Vec<u8> = (0..10000).map(|i| (i % 256) as u8).collect();

        let shards = ec.encode(&payload).unwrap();
        let restored = ec.decode(as_options(shards), payload.len()).unwrap();

        assert_eq!(restored, payload);
    }

    #[test]
    fn test_shard_size_calculation() {
        let ec = codec_4_2();

        // Evenly divisible input: 100 / 4 = 25.
        assert_eq!(ec.shard_size(100), 25);
        // Non-divisible input rounds up: ceil(101 / 4) = 26.
        assert_eq!(ec.shard_size(101), 26);
        // Empty input needs no shard space.
        assert_eq!(ec.shard_size(0), 0);
    }
}
|
||||
179
lightningstor/crates/lightningstor-distributed/src/lib.rs
Normal file
179
lightningstor/crates/lightningstor-distributed/src/lib.rs
Normal file
|
|
@ -0,0 +1,179 @@
|
|||
//! Distributed storage backends for LightningStor
|
||||
//!
|
||||
//! This crate provides distributed storage backends that implement redundancy
|
||||
//! through either Reed-Solomon erasure coding or N-way replication.
|
||||
//!
|
||||
//! # Features
|
||||
//!
|
||||
//! - **Erasure Coding**: Storage-efficient redundancy using Reed-Solomon codes.
|
||||
//! Configurable data/parity shard ratio (e.g., 4+2 for 1.5x overhead with 2-node
|
||||
//! fault tolerance).
|
||||
//!
|
||||
//! - **Replication**: Performance-oriented redundancy with N-way replication.
|
||||
//! Simple and fast, with configurable read/write quorums.
|
||||
//!
|
||||
//! - **Pluggable Node Management**: Support for static node configuration or
|
||||
//! dynamic discovery via ChainFire.
|
||||
//!
|
||||
//! - **Placement Strategies**: Consistent hashing, random, and round-robin
|
||||
//! node selection strategies.
|
||||
//!
|
||||
//! # Example
|
||||
//!
|
||||
//! ```rust,no_run
|
||||
//! use lightningstor_distributed::{
|
||||
//! config::{DistributedConfig, RedundancyMode},
|
||||
//! backends::ErasureCodedBackend,
|
||||
//! node::StaticNodeRegistry,
|
||||
//! };
|
||||
//! use std::sync::Arc;
|
||||
//!
|
||||
//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
|
||||
//! // Configure 4+2 erasure coding
|
||||
//! let config = DistributedConfig {
|
||||
//! redundancy: RedundancyMode::ErasureCoded {
|
||||
//! data_shards: 4,
|
||||
//! parity_shards: 2,
|
||||
//! },
|
||||
//! node_endpoints: vec![
|
||||
//! "http://node1:9002".into(),
|
||||
//! "http://node2:9002".into(),
|
||||
//! "http://node3:9002".into(),
|
||||
//! "http://node4:9002".into(),
|
||||
//! "http://node5:9002".into(),
|
||||
//! "http://node6:9002".into(),
|
||||
//! ],
|
||||
//! ..Default::default()
|
||||
//! };
|
||||
//!
|
||||
//! // Create node registry
|
||||
//! let registry = Arc::new(
|
||||
//! StaticNodeRegistry::new(&config.node_endpoints).await?
|
||||
//! );
|
||||
//!
|
||||
//! // Create erasure-coded backend
|
||||
//! let backend = ErasureCodedBackend::new(config, registry).await?;
|
||||
//!
|
||||
//! // Use the backend via StorageBackend trait
|
||||
//! # Ok(())
|
||||
//! # }
|
||||
//! ```
|
||||
|
||||
pub mod backends;
|
||||
pub mod chunk;
|
||||
pub mod config;
|
||||
pub mod erasure;
|
||||
pub mod node;
|
||||
pub mod placement;
|
||||
|
||||
// Re-export commonly used types
|
||||
pub use backends::{ErasureCodedBackend, ReplicatedBackend};
|
||||
pub use config::{BucketStorageConfig, ChunkConfig, DistributedConfig, RedundancyMode};
|
||||
pub use node::{MockNodeClient, MockNodeRegistry, NodeRegistry, StaticNodeRegistry};
|
||||
pub use placement::{ConsistentHashSelector, NodeSelector, RandomSelector, RoundRobinSelector};
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use bytes::Bytes;
    use lightningstor_storage::StorageBackend;
    use lightningstor_types::ObjectId;
    use std::sync::Arc;

    /// Standard 4-data / 2-parity configuration used by the EC tests.
    fn ec_config() -> DistributedConfig {
        DistributedConfig {
            redundancy: RedundancyMode::ErasureCoded {
                data_shards: 4,
                parity_shards: 2,
            },
            ..Default::default()
        }
    }

    #[tokio::test]
    async fn test_ec_backend_integration() {
        let registry = Arc::new(MockNodeRegistry::with_nodes(6));
        let backend = ErasureCodedBackend::new(ec_config(), registry)
            .await
            .unwrap();

        let id = ObjectId::new();
        let payload = Bytes::from("Hello, erasure coded world!");

        backend.put_object(&id, payload.clone()).await.unwrap();
        let fetched = backend.get_object(&id).await.unwrap();

        // EC padding may make the stored object slightly longer; the
        // prefix must match the original bytes exactly.
        assert!(fetched.len() >= payload.len());
        assert_eq!(&fetched[..payload.len()], &payload[..]);
    }

    #[tokio::test]
    async fn test_replicated_backend_integration() {
        let config = DistributedConfig {
            redundancy: RedundancyMode::replicated(3),
            ..Default::default()
        };

        let registry = Arc::new(MockNodeRegistry::with_nodes(3));
        let backend = ReplicatedBackend::new(config, registry).await.unwrap();

        let id = ObjectId::new();
        let payload = Bytes::from("Hello, replicated world!");

        backend.put_object(&id, payload.clone()).await.unwrap();
        // Replication stores the object verbatim — exact round-trip.
        assert_eq!(backend.get_object(&id).await.unwrap(), payload);
    }

    #[tokio::test]
    async fn test_ec_with_node_failures() {
        let registry = Arc::new(MockNodeRegistry::with_nodes(6));
        let backend = ErasureCodedBackend::new(ec_config(), registry.clone())
            .await
            .unwrap();

        let id = ObjectId::new();
        let payload = Bytes::from(vec![42u8; 1000]);
        backend.put_object(&id, payload.clone()).await.unwrap();

        // Take down exactly as many nodes as a 4+2 layout can tolerate.
        let mocks = registry.all_mock_nodes();
        mocks[0].set_healthy(false);
        mocks[1].set_healthy(false);

        let fetched = backend.get_object(&id).await.unwrap();
        assert!(fetched.len() >= payload.len());
        assert_eq!(&fetched[..payload.len()], &payload[..]);
    }

    #[tokio::test]
    async fn test_replicated_with_node_failure() {
        let config = DistributedConfig {
            redundancy: RedundancyMode::replicated(3),
            ..Default::default()
        };

        let registry = Arc::new(MockNodeRegistry::with_nodes(3));
        let backend = ReplicatedBackend::new(config, registry.clone())
            .await
            .unwrap();

        let id = ObjectId::new();
        let payload = Bytes::from(vec![42u8; 1000]);
        backend.put_object(&id, payload.clone()).await.unwrap();

        // A single failure is within a 3-way replica set's tolerance.
        registry.all_mock_nodes()[0].set_healthy(false);

        assert_eq!(backend.get_object(&id).await.unwrap(), payload);
    }
}
|
||||
|
|
@ -0,0 +1,403 @@
|
|||
//! Node client for communicating with storage nodes
|
||||
|
||||
use super::{NodeError, NodeResult};
|
||||
use async_trait::async_trait;
|
||||
use bytes::Bytes;
|
||||
use lightningstor_node::proto::{
|
||||
ChunkExistsRequest, ChunkSizeRequest, DeleteChunkRequest, GetChunkRequest, PingRequest,
|
||||
PutChunkRequest,
|
||||
};
|
||||
use lightningstor_node::NodeServiceClient;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use tokio::sync::RwLock;
|
||||
use tonic::transport::Channel;
|
||||
|
||||
/// Trait for storage node client operations
|
||||
#[async_trait]
|
||||
pub trait NodeClientTrait: Send + Sync {
|
||||
/// Get the node ID
|
||||
fn node_id(&self) -> &str;
|
||||
|
||||
/// Get the node endpoint
|
||||
fn endpoint(&self) -> &str;
|
||||
|
||||
/// Check if the node is currently considered healthy
|
||||
async fn is_healthy(&self) -> bool;
|
||||
|
||||
/// Store a chunk on this node
|
||||
async fn put_chunk(
|
||||
&self,
|
||||
chunk_id: &str,
|
||||
shard_index: u32,
|
||||
is_parity: bool,
|
||||
data: Bytes,
|
||||
) -> NodeResult<()>;
|
||||
|
||||
/// Retrieve a chunk from this node
|
||||
async fn get_chunk(
|
||||
&self,
|
||||
chunk_id: &str,
|
||||
shard_index: u32,
|
||||
is_parity: bool,
|
||||
) -> NodeResult<Vec<u8>>;
|
||||
|
||||
/// Delete a chunk from this node
|
||||
async fn delete_chunk(&self, chunk_id: &str) -> NodeResult<()>;
|
||||
|
||||
/// Check if a chunk exists on this node
|
||||
async fn chunk_exists(&self, chunk_id: &str) -> NodeResult<bool>;
|
||||
|
||||
/// Get the size of a chunk on this node
|
||||
async fn chunk_size(&self, chunk_id: &str) -> NodeResult<Option<u64>>;
|
||||
|
||||
/// Ping the node to check connectivity
|
||||
async fn ping(&self) -> NodeResult<Duration>;
|
||||
}
|
||||
|
||||
/// gRPC client for a single storage node.
///
/// Wraps a tonic `NodeServiceClient` over a channel and tracks a simple
/// healthy/unhealthy flag that callers can toggle via `mark_healthy` /
/// `mark_unhealthy`. (An earlier comment called this a placeholder; the
/// implementation below is the real gRPC client.)
pub struct NodeClient {
    // Identifier of the remote node: taken from GetStatus at connect time,
    // or derived from the endpoint when the node could not be queried.
    node_id: String,
    // Endpoint string exactly as supplied by the caller.
    endpoint: String,
    // Soft health flag; RPCs are refused while this is false.
    healthy: AtomicBool,
    // NOTE(review): tonic clients are cheaply cloneable; guarding one with
    // an RwLock means RPCs contend on the lock — confirm whether exclusive
    // access is actually required here.
    client: RwLock<NodeServiceClient<Channel>>,
}
|
||||
|
||||
impl NodeClient {
|
||||
/// Connect to a storage node at the given endpoint
|
||||
pub async fn connect(endpoint: &str) -> NodeResult<Self> {
|
||||
// Ensure endpoint has scheme
|
||||
let endpoint_url = if endpoint.contains("://") {
|
||||
endpoint.to_string()
|
||||
} else {
|
||||
format!("http://{}", endpoint)
|
||||
};
|
||||
|
||||
let channel = Channel::from_shared(endpoint_url.clone())
|
||||
.map_err(|e| NodeError::ConnectionFailed {
|
||||
node_id: "unknown".to_string(),
|
||||
reason: e.to_string(),
|
||||
})?
|
||||
.connect_timeout(Duration::from_secs(5))
|
||||
.connect()
|
||||
.await
|
||||
.map_err(|e| NodeError::ConnectionFailed {
|
||||
node_id: "unknown".to_string(),
|
||||
reason: e.to_string(),
|
||||
})?;
|
||||
|
||||
let client = NodeServiceClient::new(channel);
|
||||
|
||||
// Try to get node status to get the real node ID
|
||||
// If that fails, generate a temporary one based on endpoint, but connection is established
|
||||
let node_id = match client.clone().get_status(lightningstor_node::proto::GetStatusRequest {}).await {
|
||||
Ok(response) => response.into_inner().node_id,
|
||||
Err(_) => format!("node-{}", endpoint.replace([':', '.', '/'], "-")),
|
||||
};
|
||||
|
||||
Ok(Self {
|
||||
node_id,
|
||||
endpoint: endpoint.to_string(),
|
||||
healthy: AtomicBool::new(true),
|
||||
client: RwLock::new(client),
|
||||
})
|
||||
}
|
||||
|
||||
/// Create a client with a specific node ID
|
||||
pub async fn connect_with_id(node_id: &str, endpoint: &str) -> NodeResult<Self> {
|
||||
let endpoint_url = if endpoint.contains("://") {
|
||||
endpoint.to_string()
|
||||
} else {
|
||||
format!("http://{}", endpoint)
|
||||
};
|
||||
|
||||
// We use lazy connection here to not block startup if a node is temporarily down
|
||||
let channel = Channel::from_shared(endpoint_url.clone())
|
||||
.map_err(|e| NodeError::ConnectionFailed {
|
||||
node_id: node_id.to_string(),
|
||||
reason: e.to_string(),
|
||||
})?
|
||||
.connect_timeout(Duration::from_secs(5))
|
||||
.connect_lazy();
|
||||
|
||||
let client = NodeServiceClient::new(channel);
|
||||
|
||||
Ok(Self {
|
||||
node_id: node_id.to_string(),
|
||||
endpoint: endpoint.to_string(),
|
||||
healthy: AtomicBool::new(true),
|
||||
client: RwLock::new(client),
|
||||
})
|
||||
}
|
||||
|
||||
/// Mark the node as unhealthy
|
||||
pub fn mark_unhealthy(&self) {
|
||||
self.healthy.store(false, Ordering::SeqCst);
|
||||
}
|
||||
|
||||
/// Mark the node as healthy
|
||||
pub fn mark_healthy(&self) {
|
||||
self.healthy.store(true, Ordering::SeqCst);
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl NodeClientTrait for NodeClient {
|
||||
fn node_id(&self) -> &str {
|
||||
&self.node_id
|
||||
}
|
||||
|
||||
fn endpoint(&self) -> &str {
|
||||
&self.endpoint
|
||||
}
|
||||
|
||||
async fn is_healthy(&self) -> bool {
|
||||
self.healthy.load(Ordering::SeqCst)
|
||||
}
|
||||
|
||||
async fn put_chunk(
|
||||
&self,
|
||||
chunk_id: &str,
|
||||
shard_index: u32,
|
||||
is_parity: bool,
|
||||
data: Bytes,
|
||||
) -> NodeResult<()> {
|
||||
if !self.is_healthy().await {
|
||||
return Err(NodeError::Unhealthy(self.node_id.clone()));
|
||||
}
|
||||
|
||||
let request = PutChunkRequest {
|
||||
chunk_id: chunk_id.to_string(),
|
||||
shard_index,
|
||||
is_parity,
|
||||
data: data.to_vec(),
|
||||
};
|
||||
|
||||
let mut client = self.client.write().await;
|
||||
client
|
||||
.put_chunk(request)
|
||||
.await
|
||||
.map(|_| ())
|
||||
.map_err(|e| NodeError::RpcFailed(e.to_string()))
|
||||
}
|
||||
|
||||
async fn get_chunk(
|
||||
&self,
|
||||
chunk_id: &str,
|
||||
shard_index: u32,
|
||||
is_parity: bool,
|
||||
) -> NodeResult<Vec<u8>> {
|
||||
if !self.is_healthy().await {
|
||||
return Err(NodeError::Unhealthy(self.node_id.clone()));
|
||||
}
|
||||
|
||||
let request = GetChunkRequest {
|
||||
chunk_id: chunk_id.to_string(),
|
||||
shard_index,
|
||||
is_parity,
|
||||
};
|
||||
|
||||
let mut client = self.client.write().await;
|
||||
let response = client
|
||||
.get_chunk(request)
|
||||
.await
|
||||
.map_err(|e| match e.code() {
|
||||
tonic::Code::NotFound => NodeError::NotFound(chunk_id.to_string()),
|
||||
_ => NodeError::RpcFailed(e.to_string()),
|
||||
})?;
|
||||
|
||||
Ok(response.into_inner().data)
|
||||
}
|
||||
|
||||
async fn delete_chunk(&self, chunk_id: &str) -> NodeResult<()> {
|
||||
if !self.is_healthy().await {
|
||||
return Err(NodeError::Unhealthy(self.node_id.clone()));
|
||||
}
|
||||
|
||||
let request = DeleteChunkRequest {
|
||||
chunk_id: chunk_id.to_string(),
|
||||
};
|
||||
|
||||
let mut client = self.client.write().await;
|
||||
client
|
||||
.delete_chunk(request)
|
||||
.await
|
||||
.map(|_| ())
|
||||
.map_err(|e| NodeError::RpcFailed(e.to_string()))
|
||||
}
|
||||
|
||||
async fn chunk_exists(&self, chunk_id: &str) -> NodeResult<bool> {
|
||||
if !self.is_healthy().await {
|
||||
return Err(NodeError::Unhealthy(self.node_id.clone()));
|
||||
}
|
||||
|
||||
let request = ChunkExistsRequest {
|
||||
chunk_id: chunk_id.to_string(),
|
||||
};
|
||||
|
||||
let mut client = self.client.write().await;
|
||||
let response = client
|
||||
.chunk_exists(request)
|
||||
.await
|
||||
.map_err(|e| NodeError::RpcFailed(e.to_string()))?;
|
||||
|
||||
Ok(response.into_inner().exists)
|
||||
}
|
||||
|
||||
async fn chunk_size(&self, chunk_id: &str) -> NodeResult<Option<u64>> {
|
||||
if !self.is_healthy().await {
|
||||
return Err(NodeError::Unhealthy(self.node_id.clone()));
|
||||
}
|
||||
|
||||
let request = ChunkSizeRequest {
|
||||
chunk_id: chunk_id.to_string(),
|
||||
};
|
||||
|
||||
let mut client = self.client.write().await;
|
||||
let response = client
|
||||
.chunk_size(request)
|
||||
.await
|
||||
.map_err(|e| NodeError::RpcFailed(e.to_string()))?;
|
||||
|
||||
let inner = response.into_inner();
|
||||
if inner.exists {
|
||||
Ok(Some(inner.size))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
async fn ping(&self) -> NodeResult<Duration> {
|
||||
if !self.is_healthy().await {
|
||||
return Err(NodeError::Unhealthy(self.node_id.clone()));
|
||||
}
|
||||
|
||||
let start = std::time::Instant::now();
|
||||
let request = PingRequest {};
|
||||
|
||||
let mut client = self.client.write().await;
|
||||
let _ = client
|
||||
.ping(request)
|
||||
.await
|
||||
.map_err(|e| NodeError::RpcFailed(e.to_string()))?;
|
||||
|
||||
Ok(start.elapsed())
|
||||
}
|
||||
}
|
||||
|
||||
/// A pool of node clients for connection reuse.
///
/// Thin async wrapper around a shared `Vec` of client trait objects; all
/// access goes through the `RwLock`, so the pool can be shared across tasks.
pub struct NodeClientPool {
    // Registered clients, in insertion order.
    clients: RwLock<Vec<Arc<dyn NodeClientTrait>>>,
}
|
||||
|
||||
impl NodeClientPool {
|
||||
/// Create a new empty client pool
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
clients: RwLock::new(Vec::new()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a client to the pool
|
||||
pub async fn add(&self, client: Arc<dyn NodeClientTrait>) {
|
||||
self.clients.write().await.push(client);
|
||||
}
|
||||
|
||||
/// Get all clients in the pool
|
||||
pub async fn all(&self) -> Vec<Arc<dyn NodeClientTrait>> {
|
||||
self.clients.read().await.clone()
|
||||
}
|
||||
|
||||
/// Get all healthy clients
|
||||
pub async fn healthy(&self) -> Vec<Arc<dyn NodeClientTrait>> {
|
||||
let clients = self.clients.read().await;
|
||||
let mut healthy = Vec::new();
|
||||
for client in clients.iter() {
|
||||
if client.is_healthy().await {
|
||||
healthy.push(client.clone());
|
||||
}
|
||||
}
|
||||
healthy
|
||||
}
|
||||
|
||||
/// Get a client by node ID
|
||||
pub async fn get(&self, node_id: &str) -> Option<Arc<dyn NodeClientTrait>> {
|
||||
self.clients
|
||||
.read()
|
||||
.await
|
||||
.iter()
|
||||
.find(|c| c.node_id() == node_id)
|
||||
.cloned()
|
||||
}
|
||||
|
||||
/// Remove a client from the pool
|
||||
pub async fn remove(&self, node_id: &str) {
|
||||
self.clients
|
||||
.write()
|
||||
.await
|
||||
.retain(|c| c.node_id() != node_id);
|
||||
}
|
||||
|
||||
/// Get the number of clients in the pool
|
||||
pub async fn len(&self) -> usize {
|
||||
self.clients.read().await.len()
|
||||
}
|
||||
|
||||
/// Check if the pool is empty
|
||||
pub async fn is_empty(&self) -> bool {
|
||||
self.clients.read().await.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for NodeClientPool {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // These tests use `connect_with_id`, which builds its channel lazily
    // and never touches the network, so they pass without a live storage
    // node. The original tests called `NodeClient::connect`, which dials
    // eagerly and fails whenever nothing listens on the target port.

    #[tokio::test]
    async fn test_node_client_creation() {
        let client = NodeClient::connect_with_id("node-0", "http://localhost:9002")
            .await
            .unwrap();
        assert!(client.is_healthy().await);
        assert!(!client.node_id().is_empty());
    }

    #[tokio::test]
    async fn test_node_client_health_toggle() {
        let client = NodeClient::connect_with_id("node-0", "http://localhost:9002")
            .await
            .unwrap();

        assert!(client.is_healthy().await);
        client.mark_unhealthy();
        assert!(!client.is_healthy().await);
        client.mark_healthy();
        assert!(client.is_healthy().await);
    }

    #[tokio::test]
    async fn test_node_client_pool() {
        let pool = NodeClientPool::new();
        assert!(pool.is_empty().await);

        let client1 = Arc::new(
            NodeClient::connect_with_id("node-1", "http://node1:9002")
                .await
                .unwrap(),
        );
        let client2 = Arc::new(
            NodeClient::connect_with_id("node-2", "http://node2:9002")
                .await
                .unwrap(),
        );

        pool.add(client1.clone()).await;
        pool.add(client2.clone()).await;

        assert_eq!(pool.len().await, 2);
        assert!(pool.get(client1.node_id()).await.is_some());

        pool.remove(client1.node_id()).await;
        assert_eq!(pool.len().await, 1);
        assert!(pool.get(client1.node_id()).await.is_none());
    }
}
|
||||
408
lightningstor/crates/lightningstor-distributed/src/node/mock.rs
Normal file
408
lightningstor/crates/lightningstor-distributed/src/node/mock.rs
Normal file
|
|
@ -0,0 +1,408 @@
|
|||
//! Mock implementations for testing
|
||||
|
||||
use super::client::NodeClientTrait;
|
||||
use super::registry::{NodeInfo, NodeRegistry};
|
||||
use super::{NodeError, NodeResult};
|
||||
use async_trait::async_trait;
|
||||
use bytes::Bytes;
|
||||
use dashmap::DashMap;
|
||||
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
/// In-memory stand-in for a storage node, used in tests.
///
/// Chunks live in a `DashMap`; health and per-operation failure injection
/// are plain atomic flags so tests can flip them from any thread.
pub struct MockNodeClient {
    // Identifier reported via `node_id()`.
    node_id: String,
    // Fake endpoint string reported via `endpoint()`.
    endpoint: String,
    // Chunk store keyed by chunk ID only — the shard index and parity flag
    // passed to put/get are ignored by the mock.
    chunks: DashMap<String, Vec<u8>>,
    healthy: AtomicBool,
    // Counters for verification: incremented on every attempt, including
    // calls rejected by the health or failure-injection checks.
    put_count: AtomicU64,
    get_count: AtomicU64,
    delete_count: AtomicU64,
    // Failure injection: when set, the corresponding operation errors out.
    fail_puts: AtomicBool,
    fail_gets: AtomicBool,
    fail_deletes: AtomicBool,
}
|
||||
|
||||
impl MockNodeClient {
|
||||
/// Create a new mock node client
|
||||
pub fn new(node_id: impl Into<String>, endpoint: impl Into<String>) -> Self {
|
||||
Self {
|
||||
node_id: node_id.into(),
|
||||
endpoint: endpoint.into(),
|
||||
chunks: DashMap::new(),
|
||||
healthy: AtomicBool::new(true),
|
||||
put_count: AtomicU64::new(0),
|
||||
get_count: AtomicU64::new(0),
|
||||
delete_count: AtomicU64::new(0),
|
||||
fail_puts: AtomicBool::new(false),
|
||||
fail_gets: AtomicBool::new(false),
|
||||
fail_deletes: AtomicBool::new(false),
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the health status of this mock node
|
||||
pub fn set_healthy(&self, healthy: bool) {
|
||||
self.healthy.store(healthy, Ordering::SeqCst);
|
||||
}
|
||||
|
||||
/// Enable/disable put failures
|
||||
pub fn set_fail_puts(&self, fail: bool) {
|
||||
self.fail_puts.store(fail, Ordering::SeqCst);
|
||||
}
|
||||
|
||||
/// Enable/disable get failures
|
||||
pub fn set_fail_gets(&self, fail: bool) {
|
||||
self.fail_gets.store(fail, Ordering::SeqCst);
|
||||
}
|
||||
|
||||
/// Enable/disable delete failures
|
||||
pub fn set_fail_deletes(&self, fail: bool) {
|
||||
self.fail_deletes.store(fail, Ordering::SeqCst);
|
||||
}
|
||||
|
||||
/// Get the count of put operations
|
||||
pub fn put_count(&self) -> u64 {
|
||||
self.put_count.load(Ordering::SeqCst)
|
||||
}
|
||||
|
||||
/// Get the count of get operations
|
||||
pub fn get_count(&self) -> u64 {
|
||||
self.get_count.load(Ordering::SeqCst)
|
||||
}
|
||||
|
||||
/// Get the count of delete operations
|
||||
pub fn delete_count(&self) -> u64 {
|
||||
self.delete_count.load(Ordering::SeqCst)
|
||||
}
|
||||
|
||||
/// Get all stored chunk IDs
|
||||
pub fn chunk_ids(&self) -> Vec<String> {
|
||||
self.chunks.iter().map(|r| r.key().clone()).collect()
|
||||
}
|
||||
|
||||
/// Get the number of stored chunks
|
||||
pub fn chunk_count(&self) -> usize {
|
||||
self.chunks.len()
|
||||
}
|
||||
|
||||
/// Clear all stored chunks
|
||||
pub fn clear(&self) {
|
||||
self.chunks.clear();
|
||||
}
|
||||
|
||||
/// Reset all counters
|
||||
pub fn reset_counters(&self) {
|
||||
self.put_count.store(0, Ordering::SeqCst);
|
||||
self.get_count.store(0, Ordering::SeqCst);
|
||||
self.delete_count.store(0, Ordering::SeqCst);
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl NodeClientTrait for MockNodeClient {
|
||||
fn node_id(&self) -> &str {
|
||||
&self.node_id
|
||||
}
|
||||
|
||||
fn endpoint(&self) -> &str {
|
||||
&self.endpoint
|
||||
}
|
||||
|
||||
async fn is_healthy(&self) -> bool {
|
||||
self.healthy.load(Ordering::SeqCst)
|
||||
}
|
||||
|
||||
async fn put_chunk(
|
||||
&self,
|
||||
chunk_id: &str,
|
||||
_shard_index: u32,
|
||||
_is_parity: bool,
|
||||
data: Bytes,
|
||||
) -> NodeResult<()> {
|
||||
self.put_count.fetch_add(1, Ordering::SeqCst);
|
||||
|
||||
if !self.is_healthy().await {
|
||||
return Err(NodeError::Unhealthy(self.node_id.clone()));
|
||||
}
|
||||
|
||||
if self.fail_puts.load(Ordering::SeqCst) {
|
||||
return Err(NodeError::RpcFailed("Simulated put failure".into()));
|
||||
}
|
||||
|
||||
self.chunks.insert(chunk_id.to_string(), data.to_vec());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn get_chunk(
|
||||
&self,
|
||||
chunk_id: &str,
|
||||
_shard_index: u32,
|
||||
_is_parity: bool,
|
||||
) -> NodeResult<Vec<u8>> {
|
||||
self.get_count.fetch_add(1, Ordering::SeqCst);
|
||||
|
||||
if !self.is_healthy().await {
|
||||
return Err(NodeError::Unhealthy(self.node_id.clone()));
|
||||
}
|
||||
|
||||
if self.fail_gets.load(Ordering::SeqCst) {
|
||||
return Err(NodeError::RpcFailed("Simulated get failure".into()));
|
||||
}
|
||||
|
||||
self.chunks
|
||||
.get(chunk_id)
|
||||
.map(|r| r.value().clone())
|
||||
.ok_or_else(|| NodeError::NotFound(chunk_id.to_string()))
|
||||
}
|
||||
|
||||
async fn delete_chunk(&self, chunk_id: &str) -> NodeResult<()> {
|
||||
self.delete_count.fetch_add(1, Ordering::SeqCst);
|
||||
|
||||
if !self.is_healthy().await {
|
||||
return Err(NodeError::Unhealthy(self.node_id.clone()));
|
||||
}
|
||||
|
||||
if self.fail_deletes.load(Ordering::SeqCst) {
|
||||
return Err(NodeError::RpcFailed("Simulated delete failure".into()));
|
||||
}
|
||||
|
||||
self.chunks.remove(chunk_id);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn chunk_exists(&self, chunk_id: &str) -> NodeResult<bool> {
|
||||
if !self.is_healthy().await {
|
||||
return Err(NodeError::Unhealthy(self.node_id.clone()));
|
||||
}
|
||||
|
||||
Ok(self.chunks.contains_key(chunk_id))
|
||||
}
|
||||
|
||||
async fn chunk_size(&self, chunk_id: &str) -> NodeResult<Option<u64>> {
|
||||
if !self.is_healthy().await {
|
||||
return Err(NodeError::Unhealthy(self.node_id.clone()));
|
||||
}
|
||||
|
||||
Ok(self.chunks.get(chunk_id).map(|r| r.value().len() as u64))
|
||||
}
|
||||
|
||||
async fn ping(&self) -> NodeResult<Duration> {
|
||||
if !self.is_healthy().await {
|
||||
return Err(NodeError::Unhealthy(self.node_id.clone()));
|
||||
}
|
||||
|
||||
Ok(Duration::from_micros(100)) // Simulated latency
|
||||
}
|
||||
}
|
||||
|
||||
/// Mock node registry for testing.
///
/// Holds `MockNodeClient`s keyed by node ID and exposes them both as
/// concrete mocks (for failure injection) and as `NodeClientTrait` objects
/// through the `NodeRegistry` implementation.
pub struct MockNodeRegistry {
    // Registered mock nodes, keyed by node ID.
    nodes: DashMap<String, Arc<MockNodeClient>>,
}
|
||||
|
||||
impl MockNodeRegistry {
|
||||
/// Create a new empty mock registry
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
nodes: DashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a mock node to the registry
|
||||
pub fn add_mock_node(&self, node: Arc<MockNodeClient>) {
|
||||
self.nodes.insert(node.node_id().to_string(), node);
|
||||
}
|
||||
|
||||
/// Get a mock node by ID
|
||||
pub fn get_mock_node(&self, node_id: &str) -> Option<Arc<MockNodeClient>> {
|
||||
self.nodes.get(node_id).map(|r| r.value().clone())
|
||||
}
|
||||
|
||||
/// Create a registry with N mock nodes
|
||||
pub fn with_nodes(count: usize) -> Self {
|
||||
let registry = Self::new();
|
||||
for i in 0..count {
|
||||
let node = Arc::new(MockNodeClient::new(
|
||||
format!("node-{}", i),
|
||||
format!("http://node-{}:9002", i),
|
||||
));
|
||||
registry.add_mock_node(node);
|
||||
}
|
||||
registry
|
||||
}
|
||||
|
||||
/// Get all mock nodes
|
||||
pub fn all_mock_nodes(&self) -> Vec<Arc<MockNodeClient>> {
|
||||
self.nodes.iter().map(|r| r.value().clone()).collect()
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for MockNodeRegistry {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl NodeRegistry for MockNodeRegistry {
|
||||
async fn get_all_nodes(&self) -> NodeResult<Vec<Arc<dyn NodeClientTrait>>> {
|
||||
Ok(self
|
||||
.nodes
|
||||
.iter()
|
||||
.map(|r| r.value().clone() as Arc<dyn NodeClientTrait>)
|
||||
.collect())
|
||||
}
|
||||
|
||||
async fn get_healthy_nodes(&self) -> NodeResult<Vec<Arc<dyn NodeClientTrait>>> {
|
||||
let mut healthy = Vec::new();
|
||||
for node_ref in self.nodes.iter() {
|
||||
let node = node_ref.value();
|
||||
if node.is_healthy().await {
|
||||
healthy.push(node.clone() as Arc<dyn NodeClientTrait>);
|
||||
}
|
||||
}
|
||||
Ok(healthy)
|
||||
}
|
||||
|
||||
async fn register_node(&self, info: NodeInfo) -> NodeResult<()> {
|
||||
let node = Arc::new(MockNodeClient::new(&info.node_id, &info.endpoint));
|
||||
self.nodes.insert(info.node_id, node);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn deregister_node(&self, node_id: &str) -> NodeResult<()> {
|
||||
self.nodes.remove(node_id);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn update_health(&self, node_id: &str, healthy: bool) -> NodeResult<()> {
|
||||
if let Some(node) = self.nodes.get(node_id) {
|
||||
node.set_healthy(healthy);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn get_node(&self, node_id: &str) -> NodeResult<Option<Arc<dyn NodeClientTrait>>> {
|
||||
Ok(self
|
||||
.nodes
|
||||
.get(node_id)
|
||||
.map(|r| r.value().clone() as Arc<dyn NodeClientTrait>))
|
||||
}
|
||||
|
||||
async fn node_count(&self) -> usize {
|
||||
self.nodes.len()
|
||||
}
|
||||
|
||||
async fn healthy_node_count(&self) -> usize {
|
||||
let mut count = 0;
|
||||
for node_ref in self.nodes.iter() {
|
||||
if node_ref.value().is_healthy().await {
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
count
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Round-trip a chunk through one mock node and check the
    /// put/get operation counters advance.
    #[tokio::test]
    async fn test_mock_node_put_get() {
        let node = MockNodeClient::new("node-1", "http://localhost:9002");

        let chunk_id = "test-chunk-1";
        let data = Bytes::from(vec![1, 2, 3, 4, 5]);

        node.put_chunk(chunk_id, 0, false, data.clone())
            .await
            .unwrap();
        let retrieved = node.get_chunk(chunk_id, 0, false).await.unwrap();

        assert_eq!(retrieved, data.to_vec());
        assert_eq!(node.put_count(), 1);
        assert_eq!(node.get_count(), 1);
    }

    /// Deleting a stored chunk makes `chunk_exists` report false.
    #[tokio::test]
    async fn test_mock_node_delete() {
        let node = MockNodeClient::new("node-1", "http://localhost:9002");

        let chunk_id = "test-chunk-1";
        let data = Bytes::from(vec![1, 2, 3]);

        node.put_chunk(chunk_id, 0, false, data).await.unwrap();
        assert!(node.chunk_exists(chunk_id).await.unwrap());

        node.delete_chunk(chunk_id).await.unwrap();
        assert!(!node.chunk_exists(chunk_id).await.unwrap());
    }

    /// A node toggled unhealthy rejects writes.
    #[tokio::test]
    async fn test_mock_node_health() {
        let node = MockNodeClient::new("node-1", "http://localhost:9002");

        assert!(node.is_healthy().await);

        node.set_healthy(false);
        assert!(!node.is_healthy().await);

        let result = node.put_chunk("chunk", 0, false, Bytes::new()).await;
        assert!(result.is_err());
    }

    /// Failure injection can be enabled and disabled at runtime;
    /// only puts issued while enabled fail.
    #[tokio::test]
    async fn test_mock_node_failure_injection() {
        let node = MockNodeClient::new("node-1", "http://localhost:9002");

        // Normal operation
        node.put_chunk("chunk", 0, false, Bytes::from(vec![1]))
            .await
            .unwrap();

        // Enable failure injection
        node.set_fail_puts(true);
        let result = node.put_chunk("chunk2", 0, false, Bytes::from(vec![2])).await;
        assert!(result.is_err());

        // Disable failure injection
        node.set_fail_puts(false);
        node.put_chunk("chunk3", 0, false, Bytes::from(vec![3]))
            .await
            .unwrap();
    }

    /// Registry-level health counts reflect per-node health updates.
    #[tokio::test]
    async fn test_mock_registry() {
        let registry = MockNodeRegistry::with_nodes(3);

        assert_eq!(registry.node_count().await, 3);
        assert_eq!(registry.healthy_node_count().await, 3);

        // Mark one node unhealthy
        registry.update_health("node-1", false).await.unwrap();
        assert_eq!(registry.healthy_node_count().await, 2);

        // Get healthy nodes
        let healthy = registry.get_healthy_nodes().await.unwrap();
        assert_eq!(healthy.len(), 2);
    }

    /// Register/deregister adjusts the node count symmetrically.
    #[tokio::test]
    async fn test_mock_registry_register_deregister() {
        let registry = MockNodeRegistry::new();

        let info = NodeInfo::new("new-node", "http://new:9002");
        registry.register_node(info).await.unwrap();
        assert_eq!(registry.node_count().await, 1);

        registry.deregister_node("new-node").await.unwrap();
        assert_eq!(registry.node_count().await, 0);
    }
}
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
//! Node management for distributed storage
|
||||
//!
|
||||
//! This module provides abstractions for managing storage nodes,
|
||||
//! including node discovery, health checking, and communication.
|
||||
|
||||
pub mod client;
|
||||
pub mod mock;
|
||||
pub mod registry;
|
||||
|
||||
pub use client::{NodeClient, NodeClientTrait};
|
||||
pub use mock::{MockNodeClient, MockNodeRegistry};
|
||||
pub use registry::{NodeInfo, NodeRegistry, StaticNodeRegistry};
|
||||
|
||||
use thiserror::Error;
|
||||
|
||||
/// Errors that can occur during node operations
#[derive(Debug, Error)]
pub enum NodeError {
    /// Could not establish a connection to the given node.
    #[error("Connection failed to node {node_id}: {reason}")]
    ConnectionFailed { node_id: String, reason: String },

    /// A call reached the node but the remote operation failed.
    #[error("RPC failed: {0}")]
    RpcFailed(String),

    /// No node with the given ID is known.
    #[error("Node not found: {0}")]
    NotFound(String),

    /// The node did not respond in time.
    #[error("Timeout waiting for response")]
    Timeout,

    /// The node is known but currently marked unhealthy.
    #[error("Node unhealthy: {0}")]
    Unhealthy(String),

    /// An operation required more healthy nodes than are available.
    #[error("Not enough healthy nodes: need {needed}, have {available}")]
    NotEnoughNodes { needed: usize, available: usize },
}

/// Result type for node operations
pub type NodeResult<T> = Result<T, NodeError>;
|
||||
|
|
@ -0,0 +1,281 @@
|
|||
//! Node registry for discovering and tracking storage nodes
|
||||
|
||||
use super::client::{NodeClient, NodeClientTrait};
|
||||
use super::NodeResult;
|
||||
use async_trait::async_trait;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
/// Information about a storage node
#[derive(Debug, Clone)]
pub struct NodeInfo {
    /// Unique node identifier
    pub node_id: String,
    /// gRPC endpoint (host:port)
    pub endpoint: String,
    /// Zone/rack identifier for placement
    pub zone: String,
    /// Region identifier
    pub region: String,
    /// Total storage capacity in bytes
    pub capacity_bytes: u64,
    /// Currently used storage in bytes
    pub used_bytes: u64,
    /// Whether the node is healthy
    pub healthy: bool,
}

impl NodeInfo {
    /// Build node info carrying only an ID and endpoint.
    ///
    /// Placement fields start empty, capacity/usage start at zero, and
    /// the node is presumed healthy until told otherwise.
    pub fn new(node_id: impl Into<String>, endpoint: impl Into<String>) -> Self {
        NodeInfo {
            node_id: node_id.into(),
            endpoint: endpoint.into(),
            zone: String::new(),
            region: String::new(),
            capacity_bytes: 0,
            used_bytes: 0,
            healthy: true,
        }
    }

    /// Builder-style setter for the zone/rack identifier.
    pub fn with_zone(mut self, zone: impl Into<String>) -> Self {
        self.zone = zone.into();
        self
    }

    /// Builder-style setter for the region identifier.
    pub fn with_region(mut self, region: impl Into<String>) -> Self {
        self.region = region.into();
        self
    }

    /// Builder-style setter for the total capacity in bytes.
    pub fn with_capacity(mut self, capacity_bytes: u64) -> Self {
        self.capacity_bytes = capacity_bytes;
        self
    }

    /// Remaining free space in bytes; saturates at zero if usage
    /// exceeds capacity instead of underflowing.
    pub fn available_bytes(&self) -> u64 {
        self.capacity_bytes.saturating_sub(self.used_bytes)
    }

    /// Fraction of capacity in use, as a percentage.
    ///
    /// A node with zero capacity reports 0.0 rather than dividing by zero.
    pub fn usage_percent(&self) -> f64 {
        match self.capacity_bytes {
            0 => 0.0,
            cap => (self.used_bytes as f64 / cap as f64) * 100.0,
        }
    }
}
|
||||
|
||||
/// Trait for node registry implementations
///
/// A registry tracks the set of storage nodes the cluster knows about,
/// maintains their health status, and hands out client handles.
#[async_trait]
pub trait NodeRegistry: Send + Sync {
    /// Get all registered nodes
    async fn get_all_nodes(&self) -> NodeResult<Vec<Arc<dyn NodeClientTrait>>>;

    /// Get only healthy nodes
    async fn get_healthy_nodes(&self) -> NodeResult<Vec<Arc<dyn NodeClientTrait>>>;

    /// Register a new node
    async fn register_node(&self, info: NodeInfo) -> NodeResult<()>;

    /// Remove a node from the registry
    async fn deregister_node(&self, node_id: &str) -> NodeResult<()>;

    /// Update node health status
    async fn update_health(&self, node_id: &str, healthy: bool) -> NodeResult<()>;

    /// Get a specific node by ID, or `None` if it is unknown
    async fn get_node(&self, node_id: &str) -> NodeResult<Option<Arc<dyn NodeClientTrait>>>;

    /// Get the number of registered nodes
    async fn node_count(&self) -> usize;

    /// Get the number of healthy nodes
    async fn healthy_node_count(&self) -> usize;
}
|
||||
|
||||
/// Static node registry that uses a fixed list of endpoints
///
/// Nodes are configured at startup and don't change dynamically.
pub struct StaticNodeRegistry {
    // Connected client handles; register/deregister keep this in step
    // with `node_info` (both push/retain on the same node ID).
    nodes: RwLock<Vec<Arc<dyn NodeClientTrait>>>,
    // Descriptive metadata for each node.
    node_info: RwLock<Vec<NodeInfo>>,
}
|
||||
|
||||
impl StaticNodeRegistry {
|
||||
/// Create a new static node registry with the given endpoints
|
||||
pub async fn new(endpoints: &[String]) -> NodeResult<Self> {
|
||||
let mut nodes: Vec<Arc<dyn NodeClientTrait>> = Vec::new();
|
||||
let mut node_info = Vec::new();
|
||||
|
||||
for (i, endpoint) in endpoints.iter().enumerate() {
|
||||
let node_id = format!("node-{}", i);
|
||||
let client = NodeClient::connect_with_id(&node_id, endpoint).await?;
|
||||
let info = NodeInfo::new(&node_id, endpoint);
|
||||
|
||||
nodes.push(Arc::new(client));
|
||||
node_info.push(info);
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
nodes: RwLock::new(nodes),
|
||||
node_info: RwLock::new(node_info),
|
||||
})
|
||||
}
|
||||
|
||||
/// Create an empty registry
|
||||
pub fn empty() -> Self {
|
||||
Self {
|
||||
nodes: RwLock::new(Vec::new()),
|
||||
node_info: RwLock::new(Vec::new()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get node info for all nodes
|
||||
pub async fn get_node_info(&self) -> Vec<NodeInfo> {
|
||||
self.node_info.read().await.clone()
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
impl NodeRegistry for StaticNodeRegistry {
    async fn get_all_nodes(&self) -> NodeResult<Vec<Arc<dyn NodeClientTrait>>> {
        // Cheap: clones the Vec of Arc handles under the read lock.
        Ok(self.nodes.read().await.clone())
    }

    async fn get_healthy_nodes(&self) -> NodeResult<Vec<Arc<dyn NodeClientTrait>>> {
        // Health is queried from each client sequentially while the read
        // lock is held across awaits; writers block until this completes.
        let nodes = self.nodes.read().await;
        let mut healthy = Vec::new();
        for node in nodes.iter() {
            if node.is_healthy().await {
                healthy.push(node.clone());
            }
        }
        Ok(healthy)
    }

    async fn register_node(&self, info: NodeInfo) -> NodeResult<()> {
        // Connect first so a failed connection leaves the registry unchanged.
        let client = NodeClient::connect_with_id(&info.node_id, &info.endpoint).await?;

        self.nodes.write().await.push(Arc::new(client));
        self.node_info.write().await.push(info);

        Ok(())
    }

    async fn deregister_node(&self, node_id: &str) -> NodeResult<()> {
        // Removing an unknown ID is a no-op, not an error.
        self.nodes
            .write()
            .await
            .retain(|n| n.node_id() != node_id);
        self.node_info
            .write()
            .await
            .retain(|n| n.node_id != node_id);
        Ok(())
    }

    async fn update_health(&self, node_id: &str, healthy: bool) -> NodeResult<()> {
        // Only the first matching NodeInfo entry is updated.
        let mut info = self.node_info.write().await;
        for node_info in info.iter_mut() {
            if node_info.node_id == node_id {
                node_info.healthy = healthy;
                break;
            }
        }

        // Note: For static registry, we don't actually update the client health
        // as the client manages its own health state. This is mainly for tracking.
        // Consequence: `get_healthy_nodes`/`healthy_node_count` query the
        // clients directly and are unaffected by this call.
        Ok(())
    }

    async fn get_node(&self, node_id: &str) -> NodeResult<Option<Arc<dyn NodeClientTrait>>> {
        // Linear scan; node lists here are expected to be small and static.
        let nodes = self.nodes.read().await;
        Ok(nodes.iter().find(|n| n.node_id() == node_id).cloned())
    }

    async fn node_count(&self) -> usize {
        self.nodes.read().await.len()
    }

    async fn healthy_node_count(&self) -> usize {
        // Same sequential health polling as `get_healthy_nodes`, counting only.
        let nodes = self.nodes.read().await;
        let mut count = 0;
        for node in nodes.iter() {
            if node.is_healthy().await {
                count += 1;
            }
        }
        count
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Builder methods populate the corresponding fields.
    #[test]
    fn test_node_info_creation() {
        let info = NodeInfo::new("node-1", "http://localhost:9002")
            .with_zone("zone-a")
            .with_region("us-east-1")
            .with_capacity(1024 * 1024 * 1024);

        assert_eq!(info.node_id, "node-1");
        assert_eq!(info.endpoint, "http://localhost:9002");
        assert_eq!(info.zone, "zone-a");
        assert_eq!(info.region, "us-east-1");
        assert_eq!(info.capacity_bytes, 1024 * 1024 * 1024);
    }

    /// Available space and usage percentage derive from capacity/used.
    #[test]
    fn test_node_info_usage() {
        let mut info = NodeInfo::new("node-1", "http://localhost:9002").with_capacity(1000);

        info.used_bytes = 250;
        assert_eq!(info.available_bytes(), 750);
        assert!((info.usage_percent() - 25.0).abs() < 0.01);
    }

    // NOTE(review): these endpoints are not reachable, so passing here
    // implies `NodeClient::connect_with_id` does not dial eagerly —
    // confirm against the client implementation.
    #[tokio::test]
    async fn test_static_registry_creation() {
        let endpoints = vec![
            "http://node1:9002".to_string(),
            "http://node2:9002".to_string(),
        ];

        let registry = StaticNodeRegistry::new(&endpoints).await.unwrap();
        assert_eq!(registry.node_count().await, 2);
    }

    /// An empty registry can grow and shrink via register/deregister.
    #[tokio::test]
    async fn test_static_registry_register_deregister() {
        let registry = StaticNodeRegistry::empty();
        assert_eq!(registry.node_count().await, 0);

        let info = NodeInfo::new("node-1", "http://localhost:9002");
        registry.register_node(info).await.unwrap();
        assert_eq!(registry.node_count().await, 1);

        registry.deregister_node("node-1").await.unwrap();
        assert_eq!(registry.node_count().await, 0);
    }

    /// IDs are assigned positionally, so a one-endpoint registry
    /// contains exactly "node-0".
    #[tokio::test]
    async fn test_static_registry_get_node() {
        let endpoints = vec!["http://node1:9002".to_string()];
        let registry = StaticNodeRegistry::new(&endpoints).await.unwrap();

        let node = registry.get_node("node-0").await.unwrap();
        assert!(node.is_some());

        let missing = registry.get_node("nonexistent").await.unwrap();
        assert!(missing.is_none());
    }
}
|
||||
|
|
@ -0,0 +1,398 @@
|
|||
//! Data placement strategies for distributed storage
|
||||
//!
|
||||
//! This module provides strategies for selecting which nodes should store
|
||||
//! which data, supporting consistent hashing, random placement, and
|
||||
//! zone-aware placement.
|
||||
|
||||
use crate::node::{NodeClientTrait, NodeResult};
|
||||
use async_trait::async_trait;
|
||||
use std::collections::hash_map::DefaultHasher;
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::sync::Arc;
|
||||
|
||||
/// Trait for node selection strategies
///
/// Implementations decide which storage nodes receive writes and which
/// node serves a read.
#[async_trait]
pub trait NodeSelector: Send + Sync {
    /// Select N nodes for storing data
    ///
    /// Returns a vector of selected nodes, in order of preference.
    /// Implementations may return fewer than `count` nodes when fewer
    /// are available.
    async fn select_nodes(
        &self,
        available_nodes: &[Arc<dyn NodeClientTrait>],
        count: usize,
    ) -> NodeResult<Vec<Arc<dyn NodeClientTrait>>>;

    /// Select a single node for reading data
    ///
    /// The key is used to deterministically select the same node
    /// for the same data (for cache efficiency). Strategies that do not
    /// use the key may ignore it.
    async fn select_for_read(
        &self,
        available_nodes: &[Arc<dyn NodeClientTrait>],
        key: &str,
    ) -> NodeResult<Arc<dyn NodeClientTrait>>;
}
|
||||
|
||||
/// Consistent hash-based node selector
///
/// Uses consistent hashing to select nodes, ensuring minimal data movement
/// when nodes are added or removed. Each physical node is represented by
/// multiple virtual nodes on the ring to even out the distribution.
pub struct ConsistentHashSelector {
    /// Number of virtual nodes per physical node
    virtual_nodes: usize,
}

impl ConsistentHashSelector {
    /// Selector with the default of 100 virtual nodes per physical node.
    pub fn new() -> Self {
        Self::with_virtual_nodes(100)
    }

    /// Selector with a caller-chosen virtual-node count.
    pub fn with_virtual_nodes(virtual_nodes: usize) -> Self {
        Self { virtual_nodes }
    }

    /// Hash an arbitrary string key onto the `u64` ring space.
    fn hash_key(key: &str) -> u64 {
        let mut hasher = DefaultHasher::new();
        key.hash(&mut hasher);
        hasher.finish()
    }

    /// Ring position of virtual node `vnode_index` of node `node_id`.
    fn node_position(&self, node_id: &str, vnode_index: usize) -> u64 {
        Self::hash_key(&format!("{}:{}", node_id, vnode_index))
    }
}

impl Default for ConsistentHashSelector {
    fn default() -> Self {
        Self::new()
    }
}
|
||||
|
||||
#[async_trait]
|
||||
impl NodeSelector for ConsistentHashSelector {
|
||||
async fn select_nodes(
|
||||
&self,
|
||||
available_nodes: &[Arc<dyn NodeClientTrait>],
|
||||
count: usize,
|
||||
) -> NodeResult<Vec<Arc<dyn NodeClientTrait>>> {
|
||||
if available_nodes.is_empty() {
|
||||
return Ok(vec![]);
|
||||
}
|
||||
|
||||
let count = count.min(available_nodes.len());
|
||||
|
||||
// Build the hash ring with virtual nodes
|
||||
let mut ring: Vec<(u64, usize)> = Vec::new();
|
||||
for (node_idx, node) in available_nodes.iter().enumerate() {
|
||||
for vnode_idx in 0..self.virtual_nodes {
|
||||
let pos = self.node_position(node.node_id(), vnode_idx);
|
||||
ring.push((pos, node_idx));
|
||||
}
|
||||
}
|
||||
ring.sort_by_key(|(pos, _)| *pos);
|
||||
|
||||
// Select nodes by walking the ring
|
||||
let mut selected_indices = Vec::with_capacity(count);
|
||||
let mut seen = std::collections::HashSet::new();
|
||||
|
||||
// Start from a random position (using current time for diversity)
|
||||
let start_pos = Self::hash_key(&format!("{:?}", std::time::Instant::now()));
|
||||
let start_idx = ring
|
||||
.binary_search_by_key(&start_pos, |(pos, _)| *pos)
|
||||
.unwrap_or_else(|i| i % ring.len());
|
||||
|
||||
for i in 0..ring.len() {
|
||||
let idx = (start_idx + i) % ring.len();
|
||||
let node_idx = ring[idx].1;
|
||||
|
||||
if seen.insert(node_idx) {
|
||||
selected_indices.push(node_idx);
|
||||
if selected_indices.len() >= count {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(selected_indices
|
||||
.into_iter()
|
||||
.map(|idx| available_nodes[idx].clone())
|
||||
.collect())
|
||||
}
|
||||
|
||||
async fn select_for_read(
|
||||
&self,
|
||||
available_nodes: &[Arc<dyn NodeClientTrait>],
|
||||
key: &str,
|
||||
) -> NodeResult<Arc<dyn NodeClientTrait>> {
|
||||
if available_nodes.is_empty() {
|
||||
return Err(crate::node::NodeError::NotEnoughNodes {
|
||||
needed: 1,
|
||||
available: 0,
|
||||
});
|
||||
}
|
||||
|
||||
// Build the hash ring
|
||||
let mut ring: Vec<(u64, usize)> = Vec::new();
|
||||
for (node_idx, node) in available_nodes.iter().enumerate() {
|
||||
for vnode_idx in 0..self.virtual_nodes {
|
||||
let pos = self.node_position(node.node_id(), vnode_idx);
|
||||
ring.push((pos, node_idx));
|
||||
}
|
||||
}
|
||||
ring.sort_by_key(|(pos, _)| *pos);
|
||||
|
||||
// Find the first node after the key's position
|
||||
let key_pos = Self::hash_key(key);
|
||||
let idx = ring
|
||||
.binary_search_by_key(&key_pos, |(pos, _)| *pos)
|
||||
.unwrap_or_else(|i| i % ring.len());
|
||||
|
||||
let node_idx = ring[idx].1;
|
||||
Ok(available_nodes[node_idx].clone())
|
||||
}
|
||||
}
|
||||
|
||||
/// Random node selector
///
/// Randomly selects nodes for placement. Simple but doesn't provide
/// consistent placement across operations.
pub struct RandomSelector;

impl RandomSelector {
    /// Create a new selector; the type is a stateless unit struct.
    pub fn new() -> Self {
        Self
    }
}

impl Default for RandomSelector {
    fn default() -> Self {
        Self::new()
    }
}
|
||||
|
||||
#[async_trait]
impl NodeSelector for RandomSelector {
    /// Return `count` nodes drawn uniformly-ish at random (time-seeded
    /// LCG shuffle); order is randomized too.
    async fn select_nodes(
        &self,
        available_nodes: &[Arc<dyn NodeClientTrait>],
        count: usize,
    ) -> NodeResult<Vec<Arc<dyn NodeClientTrait>>> {
        if available_nodes.is_empty() {
            return Ok(vec![]);
        }

        let count = count.min(available_nodes.len());

        // Shuffle using Fisher-Yates with simple random
        let mut indices: Vec<usize> = (0..available_nodes.len()).collect();
        // Seed from the wall clock; `unwrap` only panics if the system
        // clock is set before the Unix epoch.
        let seed = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .as_nanos() as u64;

        let mut rng = seed;
        for i in (1..indices.len()).rev() {
            // Simple LCG random (Knuth's MMIX multiplier); wrapping ops make
            // overflow well-defined. The modulo introduces slight bias, which
            // is acceptable for load spreading.
            rng = rng.wrapping_mul(6364136223846793005).wrapping_add(1);
            let j = (rng as usize) % (i + 1);
            indices.swap(i, j);
        }

        // Take the first `count` entries of the shuffled order.
        Ok(indices
            .into_iter()
            .take(count)
            .map(|idx| available_nodes[idx].clone())
            .collect())
    }

    /// Pick one node at random; the key is ignored by design for this
    /// strategy, so repeated reads of the same key may hit different nodes.
    async fn select_for_read(
        &self,
        available_nodes: &[Arc<dyn NodeClientTrait>],
        _key: &str,
    ) -> NodeResult<Arc<dyn NodeClientTrait>> {
        if available_nodes.is_empty() {
            return Err(crate::node::NodeError::NotEnoughNodes {
                needed: 1,
                available: 0,
            });
        }

        // Random selection
        // Index derived from wall-clock nanoseconds modulo node count.
        let seed = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .as_nanos() as u64;
        let idx = (seed as usize) % available_nodes.len();

        Ok(available_nodes[idx].clone())
    }
}
|
||||
|
||||
/// Round-robin node selector
///
/// Selects nodes in round-robin order. Good for load distribution.
pub struct RoundRobinSelector {
    // Monotonically increasing ticket counter shared by both selection
    // methods; wraps on overflow via `fetch_add`.
    counter: std::sync::atomic::AtomicUsize,
}

impl RoundRobinSelector {
    /// Create a selector whose rotation starts at index 0.
    pub fn new() -> Self {
        Self {
            counter: std::sync::atomic::AtomicUsize::new(0),
        }
    }
}

impl Default for RoundRobinSelector {
    fn default() -> Self {
        Self::new()
    }
}
|
||||
|
||||
#[async_trait]
|
||||
impl NodeSelector for RoundRobinSelector {
|
||||
async fn select_nodes(
|
||||
&self,
|
||||
available_nodes: &[Arc<dyn NodeClientTrait>],
|
||||
count: usize,
|
||||
) -> NodeResult<Vec<Arc<dyn NodeClientTrait>>> {
|
||||
if available_nodes.is_empty() {
|
||||
return Ok(vec![]);
|
||||
}
|
||||
|
||||
let count = count.min(available_nodes.len());
|
||||
let start = self
|
||||
.counter
|
||||
.fetch_add(count, std::sync::atomic::Ordering::SeqCst);
|
||||
|
||||
Ok((0..count)
|
||||
.map(|i| {
|
||||
let idx = (start + i) % available_nodes.len();
|
||||
available_nodes[idx].clone()
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
async fn select_for_read(
|
||||
&self,
|
||||
available_nodes: &[Arc<dyn NodeClientTrait>],
|
||||
_key: &str,
|
||||
) -> NodeResult<Arc<dyn NodeClientTrait>> {
|
||||
if available_nodes.is_empty() {
|
||||
return Err(crate::node::NodeError::NotEnoughNodes {
|
||||
needed: 1,
|
||||
available: 0,
|
||||
});
|
||||
}
|
||||
|
||||
let idx = self
|
||||
.counter
|
||||
.fetch_add(1, std::sync::atomic::Ordering::SeqCst)
|
||||
% available_nodes.len();
|
||||
Ok(available_nodes[idx].clone())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::node::MockNodeClient;

    /// Build `count` trait-object mock nodes named node-0..node-{count-1}.
    fn create_mock_nodes(count: usize) -> Vec<Arc<dyn NodeClientTrait>> {
        (0..count)
            .map(|i| {
                Arc::new(MockNodeClient::new(
                    format!("node-{}", i),
                    format!("http://node-{}:9002", i),
                )) as Arc<dyn NodeClientTrait>
            })
            .collect()
    }

    /// The same key must map to the same node on repeated reads.
    #[tokio::test]
    async fn test_consistent_hash_deterministic_read() {
        let selector = ConsistentHashSelector::new();
        let nodes = create_mock_nodes(5);

        let key = "test-object-123";

        // Same key should always select the same node
        let node1 = selector.select_for_read(&nodes, key).await.unwrap();
        let node2 = selector.select_for_read(&nodes, key).await.unwrap();

        assert_eq!(node1.node_id(), node2.node_id());
    }

    /// Bulk selection returns the requested number of distinct nodes.
    #[tokio::test]
    async fn test_consistent_hash_select_nodes() {
        let selector = ConsistentHashSelector::new();
        let nodes = create_mock_nodes(6);

        let selected = selector.select_nodes(&nodes, 3).await.unwrap();

        assert_eq!(selected.len(), 3);

        // All selected nodes should be unique
        let ids: std::collections::HashSet<_> = selected.iter().map(|n| n.node_id()).collect();
        assert_eq!(ids.len(), 3);
    }

    /// Asking for more nodes than exist caps at the available count.
    #[tokio::test]
    async fn test_consistent_hash_select_more_than_available() {
        let selector = ConsistentHashSelector::new();
        let nodes = create_mock_nodes(3);

        // Request more nodes than available
        let selected = selector.select_nodes(&nodes, 10).await.unwrap();

        // Should return all available nodes
        assert_eq!(selected.len(), 3);
    }

    /// Random selection returns the requested count (order unchecked).
    #[tokio::test]
    async fn test_random_selector() {
        let selector = RandomSelector::new();
        let nodes = create_mock_nodes(5);

        let selected = selector.select_nodes(&nodes, 3).await.unwrap();
        assert_eq!(selected.len(), 3);
    }

    /// Four successive reads over three nodes must wrap: the 4th pick
    /// equals the 1st.
    #[tokio::test]
    async fn test_round_robin_selector() {
        let selector = RoundRobinSelector::new();
        let nodes = create_mock_nodes(3);

        // First selection
        let node1 = selector.select_for_read(&nodes, "key1").await.unwrap();
        // Second selection should be different
        let node2 = selector.select_for_read(&nodes, "key2").await.unwrap();
        // Third selection
        let node3 = selector.select_for_read(&nodes, "key3").await.unwrap();
        // Fourth should wrap around to first
        let node4 = selector.select_for_read(&nodes, "key4").await.unwrap();

        // Verify round-robin behavior
        let ids: Vec<_> = [&node1, &node2, &node3, &node4]
            .iter()
            .map(|n| n.node_id())
            .collect();
        assert_eq!(ids[0], ids[3]); // Wrapped around
    }

    /// Empty node lists: bulk selection yields empty, read selection errors.
    #[tokio::test]
    async fn test_empty_nodes() {
        let selector = ConsistentHashSelector::new();
        let nodes: Vec<Arc<dyn NodeClientTrait>> = vec![];

        let selected = selector.select_nodes(&nodes, 3).await.unwrap();
        assert!(selected.is_empty());

        let result = selector.select_for_read(&nodes, "key").await;
        assert!(result.is_err());
    }
}
|
||||
51
lightningstor/crates/lightningstor-node/Cargo.toml
Normal file
51
lightningstor/crates/lightningstor-node/Cargo.toml
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
[package]
|
||||
name = "lightningstor-node"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
rust-version.workspace = true
|
||||
description = "LightningStor distributed storage node daemon"
|
||||
|
||||
[[bin]]
|
||||
name = "lightningstor-node"
|
||||
path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
# Internal
|
||||
lightningstor-types = { workspace = true }
|
||||
lightningstor-storage = { workspace = true }
|
||||
|
||||
# gRPC
|
||||
tonic = { workspace = true }
|
||||
tonic-health = { workspace = true }
|
||||
prost = { workspace = true }
|
||||
prost-types = { workspace = true }
|
||||
|
||||
# Async runtime
|
||||
tokio = { workspace = true }
|
||||
tokio-stream = { workspace = true }
|
||||
async-trait = { workspace = true }
|
||||
futures = { workspace = true }
|
||||
|
||||
# Utilities
|
||||
tracing = { workspace = true }
|
||||
tracing-subscriber = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
clap = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
toml = { workspace = true }
|
||||
dashmap = { workspace = true }
|
||||
bytes = { workspace = true }
|
||||
uuid = { workspace = true }
|
||||
metrics = { workspace = true }
|
||||
metrics-exporter-prometheus = { workspace = true }
|
||||
|
||||
[build-dependencies]
|
||||
tonic-build = { workspace = true }
|
||||
protoc-bin-vendored = "3"
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = { workspace = true }
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
18
lightningstor/crates/lightningstor-node/build.rs
Normal file
18
lightningstor/crates/lightningstor-node/build.rs
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Prefer a toolchain-provided protoc (e.g. via `nix develop` which sets PROTOC),
    // but fall back to a vendored protoc when PROTOC isn't set.
    if std::env::var_os("PROTOC").is_none() {
        let protoc = protoc_bin_vendored::protoc_bin_path()?;
        std::env::set_var("PROTOC", protoc);
    }

    // Generate both server and client bindings from the node service proto.
    tonic_build::configure()
        .build_server(true)
        .build_client(true)
        .compile_protos(&["proto/node.proto"], &["proto"])?;

    // Re-run codegen when the proto file (or anything under proto/) changes.
    println!("cargo:rerun-if-changed=proto/node.proto");
    println!("cargo:rerun-if-changed=proto");

    Ok(())
}
|
||||
135
lightningstor/crates/lightningstor-node/proto/node.proto
Normal file
135
lightningstor/crates/lightningstor-node/proto/node.proto
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
syntax = "proto3";
|
||||
|
||||
package lightningstor.node.v1;
|
||||
|
||||
option java_package = "com.lightningstor.node.v1";
|
||||
option go_package = "lightningstor/node/v1;nodev1";
|
||||
|
||||
import "google/protobuf/empty.proto";
|
||||
|
||||
// =============================================================================
|
||||
// Node Storage Service - Chunk-level operations for distributed storage
|
||||
// =============================================================================
|
||||
|
||||
service NodeService {
  // Chunk operations
  // Store one shard of a chunk on this node.
  rpc PutChunk(PutChunkRequest) returns (PutChunkResponse);
  // Fetch a previously stored shard.
  rpc GetChunk(GetChunkRequest) returns (GetChunkResponse);
  // Remove a chunk from this node.
  rpc DeleteChunk(DeleteChunkRequest) returns (google.protobuf.Empty);
  // Existence check without transferring data.
  rpc ChunkExists(ChunkExistsRequest) returns (ChunkExistsResponse);
  // Size lookup; the response also carries an existence flag.
  rpc ChunkSize(ChunkSizeRequest) returns (ChunkSizeResponse);

  // Health and status
  rpc Ping(PingRequest) returns (PingResponse);
  rpc GetStatus(GetStatusRequest) returns (GetStatusResponse);

  // Batch operations for efficiency
  // Client-streamed puts, answered with aggregate success/failure counts.
  rpc BatchPutChunks(stream PutChunkRequest) returns (BatchPutChunksResponse);
  // Server-streamed gets, one response per requested chunk.
  rpc BatchGetChunks(BatchGetChunksRequest) returns (stream GetChunkResponse);
}
|
||||
|
||||
// =============================================================================
// Chunk Operations
// =============================================================================

// Request to store one chunk (or one erasure-coded shard of a chunk).
message PutChunkRequest {
  // Unique identifier for the chunk
  string chunk_id = 1;
  // Shard index (for erasure coding)
  uint32 shard_index = 2;
  // Whether this is a parity shard
  bool is_parity = 3;
  // Chunk data
  bytes data = 4;
}

// Acknowledgement that a chunk was durably stored.
message PutChunkResponse {
  // Size of data stored
  uint64 size = 1;
}

// Identifies a chunk/shard to read back.
message GetChunkRequest {
  string chunk_id = 1;
  uint32 shard_index = 2;
  bool is_parity = 3;
}

message GetChunkResponse {
  // Chunk data (or empty if streaming)
  bytes data = 1;
  // Size of the chunk
  uint64 size = 2;
}

// NOTE(review): keyed on chunk_id alone, unlike GetChunkRequest — confirm
// that shard_index/is_parity are intentionally unnecessary for deletion.
message DeleteChunkRequest {
  string chunk_id = 1;
}

message ChunkExistsRequest {
  string chunk_id = 1;
}

message ChunkExistsResponse {
  bool exists = 1;
}

message ChunkSizeRequest {
  string chunk_id = 1;
}

message ChunkSizeResponse {
  // Size in bytes, or 0 if not found
  uint64 size = 1;
  bool exists = 2;
}
|
||||
|
||||
// =============================================================================
// Health and Status
// =============================================================================

message PingRequest {}

message PingResponse {
  // Round-trip time in microseconds (server processing time)
  uint64 latency_us = 1;
}

message GetStatusRequest {}

// Snapshot of a storage node's identity, placement, and capacity usage.
message GetStatusResponse {
  // Node identifier
  string node_id = 1;
  // Endpoint address
  string endpoint = 2;
  // Zone/rack for placement
  string zone = 3;
  // Region
  string region = 4;
  // Storage capacity in bytes
  uint64 capacity_bytes = 5;
  // Used storage in bytes
  uint64 used_bytes = 6;
  // Number of chunks stored
  uint64 chunk_count = 7;
  // Node is healthy and accepting requests
  bool healthy = 8;
  // Uptime in seconds
  uint64 uptime_seconds = 9;
}
|
||||
|
||||
// =============================================================================
// Batch Operations
// =============================================================================

// Summary returned after a client-streamed batch of PutChunkRequests.
message BatchPutChunksResponse {
  // Number of chunks successfully stored
  uint32 success_count = 1;
  // Number of chunks that failed
  uint32 failure_count = 2;
  // Error messages for failed chunks
  repeated string errors = 3;
}

// Chunks to fetch; responses are streamed back one per requested chunk.
message BatchGetChunksRequest {
  repeated GetChunkRequest chunks = 1;
}
|
||||
76
lightningstor/crates/lightningstor-node/src/config.rs
Normal file
76
lightningstor/crates/lightningstor-node/src/config.rs
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
//! Node configuration
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::net::SocketAddr;
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// Storage node configuration
///
/// Deserialized from a TOML file. Every field carries a serde default, so a
/// partial (or entirely missing) config file still yields a usable config.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NodeConfig {
    /// Unique node identifier
    // Defaults to a fresh random UUID per process (see default_node_id).
    #[serde(default = "default_node_id")]
    pub node_id: String,

    /// gRPC address to listen on
    #[serde(default = "default_grpc_addr")]
    pub grpc_addr: SocketAddr,

    /// Data directory for chunk storage
    #[serde(default = "default_data_dir")]
    pub data_dir: PathBuf,

    /// Zone/rack identifier for placement
    // Empty string means "unspecified"; main.rs only logs it when non-empty.
    #[serde(default)]
    pub zone: String,

    /// Region identifier
    #[serde(default)]
    pub region: String,

    /// Log level
    // Used as the tracing EnvFilter when RUST_LOG is not set.
    #[serde(default = "default_log_level")]
    pub log_level: String,

    /// Maximum storage capacity in bytes (0 = unlimited)
    #[serde(default)]
    pub max_capacity_bytes: u64,

    /// Metrics port for Prometheus scraping
    #[serde(default = "default_metrics_port")]
    pub metrics_port: u16,
}
|
||||
|
||||
/// Default node ID: a freshly generated random (v4) UUID, so every node
/// started without an explicit ID gets a unique identity per process.
fn default_node_id() -> String {
    uuid::Uuid::new_v4().to_string()
}
|
||||
|
||||
/// Default gRPC listen address: all interfaces, port 9002.
fn default_grpc_addr() -> SocketAddr {
    // Construct the address directly instead of parsing a string literal at
    // runtime — this is infallible, so the previous `.parse().unwrap()` (a
    // latent panic path) is eliminated.
    SocketAddr::from(([0, 0, 0, 0], 9002))
}
|
||||
|
||||
/// Default on-disk location for chunk data.
fn default_data_dir() -> PathBuf {
    let mut dir = PathBuf::new();
    dir.push("/var/lib/lightningstor-node/data");
    dir
}
|
||||
|
||||
/// Default tracing filter level.
fn default_log_level() -> String {
    String::from("info")
}
|
||||
|
||||
/// Default port for the Prometheus metrics HTTP listener.
fn default_metrics_port() -> u16 {
    9098
}
|
||||
|
||||
impl Default for NodeConfig {
    /// Mirrors the per-field serde defaults, so constructing a config in
    /// code (no config file present) agrees exactly with deserializing an
    /// empty TOML document.
    fn default() -> Self {
        Self {
            node_id: default_node_id(),
            grpc_addr: default_grpc_addr(),
            data_dir: default_data_dir(),
            zone: String::new(),
            region: String::new(),
            log_level: default_log_level(),
            max_capacity_bytes: 0,
            metrics_port: default_metrics_port(),
        }
    }
}
|
||||
36
lightningstor/crates/lightningstor-node/src/lib.rs
Normal file
36
lightningstor/crates/lightningstor-node/src/lib.rs
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
//! LightningStor Storage Node
|
||||
//!
|
||||
//! This crate implements a storage node for the LightningStor distributed
|
||||
//! storage system. Each node stores chunks of data and responds to requests
|
||||
//! from the main server for put, get, and delete operations.
|
||||
//!
|
||||
//! # Architecture
|
||||
//!
|
||||
//! ```text
|
||||
//! ┌─────────────────────────────────────────────────────┐
|
||||
//! │ LightningStor Server │
|
||||
//! │ (Erasure Coding / Replication Coordination) │
|
||||
//! └───────────┬───────────────┬───────────────┬─────────┘
|
||||
//! │ │ │
|
||||
//! ▼ ▼ ▼
|
||||
//! ┌───────────┐ ┌───────────┐ ┌───────────┐
|
||||
//! │ Node 1 │ │ Node 2 │ │ Node 3 │
|
||||
//! │ (gRPC) │ │ (gRPC) │ │ (gRPC) │
|
||||
//! └───────────┘ └───────────┘ └───────────┘
|
||||
//! ```
|
||||
|
||||
pub mod config;
|
||||
pub mod service;
|
||||
pub mod storage;
|
||||
|
||||
pub use config::NodeConfig;
|
||||
pub use service::NodeServiceImpl;
|
||||
pub use storage::LocalChunkStore;
|
||||
|
||||
/// Generated protobuf types for the `lightningstor.node.v1` package.
pub mod proto {
    // Splices in the code emitted by tonic's build step; the package name
    // here must match the `package` declared in the .proto file.
    tonic::include_proto!("lightningstor.node.v1");
}

// Convenience re-exports so consumers don't have to spell out the
// generated module paths.
pub use proto::node_service_client::NodeServiceClient;
pub use proto::node_service_server::{NodeService, NodeServiceServer};
|
||||
169
lightningstor/crates/lightningstor-node/src/main.rs
Normal file
169
lightningstor/crates/lightningstor-node/src/main.rs
Normal file
|
|
@ -0,0 +1,169 @@
|
|||
//! LightningStor storage node daemon
|
||||
|
||||
use clap::Parser;
|
||||
use lightningstor_node::{
|
||||
proto::node_service_server::NodeServiceServer, LocalChunkStore, NodeConfig, NodeServiceImpl,
|
||||
};
|
||||
use metrics_exporter_prometheus::PrometheusBuilder;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use tonic::transport::Server;
|
||||
use tonic_health::server::health_reporter;
|
||||
use tracing_subscriber::EnvFilter;
|
||||
|
||||
/// LightningStor storage node
// NOTE: clap derives the --help text from these `///` doc comments, so they
// are user-facing output; only `//` comments are safe to edit freely here.
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// Configuration file path
    #[arg(short, long, default_value = "lightningstor-node.toml")]
    config: PathBuf,

    /// Node ID (overrides config)
    // Can also be supplied via the LIGHTNINGSTOR_NODE_ID environment variable.
    #[arg(long, env = "LIGHTNINGSTOR_NODE_ID")]
    node_id: Option<String>,

    /// gRPC address to listen on (overrides config)
    // Kept as a String here; parsed into SocketAddr in main() so a bad value
    // surfaces as a startup error rather than a clap parse failure.
    #[arg(long)]
    grpc_addr: Option<String>,

    /// Data directory (overrides config)
    #[arg(long)]
    data_dir: Option<PathBuf>,

    /// Zone identifier (overrides config)
    #[arg(long)]
    zone: Option<String>,

    /// Region identifier (overrides config)
    #[arg(long)]
    region: Option<String>,

    /// Log level (overrides config)
    #[arg(short, long)]
    log_level: Option<String>,

    /// Maximum storage capacity in bytes (overrides config)
    #[arg(long)]
    max_capacity: Option<u64>,

    /// Metrics port for Prometheus scraping
    #[arg(long)]
    metrics_port: Option<u16>,
}
|
||||
|
||||
/// Daemon entry point: load config, apply CLI overrides, start metrics and
/// the gRPC NodeService, then serve until the process is terminated.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let args = Args::parse();

    // Load configuration from file or use defaults
    // (a missing config file is deliberately non-fatal).
    let mut config = if args.config.exists() {
        let contents = tokio::fs::read_to_string(&args.config).await?;
        toml::from_str(&contents)?
    } else {
        eprintln!(
            "Config file not found: {}, using defaults",
            args.config.display()
        );
        NodeConfig::default()
    };

    // Apply command line overrides
    // (each CLI flag, when present, takes precedence over the file value).
    if let Some(node_id) = args.node_id {
        config.node_id = node_id;
    }
    if let Some(grpc_addr) = args.grpc_addr {
        config.grpc_addr = grpc_addr.parse()?;
    }
    if let Some(data_dir) = args.data_dir {
        config.data_dir = data_dir;
    }
    if let Some(zone) = args.zone {
        config.zone = zone;
    }
    if let Some(region) = args.region {
        config.region = region;
    }
    if let Some(log_level) = args.log_level {
        config.log_level = log_level;
    }
    if let Some(max_capacity) = args.max_capacity {
        config.max_capacity_bytes = max_capacity;
    }
    if let Some(metrics_port) = args.metrics_port {
        config.metrics_port = metrics_port;
    }

    // Initialize tracing
    // (a RUST_LOG-style env filter, when set, wins over config.log_level).
    tracing_subscriber::fmt()
        .with_env_filter(
            EnvFilter::try_from_default_env()
                .unwrap_or_else(|_| EnvFilter::new(&config.log_level)),
        )
        .init();

    tracing::info!("Starting LightningStor storage node");
    tracing::info!(" Node ID: {}", config.node_id);
    tracing::info!(" gRPC: {}", config.grpc_addr);
    tracing::info!(" Data dir: {}", config.data_dir.display());
    if !config.zone.is_empty() {
        tracing::info!(" Zone: {}", config.zone);
    }
    if !config.region.is_empty() {
        tracing::info!(" Region: {}", config.region);
    }
    if config.max_capacity_bytes > 0 {
        tracing::info!(
            " Max capacity: {} bytes",
            config.max_capacity_bytes
        );
    }

    // Initialize Prometheus metrics exporter
    // (installs a global recorder + HTTP scrape endpoint; a failure here is
    // fatal by design, hence the expect()).
    let metrics_addr = format!("0.0.0.0:{}", config.metrics_port);
    let builder = PrometheusBuilder::new();
    builder
        .with_http_listener(metrics_addr.parse::<std::net::SocketAddr>()?)
        .install()
        .expect("Failed to install Prometheus metrics exporter");

    tracing::info!(
        "Prometheus metrics available at http://{}/metrics",
        metrics_addr
    );

    // Create local chunk store
    // (this scans the data directory, so the counts logged below reflect
    // chunks that survived a previous run).
    let store = Arc::new(
        LocalChunkStore::new(config.data_dir.clone(), config.max_capacity_bytes)
            .await
            .expect("Failed to create chunk store"),
    );

    tracing::info!(
        "Chunk store initialized: {} chunks, {} bytes",
        store.chunk_count(),
        store.total_bytes()
    );

    // Create service
    let config = Arc::new(config);
    let service = NodeServiceImpl::new(store.clone(), config.clone());

    // Setup health service
    // (advertises NodeService as SERVING via the standard gRPC health
    // checking protocol).
    let (mut health_reporter, health_service) = health_reporter();
    health_reporter
        .set_serving::<NodeServiceServer<NodeServiceImpl>>()
        .await;

    // Start gRPC server
    // (serve() runs until the process is stopped; no graceful-shutdown hook
    // is wired up here).
    let addr = config.grpc_addr;
    tracing::info!("gRPC server listening on {}", addr);

    Server::builder()
        .add_service(health_service)
        .add_service(NodeServiceServer::new(service))
        .serve(addr)
        .await?;

    Ok(())
}
|
||||
232
lightningstor/crates/lightningstor-node/src/service.rs
Normal file
232
lightningstor/crates/lightningstor-node/src/service.rs
Normal file
|
|
@ -0,0 +1,232 @@
|
|||
//! gRPC service implementation for storage node
|
||||
|
||||
use crate::proto::{
|
||||
node_service_server::NodeService, BatchGetChunksRequest, BatchPutChunksResponse,
|
||||
ChunkExistsRequest, ChunkExistsResponse, ChunkSizeRequest, ChunkSizeResponse,
|
||||
DeleteChunkRequest, GetChunkRequest, GetChunkResponse, GetStatusRequest, GetStatusResponse,
|
||||
PingRequest, PingResponse, PutChunkRequest, PutChunkResponse,
|
||||
};
|
||||
use crate::storage::LocalChunkStore;
|
||||
use crate::NodeConfig;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
use tokio_stream::wrappers::ReceiverStream;
|
||||
use tonic::{Request, Response, Status, Streaming};
|
||||
use tracing::{debug, error};
|
||||
|
||||
/// Implementation of the NodeService gRPC service
///
/// Thin gRPC adapter over [`LocalChunkStore`]: translates proto requests
/// into store calls, maps storage errors onto gRPC status codes, and
/// records Prometheus counters.
pub struct NodeServiceImpl {
    /// Local chunk storage
    store: Arc<LocalChunkStore>,
    /// Node configuration
    // Read-only here; used to populate GetStatusResponse identity fields.
    config: Arc<NodeConfig>,
    /// Server start time
    // Captured at construction; get_status derives uptime from it.
    start_time: Instant,
}
|
||||
|
||||
impl NodeServiceImpl {
    /// Create a new node service
    ///
    /// `start_time` is captured here, so uptime reported by `get_status`
    /// measures from service construction, not process start.
    pub fn new(store: Arc<LocalChunkStore>, config: Arc<NodeConfig>) -> Self {
        Self {
            store,
            config,
            start_time: Instant::now(),
        }
    }
}
|
||||
|
||||
#[tonic::async_trait]
|
||||
impl NodeService for NodeServiceImpl {
|
||||
async fn put_chunk(
|
||||
&self,
|
||||
request: Request<PutChunkRequest>,
|
||||
) -> Result<Response<PutChunkResponse>, Status> {
|
||||
let req = request.into_inner();
|
||||
|
||||
debug!(
|
||||
chunk_id = %req.chunk_id,
|
||||
shard_index = req.shard_index,
|
||||
is_parity = req.is_parity,
|
||||
size = req.data.len(),
|
||||
"PutChunk request"
|
||||
);
|
||||
|
||||
let size = self
|
||||
.store
|
||||
.put(&req.chunk_id, &req.data)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
error!(error = ?e, "Failed to put chunk");
|
||||
Status::internal(e.to_string())
|
||||
})?;
|
||||
|
||||
metrics::counter!("node_chunks_stored").increment(1);
|
||||
metrics::counter!("node_bytes_stored").increment(size);
|
||||
|
||||
Ok(Response::new(PutChunkResponse { size }))
|
||||
}
|
||||
|
||||
async fn get_chunk(
|
||||
&self,
|
||||
request: Request<GetChunkRequest>,
|
||||
) -> Result<Response<GetChunkResponse>, Status> {
|
||||
let req = request.into_inner();
|
||||
|
||||
debug!(
|
||||
chunk_id = %req.chunk_id,
|
||||
shard_index = req.shard_index,
|
||||
is_parity = req.is_parity,
|
||||
"GetChunk request"
|
||||
);
|
||||
|
||||
let data = self.store.get(&req.chunk_id).await.map_err(|e| {
|
||||
match &e {
|
||||
crate::storage::StorageError::NotFound(_) => {
|
||||
debug!(chunk_id = %req.chunk_id, "Chunk not found");
|
||||
Status::not_found(e.to_string())
|
||||
}
|
||||
_ => {
|
||||
error!(error = ?e, "Failed to get chunk");
|
||||
Status::internal(e.to_string())
|
||||
}
|
||||
}
|
||||
})?;
|
||||
|
||||
metrics::counter!("node_chunks_retrieved").increment(1);
|
||||
metrics::counter!("node_bytes_retrieved").increment(data.len() as u64);
|
||||
|
||||
Ok(Response::new(GetChunkResponse {
|
||||
data,
|
||||
size: 0, // Size is implicit from data.len()
|
||||
}))
|
||||
}
|
||||
|
||||
async fn delete_chunk(
|
||||
&self,
|
||||
request: Request<DeleteChunkRequest>,
|
||||
) -> Result<Response<()>, Status> {
|
||||
let req = request.into_inner();
|
||||
|
||||
debug!(chunk_id = %req.chunk_id, "DeleteChunk request");
|
||||
|
||||
self.store.delete(&req.chunk_id).await.map_err(|e| {
|
||||
error!(error = ?e, "Failed to delete chunk");
|
||||
Status::internal(e.to_string())
|
||||
})?;
|
||||
|
||||
metrics::counter!("node_chunks_deleted").increment(1);
|
||||
|
||||
Ok(Response::new(()))
|
||||
}
|
||||
|
||||
async fn chunk_exists(
|
||||
&self,
|
||||
request: Request<ChunkExistsRequest>,
|
||||
) -> Result<Response<ChunkExistsResponse>, Status> {
|
||||
let req = request.into_inner();
|
||||
let exists = self.store.exists(&req.chunk_id);
|
||||
|
||||
Ok(Response::new(ChunkExistsResponse { exists }))
|
||||
}
|
||||
|
||||
async fn chunk_size(
|
||||
&self,
|
||||
request: Request<ChunkSizeRequest>,
|
||||
) -> Result<Response<ChunkSizeResponse>, Status> {
|
||||
let req = request.into_inner();
|
||||
|
||||
match self.store.size(&req.chunk_id) {
|
||||
Some(size) => Ok(Response::new(ChunkSizeResponse { size, exists: true })),
|
||||
None => Ok(Response::new(ChunkSizeResponse {
|
||||
size: 0,
|
||||
exists: false,
|
||||
})),
|
||||
}
|
||||
}
|
||||
|
||||
async fn ping(&self, _request: Request<PingRequest>) -> Result<Response<PingResponse>, Status> {
|
||||
let start = Instant::now();
|
||||
// Minimal processing - just measure latency
|
||||
let latency_us = start.elapsed().as_micros() as u64;
|
||||
|
||||
Ok(Response::new(PingResponse { latency_us }))
|
||||
}
|
||||
|
||||
async fn get_status(
|
||||
&self,
|
||||
_request: Request<GetStatusRequest>,
|
||||
) -> Result<Response<GetStatusResponse>, Status> {
|
||||
let uptime_seconds = self.start_time.elapsed().as_secs();
|
||||
|
||||
Ok(Response::new(GetStatusResponse {
|
||||
node_id: self.config.node_id.clone(),
|
||||
endpoint: self.config.grpc_addr.to_string(),
|
||||
zone: self.config.zone.clone(),
|
||||
region: self.config.region.clone(),
|
||||
capacity_bytes: self.store.max_capacity(),
|
||||
used_bytes: self.store.total_bytes(),
|
||||
chunk_count: self.store.chunk_count(),
|
||||
healthy: true,
|
||||
uptime_seconds,
|
||||
}))
|
||||
}
|
||||
|
||||
async fn batch_put_chunks(
|
||||
&self,
|
||||
request: Request<Streaming<PutChunkRequest>>,
|
||||
) -> Result<Response<BatchPutChunksResponse>, Status> {
|
||||
let mut stream = request.into_inner();
|
||||
let mut success_count = 0u32;
|
||||
let mut failure_count = 0u32;
|
||||
let mut errors = Vec::new();
|
||||
|
||||
while let Some(req) = stream.message().await? {
|
||||
match self.store.put(&req.chunk_id, &req.data).await {
|
||||
Ok(size) => {
|
||||
success_count += 1;
|
||||
metrics::counter!("node_chunks_stored").increment(1);
|
||||
metrics::counter!("node_bytes_stored").increment(size);
|
||||
}
|
||||
Err(e) => {
|
||||
failure_count += 1;
|
||||
errors.push(format!("{}: {}", req.chunk_id, e));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Response::new(BatchPutChunksResponse {
|
||||
success_count,
|
||||
failure_count,
|
||||
errors,
|
||||
}))
|
||||
}
|
||||
|
||||
type BatchGetChunksStream = ReceiverStream<Result<GetChunkResponse, Status>>;
|
||||
|
||||
async fn batch_get_chunks(
|
||||
&self,
|
||||
request: Request<BatchGetChunksRequest>,
|
||||
) -> Result<Response<Self::BatchGetChunksStream>, Status> {
|
||||
let req = request.into_inner();
|
||||
let (tx, rx) = tokio::sync::mpsc::channel(32);
|
||||
let store = self.store.clone();
|
||||
|
||||
tokio::spawn(async move {
|
||||
for chunk_req in req.chunks {
|
||||
let result = match store.get(&chunk_req.chunk_id).await {
|
||||
Ok(data) => {
|
||||
let size = data.len() as u64;
|
||||
Ok(GetChunkResponse { data, size })
|
||||
}
|
||||
Err(e) => Err(Status::not_found(e.to_string())),
|
||||
};
|
||||
|
||||
if tx.send(result).await.is_err() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
Ok(Response::new(ReceiverStream::new(rx)))
|
||||
}
|
||||
}
|
||||
313
lightningstor/crates/lightningstor-node/src/storage.rs
Normal file
313
lightningstor/crates/lightningstor-node/src/storage.rs
Normal file
|
|
@ -0,0 +1,313 @@
|
|||
//! Local chunk storage
|
||||
|
||||
use dashmap::DashMap;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use thiserror::Error;
|
||||
use tokio::fs;
|
||||
use tokio::io::{AsyncReadExt, AsyncWriteExt};
|
||||
use tracing::debug;
|
||||
|
||||
/// Errors from chunk storage operations
#[derive(Debug, Error)]
pub enum StorageError {
    /// Underlying filesystem failure (create/read/write/remove).
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// The requested chunk id has no file on disk; payload is the chunk id.
    #[error("Chunk not found: {0}")]
    NotFound(String),

    /// A put would push total stored bytes past the configured maximum.
    #[error("Storage capacity exceeded")]
    CapacityExceeded,
}

/// Convenience alias used throughout the storage layer.
pub type StorageResult<T> = Result<T, StorageError>;
|
||||
|
||||
/// Local filesystem-based chunk storage
///
/// One file per chunk under `data_dir`, with an in-memory size index and
/// atomic byte/chunk counters so status queries never touch the disk.
pub struct LocalChunkStore {
    /// Data directory
    data_dir: PathBuf,

    /// In-memory index of chunk sizes for fast lookups
    // Rebuilt from a directory scan at startup; kept in sync by put/delete.
    chunk_sizes: DashMap<String, u64>,

    /// Total bytes stored
    total_bytes: AtomicU64,

    /// Maximum capacity (0 = unlimited)
    max_capacity: u64,

    /// Number of chunks stored
    chunk_count: AtomicU64,
}
|
||||
|
||||
impl LocalChunkStore {
|
||||
/// Create a new local chunk store
|
||||
pub async fn new(data_dir: PathBuf, max_capacity: u64) -> StorageResult<Self> {
|
||||
// Ensure data directory exists
|
||||
fs::create_dir_all(&data_dir).await?;
|
||||
|
||||
let store = Self {
|
||||
data_dir,
|
||||
chunk_sizes: DashMap::new(),
|
||||
total_bytes: AtomicU64::new(0),
|
||||
max_capacity,
|
||||
chunk_count: AtomicU64::new(0),
|
||||
};
|
||||
|
||||
// Scan existing chunks
|
||||
store.scan_existing_chunks().await?;
|
||||
|
||||
Ok(store)
|
||||
}
|
||||
|
||||
/// Scan existing chunks in the data directory
|
||||
async fn scan_existing_chunks(&self) -> StorageResult<()> {
|
||||
let mut entries = fs::read_dir(&self.data_dir).await?;
|
||||
let mut total_bytes = 0u64;
|
||||
let mut chunk_count = 0u64;
|
||||
|
||||
while let Some(entry) = entries.next_entry().await? {
|
||||
let path = entry.path();
|
||||
if path.is_file() {
|
||||
if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
|
||||
if let Ok(metadata) = entry.metadata().await {
|
||||
let size = metadata.len();
|
||||
self.chunk_sizes.insert(name.to_string(), size);
|
||||
total_bytes += size;
|
||||
chunk_count += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.total_bytes.store(total_bytes, Ordering::SeqCst);
|
||||
self.chunk_count.store(chunk_count, Ordering::SeqCst);
|
||||
|
||||
debug!(
|
||||
total_bytes,
|
||||
chunk_count,
|
||||
"Scanned existing chunks"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get the path for a chunk
|
||||
fn chunk_path(&self, chunk_id: &str) -> PathBuf {
|
||||
// Sanitize chunk_id to be a valid filename
|
||||
let safe_id = chunk_id.replace(['/', '\\', ':', '*', '?', '"', '<', '>', '|'], "_");
|
||||
self.data_dir.join(safe_id)
|
||||
}
|
||||
|
||||
/// Store a chunk
|
||||
pub async fn put(&self, chunk_id: &str, data: &[u8]) -> StorageResult<u64> {
|
||||
let size = data.len() as u64;
|
||||
|
||||
// Check capacity
|
||||
if self.max_capacity > 0 {
|
||||
let current = self.total_bytes.load(Ordering::SeqCst);
|
||||
if current + size > self.max_capacity {
|
||||
return Err(StorageError::CapacityExceeded);
|
||||
}
|
||||
}
|
||||
|
||||
let path = self.chunk_path(chunk_id);
|
||||
|
||||
// Check if replacing existing chunk
|
||||
let old_size = self.chunk_sizes.get(chunk_id).map(|v| *v).unwrap_or(0);
|
||||
|
||||
// Write data
|
||||
let mut file = fs::File::create(&path).await?;
|
||||
file.write_all(data).await?;
|
||||
file.sync_all().await?;
|
||||
|
||||
// Update index
|
||||
self.chunk_sizes.insert(chunk_id.to_string(), size);
|
||||
|
||||
// Update totals
|
||||
if old_size > 0 {
|
||||
// Replacing existing chunk
|
||||
self.total_bytes.fetch_sub(old_size, Ordering::SeqCst);
|
||||
} else {
|
||||
// New chunk
|
||||
self.chunk_count.fetch_add(1, Ordering::SeqCst);
|
||||
}
|
||||
self.total_bytes.fetch_add(size, Ordering::SeqCst);
|
||||
|
||||
debug!(chunk_id, size, "Stored chunk");
|
||||
|
||||
Ok(size)
|
||||
}
|
||||
|
||||
/// Retrieve a chunk
|
||||
pub async fn get(&self, chunk_id: &str) -> StorageResult<Vec<u8>> {
|
||||
let path = self.chunk_path(chunk_id);
|
||||
|
||||
if !path.exists() {
|
||||
return Err(StorageError::NotFound(chunk_id.to_string()));
|
||||
}
|
||||
|
||||
let mut file = fs::File::open(&path).await?;
|
||||
let mut data = Vec::new();
|
||||
file.read_to_end(&mut data).await?;
|
||||
|
||||
debug!(chunk_id, size = data.len(), "Retrieved chunk");
|
||||
|
||||
Ok(data)
|
||||
}
|
||||
|
||||
/// Delete a chunk
|
||||
pub async fn delete(&self, chunk_id: &str) -> StorageResult<()> {
|
||||
let path = self.chunk_path(chunk_id);
|
||||
|
||||
if let Some((_, size)) = self.chunk_sizes.remove(chunk_id) {
|
||||
if path.exists() {
|
||||
fs::remove_file(&path).await?;
|
||||
}
|
||||
self.total_bytes.fetch_sub(size, Ordering::SeqCst);
|
||||
self.chunk_count.fetch_sub(1, Ordering::SeqCst);
|
||||
debug!(chunk_id, "Deleted chunk");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Check if a chunk exists
|
||||
pub fn exists(&self, chunk_id: &str) -> bool {
|
||||
self.chunk_sizes.contains_key(chunk_id)
|
||||
}
|
||||
|
||||
/// Get the size of a chunk
|
||||
pub fn size(&self, chunk_id: &str) -> Option<u64> {
|
||||
self.chunk_sizes.get(chunk_id).map(|v| *v)
|
||||
}
|
||||
|
||||
/// Get total bytes stored
|
||||
pub fn total_bytes(&self) -> u64 {
|
||||
self.total_bytes.load(Ordering::SeqCst)
|
||||
}
|
||||
|
||||
/// Get chunk count
|
||||
pub fn chunk_count(&self) -> u64 {
|
||||
self.chunk_count.load(Ordering::SeqCst)
|
||||
}
|
||||
|
||||
/// Get maximum capacity
|
||||
pub fn max_capacity(&self) -> u64 {
|
||||
self.max_capacity
|
||||
}
|
||||
|
||||
/// Get available capacity
|
||||
pub fn available_bytes(&self) -> u64 {
|
||||
if self.max_capacity == 0 {
|
||||
u64::MAX
|
||||
} else {
|
||||
self.max_capacity.saturating_sub(self.total_bytes())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Build an unlimited-capacity store rooted in a fresh temp dir; the
    /// TempDir is returned so the directory outlives the test body.
    async fn create_test_store() -> (LocalChunkStore, TempDir) {
        let temp_dir = TempDir::new().unwrap();
        let store = LocalChunkStore::new(temp_dir.path().to_path_buf(), 0)
            .await
            .unwrap();
        (store, temp_dir)
    }

    // Round-trip: bytes written with put() come back unchanged from get().
    #[tokio::test]
    async fn test_put_get() {
        let (store, _temp) = create_test_store().await;

        let chunk_id = "test-chunk-1";
        let data = vec![42u8; 1024];

        let size = store.put(chunk_id, &data).await.unwrap();
        assert_eq!(size, 1024);

        let retrieved = store.get(chunk_id).await.unwrap();
        assert_eq!(retrieved, data);
    }

    // delete() removes both the file and the index entry; a later get()
    // must fail.
    #[tokio::test]
    async fn test_delete() {
        let (store, _temp) = create_test_store().await;

        let chunk_id = "test-chunk-2";
        let data = vec![42u8; 512];

        store.put(chunk_id, &data).await.unwrap();
        assert!(store.exists(chunk_id));

        store.delete(chunk_id).await.unwrap();
        assert!(!store.exists(chunk_id));

        let result = store.get(chunk_id).await;
        assert!(result.is_err());
    }

    // total_bytes/chunk_count follow puts and deletes exactly.
    #[tokio::test]
    async fn test_size_tracking() {
        let (store, _temp) = create_test_store().await;

        assert_eq!(store.total_bytes(), 0);
        assert_eq!(store.chunk_count(), 0);

        store.put("chunk1", &vec![0u8; 100]).await.unwrap();
        assert_eq!(store.total_bytes(), 100);
        assert_eq!(store.chunk_count(), 1);

        store.put("chunk2", &vec![0u8; 200]).await.unwrap();
        assert_eq!(store.total_bytes(), 300);
        assert_eq!(store.chunk_count(), 2);

        store.delete("chunk1").await.unwrap();
        assert_eq!(store.total_bytes(), 200);
        assert_eq!(store.chunk_count(), 1);
    }

    // Writes that would exceed max_capacity are rejected with
    // CapacityExceeded; a smaller write within the remainder succeeds.
    #[tokio::test]
    async fn test_capacity_limit() {
        let temp_dir = TempDir::new().unwrap();
        let store = LocalChunkStore::new(temp_dir.path().to_path_buf(), 1000)
            .await
            .unwrap();

        // Should succeed
        store.put("chunk1", &vec![0u8; 500]).await.unwrap();

        // Should fail - would exceed capacity
        let result = store.put("chunk2", &vec![0u8; 600]).await;
        assert!(matches!(result, Err(StorageError::CapacityExceeded)));

        // Should succeed - within remaining capacity
        store.put("chunk2", &vec![0u8; 400]).await.unwrap();
    }

    // Overwriting an existing id keeps chunk_count at 1 while total_bytes
    // tracks the current size, whether it grew or shrank.
    #[tokio::test]
    async fn test_replace_chunk() {
        let (store, _temp) = create_test_store().await;

        let chunk_id = "test-chunk";

        store.put(chunk_id, &vec![0u8; 100]).await.unwrap();
        assert_eq!(store.total_bytes(), 100);
        assert_eq!(store.chunk_count(), 1);

        // Replace with larger data
        store.put(chunk_id, &vec![0u8; 200]).await.unwrap();
        assert_eq!(store.total_bytes(), 200);
        assert_eq!(store.chunk_count(), 1); // Still 1 chunk

        // Replace with smaller data
        store.put(chunk_id, &vec![0u8; 50]).await.unwrap();
        assert_eq!(store.total_bytes(), 50);
        assert_eq!(store.chunk_count(), 1);
    }
}
|
||||
59
lightningstor/crates/lightningstor-server/src/tenant.rs
Normal file
59
lightningstor/crates/lightningstor-server/src/tenant.rs
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
use tonic::{metadata::MetadataMap, Status};
|
||||
|
||||
/// Tenant scoping for a request, resolved from explicit request fields
/// and/or gRPC metadata headers (see `resolve_tenant`).
#[derive(Debug, Clone)]
pub struct TenantContext {
    /// Organization that owns the request.
    pub org_id: String,
    /// Project within that organization.
    pub project_id: String,
}
|
||||
|
||||
fn metadata_value(metadata: &MetadataMap, key: &str) -> Option<String> {
|
||||
metadata
|
||||
.get(key)
|
||||
.and_then(|value| value.to_str().ok())
|
||||
.map(|value| value.trim().to_string())
|
||||
.filter(|value| !value.is_empty())
|
||||
}
|
||||
|
||||
fn org_from_metadata(metadata: &MetadataMap) -> Option<String> {
|
||||
metadata_value(metadata, "org-id")
|
||||
.or_else(|| metadata_value(metadata, "x-org-id"))
|
||||
.or_else(|| metadata_value(metadata, "org_id"))
|
||||
}
|
||||
|
||||
fn project_from_metadata(metadata: &MetadataMap) -> Option<String> {
|
||||
metadata_value(metadata, "project-id")
|
||||
.or_else(|| metadata_value(metadata, "x-project-id"))
|
||||
.or_else(|| metadata_value(metadata, "project_id"))
|
||||
}
|
||||
|
||||
pub fn resolve_org(
|
||||
metadata: &MetadataMap,
|
||||
org_id: Option<String>,
|
||||
) -> Result<String, Status> {
|
||||
org_id
|
||||
.filter(|value| !value.is_empty())
|
||||
.or_else(|| org_from_metadata(metadata))
|
||||
.ok_or_else(|| Status::invalid_argument("org_id is required"))
|
||||
}
|
||||
|
||||
pub fn resolve_org_project_optional(
|
||||
metadata: &MetadataMap,
|
||||
org_id: Option<String>,
|
||||
project_id: Option<String>,
|
||||
) -> Result<(String, Option<String>), Status> {
|
||||
let org_id = resolve_org(metadata, org_id)?;
|
||||
let project_id = project_id
|
||||
.filter(|value| !value.is_empty())
|
||||
.or_else(|| project_from_metadata(metadata));
|
||||
Ok((org_id, project_id))
|
||||
}
|
||||
|
||||
pub fn resolve_tenant(
|
||||
metadata: &MetadataMap,
|
||||
org_id: Option<String>,
|
||||
project_id: Option<String>,
|
||||
) -> Result<TenantContext, Status> {
|
||||
let (org_id, project_id) = resolve_org_project_optional(metadata, org_id, project_id)?;
|
||||
let project_id = project_id.ok_or_else(|| Status::invalid_argument("project_id is required"))?;
|
||||
Ok(TenantContext { org_id, project_id })
|
||||
}
|
||||
1954
mtls-agent/Cargo.lock
generated
Normal file
1954
mtls-agent/Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load diff
23
mtls-agent/Cargo.toml
Normal file
23
mtls-agent/Cargo.toml
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
[package]
|
||||
name = "mtls-agent"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
tokio = { version = "1.38", features = ["full"] }
|
||||
tracing = "0.1"
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] }
|
||||
clap = { version = "4.5", features = ["derive"] }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
toml = "0.8"
|
||||
|
||||
rustls = { version = "0.23", default-features = false, features = ["std", "tls12"] }
|
||||
tokio-rustls = "0.26"
|
||||
rustls-pemfile = "2"
|
||||
webpki-roots = "0.26"
|
||||
|
||||
chainfire-client = { path = "../chainfire/chainfire-client" }
|
||||
|
||||
|
||||
89
mtls-agent/src/client.rs
Normal file
89
mtls-agent/src/client.rs
Normal file
|
|
@ -0,0 +1,89 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use rustls::{pki_types::ServerName, ClientConfig, RootCertStore};
|
||||
use rustls_pemfile::certs;
|
||||
use std::fs;
|
||||
use std::io::BufReader;
|
||||
use tokio::net::TcpStream;
|
||||
use tokio_rustls::TlsConnector;
|
||||
|
||||
use crate::discovery::ServiceDiscovery;
|
||||
|
||||
/// TCP client that locates backends through service discovery.
///
/// `tls_config` can be attached via `with_tls_config`, but is not yet used
/// when dialing — `connect_to_service` currently rejects mTLS requests.
pub struct MtlsClient {
    // Shared discovery handle used to resolve service names to instances.
    discovery: Arc<ServiceDiscovery>,
    // Reserved for future mTLS support; currently unused when connecting.
    tls_config: Option<Arc<ClientConfig>>,
}
|
||||
|
||||
impl MtlsClient {
|
||||
pub fn new(discovery: Arc<ServiceDiscovery>) -> Self {
|
||||
Self {
|
||||
discovery,
|
||||
tls_config: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_tls_config(mut self, config: Arc<ClientConfig>) -> Self {
|
||||
self.tls_config = Some(config);
|
||||
self
|
||||
}
|
||||
|
||||
pub async fn connect_to_service(
|
||||
&self,
|
||||
service_name: &str,
|
||||
use_mtls: bool,
|
||||
) -> Result<TcpStream> {
|
||||
let instances = self.discovery.resolve_service(service_name).await?;
|
||||
if instances.is_empty() {
|
||||
anyhow::bail!("no healthy instances found for service {}", service_name);
|
||||
}
|
||||
|
||||
// ラウンドロビン(簡易実装)
|
||||
let instance = instances[0].clone();
|
||||
|
||||
let addr = if let Some(mesh_port) = instance.mesh_port {
|
||||
format!("{}:{}", instance.ip, mesh_port)
|
||||
} else {
|
||||
format!("{}:{}", instance.ip, instance.port)
|
||||
};
|
||||
|
||||
let stream = TcpStream::connect(&addr).await?;
|
||||
|
||||
// TODO: mTLS対応
|
||||
if use_mtls {
|
||||
return Err(anyhow::anyhow!("mTLS client connection not fully implemented"));
|
||||
}
|
||||
|
||||
Ok(stream)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn build_client_config(
|
||||
ca_cert_path: Option<&str>,
|
||||
client_cert_path: Option<&str>,
|
||||
client_key_path: Option<&str>,
|
||||
) -> Result<Arc<ClientConfig>> {
|
||||
let mut roots = RootCertStore::empty();
|
||||
|
||||
if let Some(ca_path) = ca_cert_path {
|
||||
let certs = certs(&mut BufReader::new(fs::File::open(ca_path)?))
|
||||
.collect::<std::result::Result<Vec<_>, _>>()?;
|
||||
roots.add_parsable_certificates(certs);
|
||||
} else {
|
||||
// システムのルート証明書を使用
|
||||
roots.extend(webpki_roots::TLS_SERVER_ROOTS.iter().cloned());
|
||||
}
|
||||
|
||||
let mut config_builder = ClientConfig::builder()
|
||||
.with_root_certificates(roots)
|
||||
.with_no_client_auth();
|
||||
|
||||
// クライアント証明書が指定されている場合は設定
|
||||
if let (Some(cert_path), Some(key_path)) = (client_cert_path, client_key_path) {
|
||||
// TODO: クライアント証明書の読み込みと設定
|
||||
// 現時点ではサーバー認証のみ
|
||||
}
|
||||
|
||||
Ok(Arc::new(config_builder))
|
||||
}
|
||||
|
||||
219
mtls-agent/src/discovery.rs
Normal file
219
mtls-agent/src/discovery.rs
Normal file
|
|
@ -0,0 +1,219 @@
|
|||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use anyhow::Result;
|
||||
use chainfire_client::Client;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::sync::RwLock;
|
||||
use tracing::{info, warn};
|
||||
|
||||
// Root of the Chainfire key namespace this agent reads (see cluster_prefix()).
const PHOTON_PREFIX: &str = "photoncloud";
// How long a resolved instance list is served from cache before re-fetching.
const CACHE_TTL: Duration = Duration::from_secs(30);
|
||||
|
||||
/// A registered service instance, JSON-encoded in Chainfire under
/// `…/instances/<service>/…`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ServiceInstance {
    pub instance_id: String,
    pub service: String,
    pub node_id: String,
    pub ip: String,
    // Application (plaintext) port; used when no mesh port is advertised.
    pub port: u16,
    // Mesh (agent-fronted) port; preferred over `port` when present.
    #[serde(default)]
    pub mesh_port: Option<u16>,
    #[serde(default)]
    pub version: Option<String>,
    // Health state; discovery treats an absent state as "healthy".
    #[serde(default)]
    pub state: Option<String>,
}
|
||||
|
||||
/// An mTLS policy record for a (source, target) service pair, JSON-encoded
/// in Chainfire under `…/mtls/policies/…`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MtlsPolicy {
    pub policy_id: String,
    // Deployment environment the policy applies to (e.g. dev/stg/prod).
    #[serde(default)]
    pub environment: Option<String>,
    pub source_service: String,
    pub target_service: String,
    // When true, callers must use mTLS; main.rs defaults this to false.
    #[serde(default)]
    pub mtls_required: Option<bool>,
    #[serde(default)]
    pub mode: Option<String>,
}
|
||||
|
||||
/// Cache entry for one service: the instance list plus when it was fetched,
/// so `resolve_service` can apply `CACHE_TTL`.
struct CachedInstances {
    instances: Vec<ServiceInstance>,
    updated_at: Instant,
}
|
||||
|
||||
/// Chainfire-backed service discovery with in-memory caching.
pub struct ServiceDiscovery {
    // Address of the Chainfire KV store to query.
    chainfire_endpoint: String,
    // Cluster whose key namespace is scanned (see cluster_prefix()).
    cluster_id: String,
    // Per-service instance cache, TTL-bounded by CACHE_TTL.
    cache: Arc<RwLock<HashMap<String, CachedInstances>>>,
    // Policy cache keyed by "<source>-<target>".
    // NOTE(review): entries here have no TTL — verify that is intentional.
    policy_cache: Arc<RwLock<HashMap<String, MtlsPolicy>>>,
}
|
||||
|
||||
impl ServiceDiscovery {
|
||||
pub fn new(chainfire_endpoint: String, cluster_id: String) -> Self {
|
||||
Self {
|
||||
chainfire_endpoint,
|
||||
cluster_id,
|
||||
cache: Arc::new(RwLock::new(HashMap::new())),
|
||||
policy_cache: Arc::new(RwLock::new(HashMap::new())),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn resolve_service(&self, service_name: &str) -> Result<Vec<ServiceInstance>> {
|
||||
// キャッシュをチェック
|
||||
{
|
||||
let cache = self.cache.read().await;
|
||||
if let Some(cached) = cache.get(service_name) {
|
||||
if cached.updated_at.elapsed() < CACHE_TTL {
|
||||
return Ok(cached.instances.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Chainfireから取得
|
||||
let instances = self.fetch_instances_from_chainfire(service_name).await?;
|
||||
|
||||
// キャッシュを更新
|
||||
{
|
||||
let mut cache = self.cache.write().await;
|
||||
cache.insert(
|
||||
service_name.to_string(),
|
||||
CachedInstances {
|
||||
instances: instances.clone(),
|
||||
updated_at: Instant::now(),
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
Ok(instances)
|
||||
}
|
||||
|
||||
async fn fetch_instances_from_chainfire(&self, service_name: &str) -> Result<Vec<ServiceInstance>> {
|
||||
let mut client = Client::connect(self.chainfire_endpoint.clone()).await?;
|
||||
let prefix = format!(
|
||||
"{}instances/{}/",
|
||||
cluster_prefix(&self.cluster_id),
|
||||
service_name
|
||||
);
|
||||
let prefix_bytes = prefix.as_bytes();
|
||||
|
||||
let (kvs, _) = client.scan_prefix(prefix_bytes, 0).await?;
|
||||
let mut instances = Vec::new();
|
||||
|
||||
for (_, value, _) in kvs {
|
||||
match serde_json::from_slice::<ServiceInstance>(&value) {
|
||||
Ok(inst) => {
|
||||
// 状態が "healthy" または未設定のもののみ返す
|
||||
if inst.state.as_deref().unwrap_or("healthy") == "healthy" {
|
||||
instances.push(inst);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(error = %e, "failed to parse ServiceInstance from Chainfire");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
info!(
|
||||
service = %service_name,
|
||||
count = instances.len(),
|
||||
"resolved service instances from Chainfire"
|
||||
);
|
||||
|
||||
Ok(instances)
|
||||
}
|
||||
|
||||
pub async fn get_mtls_policy(
|
||||
&self,
|
||||
source_service: &str,
|
||||
target_service: &str,
|
||||
) -> Result<Option<MtlsPolicy>> {
|
||||
let policy_key = format!(
|
||||
"{}-{}",
|
||||
source_service, target_service
|
||||
);
|
||||
|
||||
// キャッシュをチェック
|
||||
{
|
||||
let cache = self.policy_cache.read().await;
|
||||
if let Some(policy) = cache.get(&policy_key) {
|
||||
return Ok(Some(policy.clone()));
|
||||
}
|
||||
}
|
||||
|
||||
// Chainfireから取得
|
||||
let mut client = Client::connect(self.chainfire_endpoint.clone()).await?;
|
||||
let prefix = format!(
|
||||
"{}mtls/policies/",
|
||||
cluster_prefix(&self.cluster_id)
|
||||
);
|
||||
let prefix_bytes = prefix.as_bytes();
|
||||
|
||||
let (kvs, _) = client.scan_prefix(prefix_bytes, 0).await?;
|
||||
|
||||
for (_, value, _) in kvs {
|
||||
match serde_json::from_slice::<MtlsPolicy>(&value) {
|
||||
Ok(policy) => {
|
||||
if policy.source_service == source_service && policy.target_service == target_service {
|
||||
// キャッシュに保存
|
||||
let mut cache = self.policy_cache.write().await;
|
||||
cache.insert(policy_key.clone(), policy.clone());
|
||||
return Ok(Some(policy));
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(error = %e, "failed to parse MtlsPolicy from Chainfire");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
pub async fn start_background_refresh(&self) {
|
||||
let endpoint = self.chainfire_endpoint.clone();
|
||||
let cluster_id = self.cluster_id.clone();
|
||||
let cache = Arc::clone(&self.cache);
|
||||
let policy_cache = Arc::clone(&self.policy_cache);
|
||||
|
||||
tokio::spawn(async move {
|
||||
let mut interval = tokio::time::interval(Duration::from_secs(60));
|
||||
loop {
|
||||
interval.tick().await;
|
||||
|
||||
// 全サービスのインスタンスをリフレッシュ
|
||||
if let Ok(mut client) = Client::connect(endpoint.clone()).await {
|
||||
let prefix = format!("{}instances/", cluster_prefix(&cluster_id));
|
||||
if let Ok((kvs, _)) = client.scan_prefix(prefix.as_bytes(), 0).await {
|
||||
let mut service_map: HashMap<String, Vec<ServiceInstance>> = HashMap::new();
|
||||
for (key, value, _) in kvs {
|
||||
if let Ok(inst) = serde_json::from_slice::<ServiceInstance>(&value) {
|
||||
service_map
|
||||
.entry(inst.service.clone())
|
||||
.or_insert_with(Vec::new)
|
||||
.push(inst);
|
||||
}
|
||||
}
|
||||
let mut cache_guard = cache.write().await;
|
||||
for (service, instances) in service_map {
|
||||
cache_guard.insert(
|
||||
service,
|
||||
CachedInstances {
|
||||
instances,
|
||||
updated_at: Instant::now(),
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
fn cluster_prefix(cluster_id: &str) -> String {
|
||||
format!("{}/clusters/{}/", PHOTON_PREFIX, cluster_id)
|
||||
}
|
||||
|
||||
337
mtls-agent/src/main.rs
Normal file
337
mtls-agent/src/main.rs
Normal file
|
|
@ -0,0 +1,337 @@
|
|||
mod client;
|
||||
mod discovery;
|
||||
mod policy;
|
||||
|
||||
use std::fs;
|
||||
use std::io::BufReader;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use clap::Parser;
|
||||
use rustls::{pki_types::CertificateDer, pki_types::PrivateKeyDer, ServerConfig};
|
||||
use rustls_pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
|
||||
use serde::Deserialize;
|
||||
use tokio::io;
|
||||
use tokio::net::{TcpListener, TcpStream};
|
||||
use tokio::task;
|
||||
use tokio_rustls::TlsAcceptor;
|
||||
use tracing::{info, warn};
|
||||
use tracing_subscriber::EnvFilter;
|
||||
|
||||
use crate::discovery::ServiceDiscovery;
|
||||
use crate::policy::PolicyEnforcer;
|
||||
|
||||
// NOTE: the `///` doc comments below are emitted verbatim by clap in the
// `--help` output (`#[command(about)]`), so their text is runtime-visible
// and is left unchanged. English summary of the Japanese text:
// "mTLS Agent (MVP: plain TCP proxy). The application listens in plaintext
// on `app_addr`; this agent listens on `mesh_bind_addr` and simply forwards
// accepted connections to `app_addr`. TLS/mTLS support is a follow-up —
// this is the skeleton."
/// mTLS Agent (MVP: プレーンTCPプロキシ)
///
/// - 設計どおり、アプリケーションは `app_addr` で平文待受
/// - 本Agentは `mesh_bind_addr` で待受し、受信した接続を `app_addr` にフォワードするだけ
/// - mTLS/TLS 対応は後続で追加する前提のスケルトン
#[derive(Parser, Debug)]
#[command(author, version, about)]
struct Cli {
    // Path to the TOML configuration file (required; `--config <PATH>`).
    /// 設定ファイル (TOML)
    #[arg(long)]
    config: PathBuf,
}
|
||||
|
||||
/// The service this agent fronts.
#[derive(Debug, Deserialize)]
struct ServiceConfig {
    // Logical service name, used for policy lookup and logging.
    name: String,
    // Plaintext address where the application itself listens.
    app_addr: String,
    // Address this agent binds for mesh-side (inbound) traffic.
    mesh_bind_addr: String,
}

/// Optional cluster / Chainfire integration settings.
#[derive(Debug, Deserialize)]
struct ClusterConfig {
    cluster_id: String,
    // Deployment environment (main.rs checks for "prod"/"stg", default "dev").
    environment: Option<String>,
    // When set, enables service discovery and policy enforcement.
    chainfire_endpoint: Option<String>,
}

/// TLS/mTLS settings for the inbound listener.
#[derive(Debug, Deserialize)]
struct MtlsConfig {
    #[serde(default)]
    mode: Option<String>, // auto/mtls/tls/plain
    // CA bundle used to verify client certificates (mtls mode only).
    #[serde(default)]
    ca_cert_path: Option<String>,
    // Server certificate chain (PEM).
    #[serde(default)]
    cert_path: Option<String>,
    // Server private key (PEM, PKCS#8 or PKCS#1).
    #[serde(default)]
    key_path: Option<String>,
}

/// Root of the TOML configuration file; only `[service]` is mandatory.
#[derive(Debug, Deserialize)]
struct Config {
    service: ServiceConfig,
    #[serde(default)]
    cluster: Option<ClusterConfig>,
    #[serde(default)]
    mtls: Option<MtlsConfig>,
}
|
||||
|
||||
/// Entry point: loads config, optionally wires up Chainfire discovery and
/// policy enforcement, resolves the effective TLS mode, and runs the
/// matching proxy loop (which never returns except on error).
#[tokio::main]
async fn main() -> Result<()> {
    // Logging: RUST_LOG from the environment, with "info" as the floor.
    tracing_subscriber::fmt()
        .with_env_filter(EnvFilter::from_default_env().add_directive("info".parse()?))
        .init();

    let cli = Cli::parse();
    let cfg = load_config(&cli.config)?;

    // Configured mode, lowercased; absent config means "plain".
    let mode = cfg
        .mtls
        .as_ref()
        .and_then(|m| m.mode.as_deref())
        .unwrap_or("plain")
        .to_lowercase();

    info!(
        service = %cfg.service.name,
        mesh_bind = %cfg.service.mesh_bind_addr,
        app_addr = %cfg.service.app_addr,
        mode = %mode,
        "starting mtls-agent"
    );

    // Chainfire integration: service discovery and policy enforcement are
    // only enabled when both [cluster] and its chainfire_endpoint are set.
    let (discovery, _policy_enforcer) = if let Some(cluster_cfg) = &cfg.cluster {
        if let Some(endpoint) = &cluster_cfg.chainfire_endpoint {
            let disc = Arc::new(ServiceDiscovery::new(
                endpoint.clone(),
                cluster_cfg.cluster_id.clone(),
            ));
            disc.start_background_refresh().await;

            let enforcer = PolicyEnforcer::new(Arc::clone(&disc), mode.clone());
            enforcer.start_background_refresh().await;

            (Some(disc), Some(enforcer))
        } else {
            (None, None)
        }
    } else {
        (None, None)
    };

    // "auto" resolves the mode from a Chainfire policy; any other value is
    // used as-is. Without discovery, "auto" degrades to "plain".
    let effective_mode = if mode == "auto" {
        if let Some(disc) = &discovery {
            // Simplified lookup: own service name against a "default" target.
            // NOTE(review): a real target-specific policy search is a TODO.
            if let Ok(Some(policy)) = disc
                .get_mtls_policy(&cfg.service.name, "default")
                .await
            {
                if policy.mtls_required.unwrap_or(false) {
                    "mtls"
                } else {
                    "tls"
                }
            } else {
                // No policy found: environment-based default —
                // prod/stg get mTLS, everything else stays plain.
                let env = cfg
                    .cluster
                    .as_ref()
                    .and_then(|c| c.environment.as_deref())
                    .unwrap_or("dev");
                if env == "prod" || env == "stg" {
                    "mtls"
                } else {
                    "plain"
                }
            }
        } else {
            "plain"
        }
    } else {
        mode.as_str()
    };

    info!(effective_mode = %effective_mode, "determined mTLS mode");

    // Hand off to the proxy loop matching the resolved mode; these loops
    // only return on a bind/accept error.
    match effective_mode {
        "plain" => {
            run_plain_proxy(&cfg.service.mesh_bind_addr, &cfg.service.app_addr).await?;
        }
        "tls" | "mtls" => {
            let tls_cfg = build_server_config(&cfg, effective_mode)?;
            run_tls_proxy(&cfg.service.mesh_bind_addr, &cfg.service.app_addr, tls_cfg).await?;
        }
        other => {
            return Err(anyhow!("unsupported mtls.mode: {}", other));
        }
    }

    Ok(())
}
|
||||
|
||||
fn load_config(path: &PathBuf) -> Result<Config> {
|
||||
let contents = fs::read_to_string(path)
|
||||
.with_context(|| format!("failed to read {}", path.display()))?;
|
||||
let cfg: Config =
|
||||
toml::from_str(&contents).with_context(|| format!("failed to parse {}", path.display()))?;
|
||||
Ok(cfg)
|
||||
}
|
||||
|
||||
fn load_certs(path: &str) -> Result<Vec<CertificateDer<'static>>> {
|
||||
let file = fs::File::open(path).with_context(|| format!("failed to open cert file {}", path))?;
|
||||
let mut reader = BufReader::new(file);
|
||||
let certs = certs(&mut reader)
|
||||
.collect::<std::result::Result<Vec<_>, _>>()
|
||||
.map_err(|e| anyhow!("failed to parse certs from {}: {}", path, e))?;
|
||||
Ok(certs)
|
||||
}
|
||||
|
||||
fn load_private_key(path: &str) -> Result<PrivateKeyDer<'static>> {
|
||||
let file = fs::File::open(path).with_context(|| format!("failed to open key file {}", path))?;
|
||||
let mut reader = BufReader::new(file);
|
||||
|
||||
// Try PKCS8 first
|
||||
if let Ok(keys) = pkcs8_private_keys(&mut reader).collect::<std::result::Result<Vec<_>, _>>() {
|
||||
if let Some(k) = keys.into_iter().next() {
|
||||
return Ok(PrivateKeyDer::Pkcs8(k));
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback to RSA
|
||||
let file = fs::File::open(path).with_context(|| format!("failed to open key file {}", path))?;
|
||||
let mut reader = BufReader::new(file);
|
||||
let keys = rsa_private_keys(&mut reader)
|
||||
.collect::<std::result::Result<Vec<_>, _>>()
|
||||
.map_err(|e| anyhow!("failed to parse private key from {}: {}", path, e))?;
|
||||
let Some(k) = keys.into_iter().next() else {
|
||||
return Err(anyhow!("no private keys found in {}", path));
|
||||
};
|
||||
Ok(PrivateKeyDer::Pkcs1(k))
|
||||
}
|
||||
|
||||
/// Builds the rustls `ServerConfig` for the inbound (mesh) listener.
///
/// `mode` is "tls" (server certificate only) or "mtls" (additionally
/// requires a client certificate verifiable against `mtls.ca_cert_path`).
///
/// # Errors
/// Fails when the `[mtls]` section or required paths are missing, a
/// cert/key file cannot be read or parsed, or rustls rejects the material.
fn build_server_config(cfg: &Config, mode: &str) -> Result<ServerConfig> {
    let mtls = cfg
        .mtls
        .as_ref()
        .ok_or_else(|| anyhow!("mtls section is required for mode {}", mode))?;

    let cert_path = mtls
        .cert_path
        .as_deref()
        .ok_or_else(|| anyhow!("mtls.cert_path is required"))?;
    let key_path = mtls
        .key_path
        .as_deref()
        .ok_or_else(|| anyhow!("mtls.key_path is required"))?;

    let certs = load_certs(cert_path)?;
    let key = load_private_key(key_path)?;

    // Base builder with protocol defaults; client-auth is decided below.
    let builder = ServerConfig::builder();

    // mTLS: install a verifier that requires a client certificate signed
    // by the configured CA.
    if mode == "mtls" {
        let ca_path = mtls
            .ca_cert_path
            .as_deref()
            .ok_or_else(|| anyhow!("mtls.ca_cert_path is required for mtls mode"))?;
        let client_certs = load_certs(ca_path)?;
        let mut roots = rustls::RootCertStore::empty();
        for c in client_certs {
            roots.add(c).map_err(|e| anyhow!("adding CA failed: {:?}", e))?;
        }
        let verifier =
            rustls::server::WebPkiClientVerifier::builder(std::sync::Arc::new(roots)).build()?;
        let cfg = builder
            .with_client_cert_verifier(verifier)
            .with_single_cert(certs, key)
            .map_err(|e| anyhow!("failed to build mtls server config: {}", e))?;
        Ok(cfg)
    } else {
        // "tls": server authentication only, no client certificate.
        let cfg = builder
            .with_no_client_auth()
            .with_single_cert(certs, key)
            .map_err(|e| anyhow!("failed to build tls server config: {}", e))?;
        Ok(cfg)
    }
}
|
||||
|
||||
async fn run_plain_proxy(listen_addr: &str, app_addr: &str) -> Result<()> {
|
||||
let listener = TcpListener::bind(listen_addr).await?;
|
||||
info!("listening on {} and forwarding to {}", listen_addr, app_addr);
|
||||
|
||||
loop {
|
||||
let (inbound, peer) = listener.accept().await?;
|
||||
let app_addr = app_addr.to_string();
|
||||
|
||||
info!(remote = %peer, "accepted connection");
|
||||
|
||||
task::spawn(async move {
|
||||
if let Err(e) = handle_connection(inbound, &app_addr).await {
|
||||
warn!(error = %e, "connection handling failed");
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
async fn run_tls_proxy(
|
||||
listen_addr: &str,
|
||||
app_addr: &str,
|
||||
server_config: ServerConfig,
|
||||
) -> Result<()> {
|
||||
let listener = TcpListener::bind(listen_addr).await?;
|
||||
let acceptor = TlsAcceptor::from(std::sync::Arc::new(server_config));
|
||||
|
||||
info!(
|
||||
"listening (TLS/mTLS) on {} and forwarding to {}",
|
||||
listen_addr, app_addr
|
||||
);
|
||||
|
||||
loop {
|
||||
let (inbound, peer) = listener.accept().await?;
|
||||
let app_addr = app_addr.to_string();
|
||||
let acceptor = acceptor.clone();
|
||||
|
||||
info!(remote = %peer, "accepted TLS connection");
|
||||
|
||||
task::spawn(async move {
|
||||
match acceptor.accept(inbound).await {
|
||||
Ok(tls_stream) => {
|
||||
if let Err(e) = handle_tls_connection(tls_stream, &app_addr).await {
|
||||
warn!(error = %e, "TLS connection handling failed");
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(error = %e, "TLS handshake failed");
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_tls_connection<S>(inbound: S, app_addr: &str) -> Result<()>
|
||||
where
|
||||
S: tokio::io::AsyncRead + tokio::io::AsyncWrite + Unpin + Send + 'static,
|
||||
{
|
||||
let mut outbound = TcpStream::connect(app_addr).await?;
|
||||
let (mut ri, mut wi) = tokio::io::split(inbound);
|
||||
let (mut ro, mut wo) = outbound.split();
|
||||
|
||||
let client_to_app = io::copy(&mut ri, &mut wo);
|
||||
let app_to_client = io::copy(&mut ro, &mut wi);
|
||||
|
||||
tokio::try_join!(client_to_app, app_to_client)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle_connection(mut inbound: TcpStream, app_addr: &str) -> Result<()> {
|
||||
let mut outbound = TcpStream::connect(app_addr).await?;
|
||||
let (mut ri, mut wi) = inbound.split();
|
||||
let (mut ro, mut wo) = outbound.split();
|
||||
|
||||
let client_to_app = io::copy(&mut ri, &mut wo);
|
||||
let app_to_client = io::copy(&mut ro, &mut wi);
|
||||
|
||||
tokio::try_join!(client_to_app, app_to_client)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue