Make deployer controllers watch-driven
This commit is contained in:
parent
faabcbfc2e
commit
82a4c6a941
12 changed files with 1981 additions and 195 deletions
|
|
@ -35,6 +35,8 @@ pub struct WatchHandle {
|
||||||
watch_id: i64,
|
watch_id: i64,
|
||||||
/// Event receiver
|
/// Event receiver
|
||||||
rx: mpsc::Receiver<WatchEvent>,
|
rx: mpsc::Receiver<WatchEvent>,
|
||||||
|
/// Keep the bidi request stream alive for the lifetime of the watch.
|
||||||
|
_req_tx: mpsc::Sender<WatchRequest>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl WatchHandle {
|
impl WatchHandle {
|
||||||
|
|
@ -109,7 +111,11 @@ impl WatchHandle {
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
Ok(Self { watch_id, rx })
|
Ok(Self {
|
||||||
|
watch_id,
|
||||||
|
rx,
|
||||||
|
_req_tx: req_tx,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get the watch ID
|
/// Get the watch ID
|
||||||
|
|
|
||||||
|
|
@ -11,9 +11,9 @@ use chainfire_raft::network::RaftRpcClient;
|
||||||
use chainfire_storage::{RocksStore, LogStorage, StateMachine};
|
use chainfire_storage::{RocksStore, LogStorage, StateMachine};
|
||||||
use chainfire_types::node::NodeRole;
|
use chainfire_types::node::NodeRole;
|
||||||
use chainfire_types::RaftRole;
|
use chainfire_types::RaftRole;
|
||||||
use chainfire_watch::WatchRegistry;
|
use chainfire_watch::{stream::WatchEventHandler, WatchRegistry};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use tokio::sync::broadcast;
|
use tokio::sync::{broadcast, mpsc};
|
||||||
use tracing::info;
|
use tracing::info;
|
||||||
|
|
||||||
/// Node instance managing all components
|
/// Node instance managing all components
|
||||||
|
|
@ -49,7 +49,11 @@ impl Node {
|
||||||
|
|
||||||
// Create LogStorage and StateMachine from store
|
// Create LogStorage and StateMachine from store
|
||||||
let log_storage = Arc::new(LogStorage::new(store.clone()));
|
let log_storage = Arc::new(LogStorage::new(store.clone()));
|
||||||
let state_machine = Arc::new(StateMachine::new(store.clone())?);
|
let mut state_machine = StateMachine::new(store.clone())?;
|
||||||
|
let (watch_tx, watch_rx) = mpsc::unbounded_channel();
|
||||||
|
state_machine.set_watch_sender(watch_tx);
|
||||||
|
let state_machine = Arc::new(state_machine);
|
||||||
|
WatchEventHandler::new(Arc::clone(&watch_registry)).spawn_dispatcher(watch_rx);
|
||||||
|
|
||||||
// Create gRPC Raft client and register peer addresses
|
// Create gRPC Raft client and register peer addresses
|
||||||
let rpc_client = Arc::new(GrpcRaftClient::new());
|
let rpc_client = Arc::new(GrpcRaftClient::new());
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
mod auth;
|
mod auth;
|
||||||
mod publish;
|
mod publish;
|
||||||
|
mod watcher;
|
||||||
|
|
||||||
use anyhow::{Context, Result};
|
use anyhow::{Context, Result};
|
||||||
use chainfire_client::Client;
|
use chainfire_client::Client;
|
||||||
|
|
@ -15,9 +16,11 @@ use publish::{PublicationConfig, PublicationReconciler};
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
use std::collections::{BTreeMap, HashMap, HashSet};
|
use std::collections::{BTreeMap, HashMap, HashSet};
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
use tokio::time::sleep;
|
use tokio::sync::{mpsc, oneshot};
|
||||||
|
use tokio::time::MissedTickBehavior;
|
||||||
use tracing::{debug, info, warn};
|
use tracing::{debug, info, warn};
|
||||||
use tracing_subscriber::EnvFilter;
|
use tracing_subscriber::EnvFilter;
|
||||||
|
use watcher::{ChainfireWatcher, WatchChange};
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
use chrono::Utc;
|
use chrono::Utc;
|
||||||
|
|
@ -26,6 +29,22 @@ use deployer_types::cluster_node_is_eligible;
|
||||||
|
|
||||||
const MANAGED_BY: &str = "fleet-scheduler";
|
const MANAGED_BY: &str = "fleet-scheduler";
|
||||||
|
|
||||||
|
fn cluster_prefix(cluster_namespace: &str, cluster_id: &str) -> String {
|
||||||
|
format!("{}/clusters/{}/", cluster_namespace, cluster_id)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn nodes_prefix(cluster_namespace: &str, cluster_id: &str) -> Vec<u8> {
|
||||||
|
format!("{}nodes/", cluster_prefix(cluster_namespace, cluster_id)).into_bytes()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn services_prefix(cluster_namespace: &str, cluster_id: &str) -> Vec<u8> {
|
||||||
|
format!("{}services/", cluster_prefix(cluster_namespace, cluster_id)).into_bytes()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn instances_prefix(cluster_namespace: &str, cluster_id: &str) -> Vec<u8> {
|
||||||
|
format!("{}instances/", cluster_prefix(cluster_namespace, cluster_id)).into_bytes()
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Parser)]
|
#[derive(Debug, Parser)]
|
||||||
#[command(author, version, about = "PhotonCloud non-Kubernetes fleet scheduler")]
|
#[command(author, version, about = "PhotonCloud non-Kubernetes fleet scheduler")]
|
||||||
struct Cli {
|
struct Cli {
|
||||||
|
|
@ -81,6 +100,11 @@ struct Scheduler {
|
||||||
dry_run: bool,
|
dry_run: bool,
|
||||||
publication: PublicationReconciler,
|
publication: PublicationReconciler,
|
||||||
once: bool,
|
once: bool,
|
||||||
|
watched_node_signatures: HashMap<String, Vec<u8>>,
|
||||||
|
watched_service_signatures: HashMap<String, Vec<u8>>,
|
||||||
|
watched_instance_signatures: HashMap<String, Vec<u8>>,
|
||||||
|
pending_instance_writes: HashMap<String, Vec<u8>>,
|
||||||
|
pending_instance_deletes: HashSet<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
|
|
@ -100,6 +124,85 @@ struct ReconcilePlan {
|
||||||
|
|
||||||
type DependencySummary = ServiceDependencySummary;
|
type DependencySummary = ServiceDependencySummary;
|
||||||
|
|
||||||
|
async fn spawn_watch_task(
|
||||||
|
watcher: ChainfireWatcher,
|
||||||
|
tx: mpsc::Sender<WatchChange>,
|
||||||
|
ready: oneshot::Sender<()>,
|
||||||
|
) {
|
||||||
|
let mut ready = Some(ready);
|
||||||
|
if let Err(error) = watcher
|
||||||
|
.watch_with_ready(
|
||||||
|
move || {
|
||||||
|
if let Some(ready) = ready.take() {
|
||||||
|
let _ = ready.send(());
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
},
|
||||||
|
move |change| {
|
||||||
|
tx.try_send(change).map_err(|send_error| {
|
||||||
|
anyhow::anyhow!("failed to enqueue watch change: {send_error}")
|
||||||
|
})
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
warn!(error = %error, "fleet scheduler watch task exited");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn wait_for_watchers_ready(receivers: Vec<oneshot::Receiver<()>>) {
|
||||||
|
for receiver in receivers {
|
||||||
|
match tokio::time::timeout(Duration::from_secs(10), receiver).await {
|
||||||
|
Ok(Ok(())) => {}
|
||||||
|
Ok(Err(_)) => warn!("fleet scheduler watch task ended before signaling readiness"),
|
||||||
|
Err(_) => warn!("timed out waiting for fleet scheduler watch readiness"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn canonical_json_signature(bytes: &[u8], drop_keys: &[&str]) -> Option<Vec<u8>> {
|
||||||
|
let mut value: Value = serde_json::from_slice(bytes).ok()?;
|
||||||
|
strip_observed_fields(&mut value, drop_keys);
|
||||||
|
serde_json::to_vec(&sort_json_value(value)).ok()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn strip_observed_fields(value: &mut Value, drop_keys: &[&str]) {
|
||||||
|
match value {
|
||||||
|
Value::Object(map) => {
|
||||||
|
for key in drop_keys {
|
||||||
|
map.remove(*key);
|
||||||
|
}
|
||||||
|
for nested in map.values_mut() {
|
||||||
|
strip_observed_fields(nested, drop_keys);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Value::Array(items) => {
|
||||||
|
for item in items {
|
||||||
|
strip_observed_fields(item, drop_keys);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn sort_json_value(value: Value) -> Value {
|
||||||
|
match value {
|
||||||
|
Value::Array(items) => Value::Array(items.into_iter().map(sort_json_value).collect()),
|
||||||
|
Value::Object(map) => {
|
||||||
|
let mut keys = map.keys().cloned().collect::<Vec<_>>();
|
||||||
|
keys.sort();
|
||||||
|
let mut normalized = serde_json::Map::new();
|
||||||
|
for key in keys {
|
||||||
|
if let Some(value) = map.get(&key) {
|
||||||
|
normalized.insert(key, sort_json_value(value.clone()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Value::Object(normalized)
|
||||||
|
}
|
||||||
|
other => other,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl Scheduler {
|
impl Scheduler {
|
||||||
fn new(cli: Cli) -> Self {
|
fn new(cli: Cli) -> Self {
|
||||||
let cluster_namespace = cli.cluster_namespace;
|
let cluster_namespace = cli.cluster_namespace;
|
||||||
|
|
@ -125,23 +228,58 @@ impl Scheduler {
|
||||||
dry_run: cli.dry_run,
|
dry_run: cli.dry_run,
|
||||||
}),
|
}),
|
||||||
once: cli.once,
|
once: cli.once,
|
||||||
|
watched_node_signatures: HashMap::new(),
|
||||||
|
watched_service_signatures: HashMap::new(),
|
||||||
|
watched_instance_signatures: HashMap::new(),
|
||||||
|
pending_instance_writes: HashMap::new(),
|
||||||
|
pending_instance_deletes: HashSet::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn run_loop(&self) -> Result<()> {
|
async fn run_loop(&mut self) -> Result<()> {
|
||||||
if self.once {
|
if self.once {
|
||||||
return self.reconcile_once().await;
|
return self.reconcile_once().await;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
self.reconcile_once().await?;
|
||||||
|
|
||||||
|
let (watch_tx, mut watch_rx) = mpsc::channel::<WatchChange>(128);
|
||||||
|
let ready_receivers = self.spawn_watchers(watch_tx);
|
||||||
|
wait_for_watchers_ready(ready_receivers).await;
|
||||||
|
self.reconcile_once().await?;
|
||||||
|
|
||||||
|
let mut interval = tokio::time::interval(self.interval);
|
||||||
|
interval.set_missed_tick_behavior(MissedTickBehavior::Delay);
|
||||||
|
interval.tick().await;
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
|
tokio::select! {
|
||||||
|
_ = interval.tick() => {
|
||||||
if let Err(error) = self.reconcile_once().await {
|
if let Err(error) = self.reconcile_once().await {
|
||||||
warn!(error = %error, "fleet scheduler reconciliation failed");
|
warn!(error = %error, "fleet scheduler reconciliation failed");
|
||||||
}
|
}
|
||||||
sleep(self.interval).await;
|
}
|
||||||
|
maybe_change = watch_rx.recv() => {
|
||||||
|
let Some(change) = maybe_change else {
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut should_reconcile = self.apply_watch_change(&change);
|
||||||
|
while let Ok(extra) = watch_rx.try_recv() {
|
||||||
|
should_reconcile |= self.apply_watch_change(&extra);
|
||||||
|
}
|
||||||
|
|
||||||
|
if should_reconcile {
|
||||||
|
if let Err(error) = self.reconcile_once().await {
|
||||||
|
warn!(error = %error, "fleet scheduler watch-triggered reconciliation failed");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn reconcile_once(&self) -> Result<()> {
|
async fn reconcile_once(&mut self) -> Result<()> {
|
||||||
let mut client = Client::connect(self.endpoint.clone()).await?;
|
let mut client = Client::connect(self.endpoint.clone()).await?;
|
||||||
let nodes = self.load_cluster_nodes(&mut client).await?;
|
let nodes = self.load_cluster_nodes(&mut client).await?;
|
||||||
let services = self.load_services(&mut client).await?;
|
let services = self.load_services(&mut client).await?;
|
||||||
|
|
@ -219,6 +357,152 @@ impl Scheduler {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn spawn_watchers(&self, tx: mpsc::Sender<WatchChange>) -> Vec<oneshot::Receiver<()>> {
|
||||||
|
let mut ready_receivers = Vec::new();
|
||||||
|
|
||||||
|
let node_watcher = ChainfireWatcher::prefix(
|
||||||
|
self.endpoint.clone(),
|
||||||
|
nodes_prefix(&self.cluster_namespace, &self.cluster_id),
|
||||||
|
);
|
||||||
|
let (node_ready_tx, node_ready_rx) = oneshot::channel();
|
||||||
|
tokio::spawn(spawn_watch_task(node_watcher, tx.clone(), node_ready_tx));
|
||||||
|
ready_receivers.push(node_ready_rx);
|
||||||
|
|
||||||
|
let service_watcher = ChainfireWatcher::prefix(
|
||||||
|
self.endpoint.clone(),
|
||||||
|
services_prefix(&self.cluster_namespace, &self.cluster_id),
|
||||||
|
);
|
||||||
|
let (service_ready_tx, service_ready_rx) = oneshot::channel();
|
||||||
|
tokio::spawn(spawn_watch_task(
|
||||||
|
service_watcher,
|
||||||
|
tx.clone(),
|
||||||
|
service_ready_tx,
|
||||||
|
));
|
||||||
|
ready_receivers.push(service_ready_rx);
|
||||||
|
|
||||||
|
let instance_watcher = ChainfireWatcher::prefix(
|
||||||
|
self.endpoint.clone(),
|
||||||
|
instances_prefix(&self.cluster_namespace, &self.cluster_id),
|
||||||
|
);
|
||||||
|
let (instance_ready_tx, instance_ready_rx) = oneshot::channel();
|
||||||
|
tokio::spawn(spawn_watch_task(instance_watcher, tx, instance_ready_tx));
|
||||||
|
ready_receivers.push(instance_ready_rx);
|
||||||
|
|
||||||
|
ready_receivers
|
||||||
|
}
|
||||||
|
|
||||||
|
fn apply_watch_change(&mut self, change: &WatchChange) -> bool {
|
||||||
|
let key = String::from_utf8_lossy(&change.key).to_string();
|
||||||
|
let nodes_prefix = String::from_utf8_lossy(&nodes_prefix(&self.cluster_namespace, &self.cluster_id))
|
||||||
|
.to_string();
|
||||||
|
let services_prefix =
|
||||||
|
String::from_utf8_lossy(&services_prefix(&self.cluster_namespace, &self.cluster_id))
|
||||||
|
.to_string();
|
||||||
|
let instances_prefix =
|
||||||
|
String::from_utf8_lossy(&instances_prefix(&self.cluster_namespace, &self.cluster_id))
|
||||||
|
.to_string();
|
||||||
|
|
||||||
|
if let Some(suffix) = key.strip_prefix(&nodes_prefix) {
|
||||||
|
if !suffix.is_empty() && !suffix.contains('/') {
|
||||||
|
return self.apply_node_watch_change(&key, change);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(suffix) = key.strip_prefix(&services_prefix) {
|
||||||
|
if !suffix.is_empty() && !suffix.contains('/') {
|
||||||
|
return self.apply_service_watch_change(&key, change);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if key.starts_with(&instances_prefix) {
|
||||||
|
return self.apply_instance_watch_change(&key, change);
|
||||||
|
}
|
||||||
|
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
fn apply_node_watch_change(&mut self, key: &str, change: &WatchChange) -> bool {
|
||||||
|
if matches!(change.event_type, chainfire_client::EventType::Delete) {
|
||||||
|
self.watched_node_signatures.remove(key);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
let Some(signature) = canonical_json_signature(&change.value, &["last_heartbeat"]) else {
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
if self.watched_node_signatures.get(key) == Some(&signature) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
self.watched_node_signatures
|
||||||
|
.insert(key.to_string(), signature);
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
|
fn apply_service_watch_change(&mut self, key: &str, change: &WatchChange) -> bool {
|
||||||
|
if matches!(change.event_type, chainfire_client::EventType::Delete) {
|
||||||
|
self.watched_service_signatures.remove(key);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
let Some(signature) = canonical_json_signature(&change.value, &[]) else {
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
if self.watched_service_signatures.get(key) == Some(&signature) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
self.watched_service_signatures
|
||||||
|
.insert(key.to_string(), signature);
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
|
fn apply_instance_watch_change(&mut self, key: &str, change: &WatchChange) -> bool {
|
||||||
|
if let Some(expected) = self.pending_instance_writes.get(key) {
|
||||||
|
if matches!(change.event_type, chainfire_client::EventType::Put) && change.value == *expected
|
||||||
|
{
|
||||||
|
self.pending_instance_writes.remove(key);
|
||||||
|
self.pending_instance_deletes.remove(key);
|
||||||
|
if let Some(signature) =
|
||||||
|
canonical_json_signature(&change.value, &["last_heartbeat", "observed_at"])
|
||||||
|
{
|
||||||
|
self.watched_instance_signatures
|
||||||
|
.insert(key.to_string(), signature);
|
||||||
|
} else {
|
||||||
|
self.watched_instance_signatures.remove(key);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
self.pending_instance_writes.remove(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
if self.pending_instance_deletes.remove(key)
|
||||||
|
&& matches!(change.event_type, chainfire_client::EventType::Delete)
|
||||||
|
{
|
||||||
|
self.watched_instance_signatures.remove(key);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if matches!(change.event_type, chainfire_client::EventType::Delete) {
|
||||||
|
self.watched_instance_signatures.remove(key);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
let Some(signature) = canonical_json_signature(&change.value, &["last_heartbeat", "observed_at"])
|
||||||
|
else {
|
||||||
|
self.watched_instance_signatures.remove(key);
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
|
||||||
|
if self.watched_instance_signatures.get(key) == Some(&signature) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.watched_instance_signatures
|
||||||
|
.insert(key.to_string(), signature);
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
async fn load_cluster_nodes(&self, client: &mut Client) -> Result<Vec<ClusterNodeRecord>> {
|
async fn load_cluster_nodes(&self, client: &mut Client) -> Result<Vec<ClusterNodeRecord>> {
|
||||||
let prefix = format!(
|
let prefix = format!(
|
||||||
"{}/clusters/{}/nodes/",
|
"{}/clusters/{}/nodes/",
|
||||||
|
|
@ -364,7 +648,7 @@ impl Scheduler {
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn reconcile_service(
|
async fn reconcile_service(
|
||||||
&self,
|
&mut self,
|
||||||
client: &mut Client,
|
client: &mut Client,
|
||||||
nodes: &[ClusterNodeRecord],
|
nodes: &[ClusterNodeRecord],
|
||||||
service: &ServiceSpec,
|
service: &ServiceSpec,
|
||||||
|
|
@ -420,9 +704,15 @@ impl Scheduler {
|
||||||
"would upsert managed instance"
|
"would upsert managed instance"
|
||||||
);
|
);
|
||||||
} else {
|
} else {
|
||||||
client
|
let key_str = key.clone();
|
||||||
.put(&key, serde_json::to_vec(&upsert.desired_value)?)
|
let payload = serde_json::to_vec(&upsert.desired_value)?;
|
||||||
.await?;
|
self.pending_instance_deletes.remove(&key_str);
|
||||||
|
self.pending_instance_writes
|
||||||
|
.insert(key_str.clone(), payload.clone());
|
||||||
|
if let Err(error) = client.put(&key, &payload).await {
|
||||||
|
self.pending_instance_writes.remove(&key_str);
|
||||||
|
return Err(error.into());
|
||||||
|
}
|
||||||
info!(
|
info!(
|
||||||
service = %service.name,
|
service = %service.name,
|
||||||
instance_id = %upsert.instance.instance_id,
|
instance_id = %upsert.instance.instance_id,
|
||||||
|
|
@ -456,12 +746,19 @@ impl Scheduler {
|
||||||
instance_id = %instance_id,
|
instance_id = %instance_id,
|
||||||
"would delete stale managed instance"
|
"would delete stale managed instance"
|
||||||
);
|
);
|
||||||
} else if client.delete(&key).await? {
|
} else {
|
||||||
|
let key_str = key.clone();
|
||||||
|
self.pending_instance_writes.remove(&key_str);
|
||||||
|
self.pending_instance_deletes.insert(key_str.clone());
|
||||||
|
if client.delete(&key).await? {
|
||||||
info!(
|
info!(
|
||||||
service = %service.name,
|
service = %service.name,
|
||||||
instance_id = %instance_id,
|
instance_id = %instance_id,
|
||||||
"deleted stale managed instance"
|
"deleted stale managed instance"
|
||||||
);
|
);
|
||||||
|
} else {
|
||||||
|
self.pending_instance_deletes.remove(&key_str);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1095,7 +1392,7 @@ async fn main() -> Result<()> {
|
||||||
.with_env_filter(EnvFilter::from_default_env().add_directive("info".parse()?))
|
.with_env_filter(EnvFilter::from_default_env().add_directive("info".parse()?))
|
||||||
.init();
|
.init();
|
||||||
|
|
||||||
let scheduler = Scheduler::new(Cli::parse());
|
let mut scheduler = Scheduler::new(Cli::parse());
|
||||||
scheduler.run_loop().await
|
scheduler.run_loop().await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1103,6 +1400,7 @@ async fn main() -> Result<()> {
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use chrono::Duration as ChronoDuration;
|
use chrono::Duration as ChronoDuration;
|
||||||
|
use chainfire_client::EventType;
|
||||||
use deployer_types::{
|
use deployer_types::{
|
||||||
ClusterNodeRecord, HealthCheckSpec, PlacementPolicy, ProcessSpec,
|
ClusterNodeRecord, HealthCheckSpec, PlacementPolicy, ProcessSpec,
|
||||||
PublishedLoadBalancerState, RolloutStrategySpec, ServiceDependencyCondition,
|
PublishedLoadBalancerState, RolloutStrategySpec, ServiceDependencyCondition,
|
||||||
|
|
@ -1206,6 +1504,25 @@ mod tests {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn test_scheduler() -> Scheduler {
|
||||||
|
Scheduler::new(Cli {
|
||||||
|
chainfire_endpoint: "http://127.0.0.1:7000".to_string(),
|
||||||
|
cluster_namespace: "photoncloud".to_string(),
|
||||||
|
cluster_id: "test-cluster".to_string(),
|
||||||
|
interval_secs: 1,
|
||||||
|
heartbeat_timeout_secs: 300,
|
||||||
|
dry_run: true,
|
||||||
|
iam_endpoint: None,
|
||||||
|
fiberlb_endpoint: None,
|
||||||
|
flashdns_endpoint: None,
|
||||||
|
publish_address: None,
|
||||||
|
default_org_id: "default-org".to_string(),
|
||||||
|
default_project_id: "default-project".to_string(),
|
||||||
|
controller_principal_id: MANAGED_BY.to_string(),
|
||||||
|
once: false,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_node_eligibility_matches_roles_and_labels() {
|
fn test_node_eligibility_matches_roles_and_labels() {
|
||||||
let node = active_node("node01", &["worker"], &[("tier", "general")]);
|
let node = active_node("node01", &["worker"], &[("tier", "general")]);
|
||||||
|
|
@ -1836,4 +2153,135 @@ mod tests {
|
||||||
|
|
||||||
assert_eq!(instance_id, "api-node01-3");
|
assert_eq!(instance_id, "api-node01-3");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_scheduler_watch_ignores_node_heartbeat_only_changes() {
|
||||||
|
let mut scheduler = test_scheduler();
|
||||||
|
let node_key = format!(
|
||||||
|
"{}nodes/node01",
|
||||||
|
cluster_prefix(&scheduler.cluster_namespace, &scheduler.cluster_id)
|
||||||
|
);
|
||||||
|
|
||||||
|
let first = WatchChange {
|
||||||
|
event_type: EventType::Put,
|
||||||
|
key: node_key.as_bytes().to_vec(),
|
||||||
|
value: serde_json::to_vec(&serde_json::json!({
|
||||||
|
"node_id": "node01",
|
||||||
|
"hostname": "node01",
|
||||||
|
"ip": "10.0.0.11",
|
||||||
|
"roles": ["worker"],
|
||||||
|
"labels": {"tier": "general"},
|
||||||
|
"state": "active",
|
||||||
|
"last_heartbeat": "2026-04-01T00:00:00Z"
|
||||||
|
}))
|
||||||
|
.unwrap(),
|
||||||
|
revision: 1,
|
||||||
|
};
|
||||||
|
assert!(scheduler.apply_watch_change(&first));
|
||||||
|
|
||||||
|
let heartbeat_only = WatchChange {
|
||||||
|
revision: 2,
|
||||||
|
value: serde_json::to_vec(&serde_json::json!({
|
||||||
|
"node_id": "node01",
|
||||||
|
"hostname": "node01",
|
||||||
|
"ip": "10.0.0.11",
|
||||||
|
"roles": ["worker"],
|
||||||
|
"labels": {"tier": "general"},
|
||||||
|
"state": "active",
|
||||||
|
"last_heartbeat": "2026-04-01T00:00:05Z"
|
||||||
|
}))
|
||||||
|
.unwrap(),
|
||||||
|
..first.clone()
|
||||||
|
};
|
||||||
|
assert!(!scheduler.apply_watch_change(&heartbeat_only));
|
||||||
|
|
||||||
|
let drain = WatchChange {
|
||||||
|
revision: 3,
|
||||||
|
value: serde_json::to_vec(&serde_json::json!({
|
||||||
|
"node_id": "node01",
|
||||||
|
"hostname": "node01",
|
||||||
|
"ip": "10.0.0.11",
|
||||||
|
"roles": ["worker"],
|
||||||
|
"labels": {"tier": "general"},
|
||||||
|
"state": "draining",
|
||||||
|
"last_heartbeat": "2026-04-01T00:00:10Z"
|
||||||
|
}))
|
||||||
|
.unwrap(),
|
||||||
|
..first
|
||||||
|
};
|
||||||
|
assert!(scheduler.apply_watch_change(&drain));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_scheduler_watch_suppresses_self_written_instance_update_but_reacts_to_health_change() {
|
||||||
|
let mut scheduler = test_scheduler();
|
||||||
|
let key = instance_key(
|
||||||
|
&scheduler.cluster_namespace,
|
||||||
|
&scheduler.cluster_id,
|
||||||
|
"api",
|
||||||
|
"api-node01",
|
||||||
|
);
|
||||||
|
let initial = serde_json::to_vec(&serde_json::json!({
|
||||||
|
"instance_id": "api-node01",
|
||||||
|
"service": "api",
|
||||||
|
"node_id": "node01",
|
||||||
|
"ip": "10.0.0.11",
|
||||||
|
"port": 18080,
|
||||||
|
"managed_by": "fleet-scheduler",
|
||||||
|
"state": "starting",
|
||||||
|
"last_heartbeat": "2026-04-01T00:00:00Z",
|
||||||
|
"observed_at": "2026-04-01T00:00:00Z"
|
||||||
|
}))
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let first = WatchChange {
|
||||||
|
event_type: EventType::Put,
|
||||||
|
key: key.as_bytes().to_vec(),
|
||||||
|
value: initial.clone(),
|
||||||
|
revision: 1,
|
||||||
|
};
|
||||||
|
assert!(scheduler.apply_watch_change(&first));
|
||||||
|
|
||||||
|
let self_write = serde_json::to_vec(&serde_json::json!({
|
||||||
|
"instance_id": "api-node01",
|
||||||
|
"service": "api",
|
||||||
|
"node_id": "node01",
|
||||||
|
"ip": "10.0.0.11",
|
||||||
|
"port": 18081,
|
||||||
|
"managed_by": "fleet-scheduler",
|
||||||
|
"state": "starting",
|
||||||
|
"last_heartbeat": "2026-04-01T00:00:00Z",
|
||||||
|
"observed_at": "2026-04-01T00:00:00Z"
|
||||||
|
}))
|
||||||
|
.unwrap();
|
||||||
|
scheduler
|
||||||
|
.pending_instance_writes
|
||||||
|
.insert(key.clone(), self_write.clone());
|
||||||
|
|
||||||
|
assert!(!scheduler.apply_watch_change(&WatchChange {
|
||||||
|
event_type: EventType::Put,
|
||||||
|
key: key.as_bytes().to_vec(),
|
||||||
|
value: self_write,
|
||||||
|
revision: 2,
|
||||||
|
}));
|
||||||
|
|
||||||
|
let healthy = serde_json::to_vec(&serde_json::json!({
|
||||||
|
"instance_id": "api-node01",
|
||||||
|
"service": "api",
|
||||||
|
"node_id": "node01",
|
||||||
|
"ip": "10.0.0.11",
|
||||||
|
"port": 18081,
|
||||||
|
"managed_by": "fleet-scheduler",
|
||||||
|
"state": "healthy",
|
||||||
|
"last_heartbeat": "2026-04-01T00:00:05Z",
|
||||||
|
"observed_at": "2026-04-01T00:00:05Z"
|
||||||
|
}))
|
||||||
|
.unwrap();
|
||||||
|
assert!(scheduler.apply_watch_change(&WatchChange {
|
||||||
|
event_type: EventType::Put,
|
||||||
|
key: key.as_bytes().to_vec(),
|
||||||
|
value: healthy,
|
||||||
|
revision: 3,
|
||||||
|
}));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
122
deployer/crates/fleet-scheduler/src/watcher.rs
Normal file
122
deployer/crates/fleet-scheduler/src/watcher.rs
Normal file
|
|
@ -0,0 +1,122 @@
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use anyhow::Result;
|
||||||
|
use chainfire_client::{Client, EventType};
|
||||||
|
use tracing::{info, warn};
|
||||||
|
|
||||||
|
#[allow(dead_code)]
|
||||||
|
#[derive(Debug, Clone, Copy)]
|
||||||
|
pub enum WatchScope {
|
||||||
|
Key,
|
||||||
|
Prefix,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct WatchChange {
|
||||||
|
pub event_type: EventType,
|
||||||
|
pub key: Vec<u8>,
|
||||||
|
pub value: Vec<u8>,
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub revision: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct ChainfireWatcher {
|
||||||
|
endpoint: String,
|
||||||
|
scope: WatchScope,
|
||||||
|
target: Vec<u8>,
|
||||||
|
reconnect_backoff: Duration,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ChainfireWatcher {
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub fn key(endpoint: String, key: Vec<u8>) -> Self {
|
||||||
|
Self {
|
||||||
|
endpoint,
|
||||||
|
scope: WatchScope::Key,
|
||||||
|
target: key,
|
||||||
|
reconnect_backoff: Duration::from_secs(1),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn prefix(endpoint: String, prefix: Vec<u8>) -> Self {
|
||||||
|
Self {
|
||||||
|
endpoint,
|
||||||
|
scope: WatchScope::Prefix,
|
||||||
|
target: prefix,
|
||||||
|
reconnect_backoff: Duration::from_secs(1),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub async fn watch<F>(&self, mut callback: F) -> Result<()>
|
||||||
|
where
|
||||||
|
F: FnMut(WatchChange) -> Result<()>,
|
||||||
|
{
|
||||||
|
self.watch_with_ready(|| Ok(()), move |change| callback(change))
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn watch_with_ready<F, G>(&self, mut on_connected: G, mut callback: F) -> Result<()>
|
||||||
|
where
|
||||||
|
F: FnMut(WatchChange) -> Result<()>,
|
||||||
|
G: FnMut() -> Result<()>,
|
||||||
|
{
|
||||||
|
loop {
|
||||||
|
match Client::connect(self.endpoint.clone()).await {
|
||||||
|
Ok(mut client) => {
|
||||||
|
let watch_result = match self.scope {
|
||||||
|
WatchScope::Key => client.watch(&self.target).await,
|
||||||
|
WatchScope::Prefix => client.watch_prefix(&self.target).await,
|
||||||
|
};
|
||||||
|
|
||||||
|
match watch_result {
|
||||||
|
Ok(mut handle) => {
|
||||||
|
info!(
|
||||||
|
scope = ?self.scope,
|
||||||
|
target = %String::from_utf8_lossy(&self.target),
|
||||||
|
watch_id = handle.id(),
|
||||||
|
"connected ChainFire watch"
|
||||||
|
);
|
||||||
|
on_connected()?;
|
||||||
|
|
||||||
|
while let Some(event) = handle.recv().await {
|
||||||
|
if let Err(error) = callback(WatchChange {
|
||||||
|
event_type: event.event_type,
|
||||||
|
key: event.key,
|
||||||
|
value: event.value,
|
||||||
|
revision: event.revision,
|
||||||
|
}) {
|
||||||
|
warn!(error = %error, "watch callback failed");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
warn!(
|
||||||
|
scope = ?self.scope,
|
||||||
|
target = %String::from_utf8_lossy(&self.target),
|
||||||
|
"ChainFire watch stream ended; reconnecting"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Err(error) => {
|
||||||
|
warn!(
|
||||||
|
error = %error,
|
||||||
|
scope = ?self.scope,
|
||||||
|
target = %String::from_utf8_lossy(&self.target),
|
||||||
|
"failed to create ChainFire watch"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(error) => {
|
||||||
|
warn!(
|
||||||
|
error = %error,
|
||||||
|
scope = ?self.scope,
|
||||||
|
target = %String::from_utf8_lossy(&self.target),
|
||||||
|
"failed to connect ChainFire watch"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
tokio::time::sleep(self.reconnect_backoff).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -15,10 +15,12 @@ use deployer_types::{
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
use tokio::process::Command;
|
use tokio::process::Command;
|
||||||
use tokio::time::sleep;
|
use tokio::sync::mpsc;
|
||||||
|
use tokio::time::MissedTickBehavior;
|
||||||
use tracing::{info, warn};
|
use tracing::{info, warn};
|
||||||
|
|
||||||
use crate::process::{render_container_process_spec, ProcessManager};
|
use crate::process::{render_container_process_spec, ProcessManager};
|
||||||
|
use crate::watcher::{ChainfireWatcher, WatchChange};
|
||||||
|
|
||||||
fn cluster_prefix(cluster_namespace: &str, cluster_id: &str) -> String {
|
fn cluster_prefix(cluster_namespace: &str, cluster_id: &str) -> String {
|
||||||
format!("{}/clusters/{}/", cluster_namespace, cluster_id)
|
format!("{}/clusters/{}/", cluster_namespace, cluster_id)
|
||||||
|
|
@ -48,6 +50,10 @@ fn key_instance(
|
||||||
.into_bytes()
|
.into_bytes()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn instances_prefix(cluster_namespace: &str, cluster_id: &str) -> Vec<u8> {
|
||||||
|
format!("{}instances/", cluster_prefix(cluster_namespace, cluster_id)).into_bytes()
|
||||||
|
}
|
||||||
|
|
||||||
fn service_status_prefix(cluster_namespace: &str, cluster_id: &str) -> String {
|
fn service_status_prefix(cluster_namespace: &str, cluster_id: &str) -> String {
|
||||||
format!(
|
format!(
|
||||||
"{}/clusters/{}/service-statuses/",
|
"{}/clusters/{}/service-statuses/",
|
||||||
|
|
@ -92,6 +98,8 @@ pub struct Agent {
|
||||||
allow_local_instance_upsert: bool,
|
allow_local_instance_upsert: bool,
|
||||||
process_manager: ProcessManager,
|
process_manager: ProcessManager,
|
||||||
next_health_checks: HashMap<String, DateTime<Utc>>,
|
next_health_checks: HashMap<String, DateTime<Utc>>,
|
||||||
|
watched_node_signature: Option<Vec<u8>>,
|
||||||
|
watched_instance_signatures: HashMap<String, Vec<u8>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Deserialize, Serialize, Clone)]
|
#[derive(Debug, Deserialize, Serialize, Clone)]
|
||||||
|
|
@ -112,6 +120,61 @@ struct LocalInstanceSpec {
|
||||||
container: Option<ContainerSpec>,
|
container: Option<ContainerSpec>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn spawn_watch_task(watcher: ChainfireWatcher, tx: mpsc::Sender<WatchChange>) {
|
||||||
|
if let Err(error) = watcher
|
||||||
|
.watch(move |change| {
|
||||||
|
tx.try_send(change)
|
||||||
|
.map_err(|send_error| anyhow::anyhow!("failed to enqueue watch change: {}", send_error))
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
warn!(error = %error, "node-agent watch task exited");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn canonical_json_signature(bytes: &[u8], drop_keys: &[&str]) -> Option<Vec<u8>> {
|
||||||
|
let mut value: Value = serde_json::from_slice(bytes).ok()?;
|
||||||
|
strip_observed_fields(&mut value, drop_keys);
|
||||||
|
serde_json::to_vec(&sort_json_value(value)).ok()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn strip_observed_fields(value: &mut Value, drop_keys: &[&str]) {
|
||||||
|
match value {
|
||||||
|
Value::Object(map) => {
|
||||||
|
for key in drop_keys {
|
||||||
|
map.remove(*key);
|
||||||
|
}
|
||||||
|
for nested in map.values_mut() {
|
||||||
|
strip_observed_fields(nested, drop_keys);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Value::Array(items) => {
|
||||||
|
for item in items {
|
||||||
|
strip_observed_fields(item, drop_keys);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn sort_json_value(value: Value) -> Value {
|
||||||
|
match value {
|
||||||
|
Value::Array(items) => Value::Array(items.into_iter().map(sort_json_value).collect()),
|
||||||
|
Value::Object(map) => {
|
||||||
|
let mut keys = map.keys().cloned().collect::<Vec<_>>();
|
||||||
|
keys.sort();
|
||||||
|
let mut normalized = serde_json::Map::new();
|
||||||
|
for key in keys {
|
||||||
|
if let Some(value) = map.get(&key) {
|
||||||
|
normalized.insert(key, sort_json_value(value.clone()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Value::Object(normalized)
|
||||||
|
}
|
||||||
|
other => other,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl Agent {
|
impl Agent {
|
||||||
pub fn new(
|
pub fn new(
|
||||||
endpoint: String,
|
endpoint: String,
|
||||||
|
|
@ -135,15 +198,44 @@ impl Agent {
|
||||||
allow_local_instance_upsert,
|
allow_local_instance_upsert,
|
||||||
process_manager: ProcessManager::new(pid_dir),
|
process_manager: ProcessManager::new(pid_dir),
|
||||||
next_health_checks: HashMap::new(),
|
next_health_checks: HashMap::new(),
|
||||||
|
watched_node_signature: None,
|
||||||
|
watched_instance_signatures: HashMap::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn run_loop(&mut self) -> Result<()> {
|
pub async fn run_loop(&mut self) -> Result<()> {
|
||||||
|
self.tick().await?;
|
||||||
|
|
||||||
|
let (watch_tx, mut watch_rx) = mpsc::channel::<WatchChange>(64);
|
||||||
|
self.spawn_watchers(watch_tx);
|
||||||
|
let mut interval = tokio::time::interval(self.interval);
|
||||||
|
interval.set_missed_tick_behavior(MissedTickBehavior::Delay);
|
||||||
|
interval.tick().await;
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
if let Err(e) = self.tick().await {
|
tokio::select! {
|
||||||
warn!(error = %e, "node-agent tick failed");
|
_ = interval.tick() => {
|
||||||
|
if let Err(error) = self.tick().await {
|
||||||
|
warn!(error = %error, "node-agent tick failed");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
maybe_change = watch_rx.recv() => {
|
||||||
|
let Some(change) = maybe_change else {
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut should_reconcile = self.apply_watch_change(&change);
|
||||||
|
while let Ok(extra) = watch_rx.try_recv() {
|
||||||
|
should_reconcile |= self.apply_watch_change(&extra);
|
||||||
|
}
|
||||||
|
|
||||||
|
if should_reconcile {
|
||||||
|
if let Err(error) = self.tick().await {
|
||||||
|
warn!(error = %error, "node-agent watch-triggered tick failed");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
sleep(self.interval).await;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -234,6 +326,76 @@ impl Agent {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn spawn_watchers(&self, tx: mpsc::Sender<WatchChange>) {
|
||||||
|
let node_watcher = ChainfireWatcher::key(
|
||||||
|
self.endpoint.clone(),
|
||||||
|
key_node(&self.cluster_namespace, &self.cluster_id, &self.node_id),
|
||||||
|
);
|
||||||
|
tokio::spawn(spawn_watch_task(node_watcher, tx.clone()));
|
||||||
|
|
||||||
|
let instances_watcher = ChainfireWatcher::prefix(
|
||||||
|
self.endpoint.clone(),
|
||||||
|
instances_prefix(&self.cluster_namespace, &self.cluster_id),
|
||||||
|
);
|
||||||
|
tokio::spawn(spawn_watch_task(instances_watcher, tx));
|
||||||
|
}
|
||||||
|
|
||||||
|
fn apply_watch_change(&mut self, change: &WatchChange) -> bool {
|
||||||
|
let key = String::from_utf8_lossy(&change.key).to_string();
|
||||||
|
let node_key =
|
||||||
|
String::from_utf8_lossy(&key_node(&self.cluster_namespace, &self.cluster_id, &self.node_id))
|
||||||
|
.to_string();
|
||||||
|
let instances_prefix =
|
||||||
|
String::from_utf8_lossy(&instances_prefix(&self.cluster_namespace, &self.cluster_id))
|
||||||
|
.to_string();
|
||||||
|
|
||||||
|
if key == node_key {
|
||||||
|
return self.apply_node_watch_change(change);
|
||||||
|
}
|
||||||
|
|
||||||
|
if key.starts_with(&instances_prefix) {
|
||||||
|
return self.apply_instance_watch_change(&key, change);
|
||||||
|
}
|
||||||
|
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
fn apply_node_watch_change(&mut self, change: &WatchChange) -> bool {
|
||||||
|
if matches!(change.event_type, chainfire_client::EventType::Delete) {
|
||||||
|
self.watched_node_signature = None;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
let Some(signature) = canonical_json_signature(&change.value, &["last_heartbeat"]) else {
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
if self.watched_node_signature.as_ref() == Some(&signature) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
self.watched_node_signature = Some(signature);
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
|
fn apply_instance_watch_change(&mut self, key: &str, change: &WatchChange) -> bool {
|
||||||
|
if matches!(change.event_type, chainfire_client::EventType::Delete) {
|
||||||
|
self.watched_instance_signatures.remove(key);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
let Some(signature) =
|
||||||
|
canonical_json_signature(&change.value, &["state", "last_heartbeat", "observed_at"])
|
||||||
|
else {
|
||||||
|
self.watched_instance_signatures.remove(key);
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
|
||||||
|
if self.watched_instance_signatures.get(key) == Some(&signature) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.watched_instance_signatures
|
||||||
|
.insert(key.to_string(), signature);
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
fn is_local_instance(&self, inst: &LocalInstanceSpec) -> bool {
|
fn is_local_instance(&self, inst: &LocalInstanceSpec) -> bool {
|
||||||
matches!(inst.node_id.as_deref(), Some(node_id) if node_id == self.node_id)
|
matches!(inst.node_id.as_deref(), Some(node_id) if node_id == self.node_id)
|
||||||
}
|
}
|
||||||
|
|
@ -1099,4 +1261,125 @@ mod tests {
|
||||||
Some("healthy")
|
Some("healthy")
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn watch_signature_ignores_node_heartbeat_only_changes() {
|
||||||
|
let mut agent = test_agent();
|
||||||
|
let node_key = key_node(&agent.cluster_namespace, &agent.cluster_id, &agent.node_id);
|
||||||
|
|
||||||
|
let first = WatchChange {
|
||||||
|
event_type: chainfire_client::EventType::Put,
|
||||||
|
key: node_key.clone(),
|
||||||
|
value: serde_json::to_vec(&serde_json::json!({
|
||||||
|
"node_id": "node01",
|
||||||
|
"hostname": "node01",
|
||||||
|
"ip": "10.0.0.11",
|
||||||
|
"roles": ["worker"],
|
||||||
|
"labels": {"tier": "general"},
|
||||||
|
"state": "active",
|
||||||
|
"last_heartbeat": "2026-04-01T00:00:00Z"
|
||||||
|
}))
|
||||||
|
.unwrap(),
|
||||||
|
revision: 1,
|
||||||
|
};
|
||||||
|
assert!(agent.apply_watch_change(&first));
|
||||||
|
|
||||||
|
let heartbeat_only = WatchChange {
|
||||||
|
revision: 2,
|
||||||
|
value: serde_json::to_vec(&serde_json::json!({
|
||||||
|
"node_id": "node01",
|
||||||
|
"hostname": "node01",
|
||||||
|
"ip": "10.0.0.11",
|
||||||
|
"roles": ["worker"],
|
||||||
|
"labels": {"tier": "general"},
|
||||||
|
"state": "active",
|
||||||
|
"last_heartbeat": "2026-04-01T00:00:05Z"
|
||||||
|
}))
|
||||||
|
.unwrap(),
|
||||||
|
..first.clone()
|
||||||
|
};
|
||||||
|
assert!(!agent.apply_watch_change(&heartbeat_only));
|
||||||
|
|
||||||
|
let drain = WatchChange {
|
||||||
|
revision: 3,
|
||||||
|
value: serde_json::to_vec(&serde_json::json!({
|
||||||
|
"node_id": "node01",
|
||||||
|
"hostname": "node01",
|
||||||
|
"ip": "10.0.0.11",
|
||||||
|
"roles": ["worker"],
|
||||||
|
"labels": {"tier": "general"},
|
||||||
|
"state": "draining",
|
||||||
|
"last_heartbeat": "2026-04-01T00:00:10Z"
|
||||||
|
}))
|
||||||
|
.unwrap(),
|
||||||
|
..first
|
||||||
|
};
|
||||||
|
assert!(agent.apply_watch_change(&drain));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn watch_signature_ignores_instance_observed_fields_only_changes() {
|
||||||
|
let mut agent = test_agent();
|
||||||
|
let instance_key = key_instance(
|
||||||
|
&agent.cluster_namespace,
|
||||||
|
&agent.cluster_id,
|
||||||
|
"api",
|
||||||
|
"api-node01",
|
||||||
|
);
|
||||||
|
|
||||||
|
let first = WatchChange {
|
||||||
|
event_type: chainfire_client::EventType::Put,
|
||||||
|
key: instance_key.clone(),
|
||||||
|
value: serde_json::to_vec(&serde_json::json!({
|
||||||
|
"instance_id": "api-node01",
|
||||||
|
"service": "api",
|
||||||
|
"node_id": "node01",
|
||||||
|
"ip": "127.0.0.2",
|
||||||
|
"port": 18080,
|
||||||
|
"managed_by": "fleet-scheduler",
|
||||||
|
"state": null,
|
||||||
|
"last_heartbeat": null,
|
||||||
|
"observed_at": null
|
||||||
|
}))
|
||||||
|
.unwrap(),
|
||||||
|
revision: 1,
|
||||||
|
};
|
||||||
|
assert!(agent.apply_watch_change(&first));
|
||||||
|
|
||||||
|
let observed_only = WatchChange {
|
||||||
|
revision: 2,
|
||||||
|
value: serde_json::to_vec(&serde_json::json!({
|
||||||
|
"instance_id": "api-node01",
|
||||||
|
"service": "api",
|
||||||
|
"node_id": "node01",
|
||||||
|
"ip": "127.0.0.2",
|
||||||
|
"port": 18080,
|
||||||
|
"managed_by": "fleet-scheduler",
|
||||||
|
"state": "healthy",
|
||||||
|
"last_heartbeat": "2026-04-01T00:00:05Z",
|
||||||
|
"observed_at": "2026-04-01T00:00:05Z"
|
||||||
|
}))
|
||||||
|
.unwrap(),
|
||||||
|
..first.clone()
|
||||||
|
};
|
||||||
|
assert!(!agent.apply_watch_change(&observed_only));
|
||||||
|
|
||||||
|
let desired_change = WatchChange {
|
||||||
|
revision: 3,
|
||||||
|
value: serde_json::to_vec(&serde_json::json!({
|
||||||
|
"instance_id": "api-node01",
|
||||||
|
"service": "api",
|
||||||
|
"node_id": "node01",
|
||||||
|
"ip": "127.0.0.2",
|
||||||
|
"port": 18081,
|
||||||
|
"managed_by": "fleet-scheduler",
|
||||||
|
"state": "healthy",
|
||||||
|
"last_heartbeat": "2026-04-01T00:00:10Z",
|
||||||
|
"observed_at": "2026-04-01T00:00:10Z"
|
||||||
|
}))
|
||||||
|
.unwrap(),
|
||||||
|
..first
|
||||||
|
};
|
||||||
|
assert!(agent.apply_watch_change(&desired_change));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -7,11 +7,12 @@ use tracing_subscriber::EnvFilter;
|
||||||
|
|
||||||
mod agent;
|
mod agent;
|
||||||
mod process;
|
mod process;
|
||||||
|
mod watcher;
|
||||||
|
|
||||||
/// PhotonCloud NodeAgent
|
/// PhotonCloud NodeAgent
|
||||||
///
|
///
|
||||||
/// - Chainfire 上の `photoncloud/clusters/{cluster_id}/nodes/{node_id}` と
|
/// - Chainfire 上の `photoncloud/clusters/{cluster_id}/nodes/{node_id}` と
|
||||||
/// `.../instances/*` をポーリング/将来的には watch してローカル状態と比較する。
|
/// `.../instances/*` を watch しつつ、周期 heartbeat/safety reconcile も行う。
|
||||||
/// - `--apply` が指定された場合のみプロセス起動/停止を行う(デフォルトは dry-run)。
|
/// - `--apply` が指定された場合のみプロセス起動/停止を行う(デフォルトは dry-run)。
|
||||||
#[derive(Parser, Debug)]
|
#[derive(Parser, Debug)]
|
||||||
#[command(author, version, about)]
|
#[command(author, version, about)]
|
||||||
|
|
@ -32,7 +33,7 @@ struct Cli {
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
node_id: String,
|
node_id: String,
|
||||||
|
|
||||||
/// ポーリング間隔(秒)
|
/// heartbeat / safety reconcile 間隔(秒)
|
||||||
#[arg(long, default_value_t = 15)]
|
#[arg(long, default_value_t = 15)]
|
||||||
interval_secs: u64,
|
interval_secs: u64,
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,74 +1,109 @@
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use chainfire_client::Client;
|
use chainfire_client::{Client, EventType};
|
||||||
use tokio::time::sleep;
|
|
||||||
use tracing::{info, warn};
|
use tracing::{info, warn};
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy)]
|
||||||
|
pub enum WatchScope {
|
||||||
|
Key,
|
||||||
|
Prefix,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct WatchChange {
|
||||||
|
pub event_type: EventType,
|
||||||
|
pub key: Vec<u8>,
|
||||||
|
pub value: Vec<u8>,
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub revision: u64,
|
||||||
|
}
|
||||||
|
|
||||||
pub struct ChainfireWatcher {
|
pub struct ChainfireWatcher {
|
||||||
endpoint: String,
|
endpoint: String,
|
||||||
prefix: String,
|
scope: WatchScope,
|
||||||
interval: Duration,
|
target: Vec<u8>,
|
||||||
|
reconnect_backoff: Duration,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ChainfireWatcher {
|
impl ChainfireWatcher {
|
||||||
pub fn new(endpoint: String, prefix: String, interval_secs: u64) -> Self {
|
pub fn key(endpoint: String, key: Vec<u8>) -> Self {
|
||||||
Self {
|
Self {
|
||||||
endpoint,
|
endpoint,
|
||||||
prefix,
|
scope: WatchScope::Key,
|
||||||
interval: Duration::from_secs(interval_secs),
|
target: key,
|
||||||
|
reconnect_backoff: Duration::from_secs(1),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn prefix(endpoint: String, prefix: Vec<u8>) -> Self {
|
||||||
|
Self {
|
||||||
|
endpoint,
|
||||||
|
scope: WatchScope::Prefix,
|
||||||
|
target: prefix,
|
||||||
|
reconnect_backoff: Duration::from_secs(1),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn watch<F>(&self, mut callback: F) -> Result<()>
|
pub async fn watch<F>(&self, mut callback: F) -> Result<()>
|
||||||
where
|
where
|
||||||
F: FnMut(Vec<(Vec<u8>, Vec<u8>)>) -> Result<()>,
|
F: FnMut(WatchChange) -> Result<()>,
|
||||||
{
|
{
|
||||||
let mut last_revision = 0u64;
|
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
match self.fetch_updates(last_revision).await {
|
match Client::connect(self.endpoint.clone()).await {
|
||||||
Ok((kvs, max_rev)) => {
|
Ok(mut client) => {
|
||||||
if !kvs.is_empty() {
|
let watch_result = match self.scope {
|
||||||
|
WatchScope::Key => client.watch(&self.target).await,
|
||||||
|
WatchScope::Prefix => client.watch_prefix(&self.target).await,
|
||||||
|
};
|
||||||
|
|
||||||
|
match watch_result {
|
||||||
|
Ok(mut handle) => {
|
||||||
info!(
|
info!(
|
||||||
prefix = %self.prefix,
|
scope = ?self.scope,
|
||||||
count = kvs.len(),
|
target = %String::from_utf8_lossy(&self.target),
|
||||||
"detected changes in Chainfire"
|
watch_id = handle.id(),
|
||||||
|
"connected ChainFire watch"
|
||||||
);
|
);
|
||||||
if let Err(e) = callback(kvs) {
|
|
||||||
warn!(error = %e, "callback failed");
|
while let Some(event) = handle.recv().await {
|
||||||
} else if max_rev > last_revision {
|
if let Err(error) = callback(WatchChange {
|
||||||
last_revision = max_rev;
|
event_type: event.event_type,
|
||||||
}
|
key: event.key,
|
||||||
} else if max_rev > last_revision {
|
value: event.value,
|
||||||
last_revision = max_rev;
|
revision: event.revision,
|
||||||
}
|
}) {
|
||||||
}
|
warn!(error = %error, "watch callback failed");
|
||||||
Err(e) => {
|
|
||||||
warn!(error = %e, "failed to fetch updates from Chainfire");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
sleep(self.interval).await;
|
warn!(
|
||||||
|
scope = ?self.scope,
|
||||||
|
target = %String::from_utf8_lossy(&self.target),
|
||||||
|
"ChainFire watch stream ended; reconnecting"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Err(error) => {
|
||||||
|
warn!(
|
||||||
|
error = %error,
|
||||||
|
scope = ?self.scope,
|
||||||
|
target = %String::from_utf8_lossy(&self.target),
|
||||||
|
"failed to create ChainFire watch"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(error) => {
|
||||||
|
warn!(
|
||||||
|
error = %error,
|
||||||
|
scope = ?self.scope,
|
||||||
|
target = %String::from_utf8_lossy(&self.target),
|
||||||
|
"failed to connect ChainFire watch"
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn fetch_updates(&self, last_revision: u64) -> Result<(Vec<(Vec<u8>, Vec<u8>)>, u64)> {
|
tokio::time::sleep(self.reconnect_backoff).await;
|
||||||
let mut client = Client::connect(self.endpoint.clone()).await?;
|
|
||||||
let (kvs, _) = client.scan_prefix(self.prefix.as_bytes(), 0).await?;
|
|
||||||
|
|
||||||
// 簡易実装: 全てのKVペアを返す(revisionフィルタリングは未実装)
|
|
||||||
let mut max_rev = last_revision;
|
|
||||||
let mut result = Vec::new();
|
|
||||||
for (k, v, rev) in kvs {
|
|
||||||
if rev > last_revision {
|
|
||||||
result.push((k, v));
|
|
||||||
if rev > max_rev {
|
|
||||||
max_rev = rev;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok((result, max_rev))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
|
||||||
|
|
@ -7,15 +7,35 @@ use deployer_types::{
|
||||||
HostDeploymentSpec, HostDeploymentStatus, InstallState, ObservedSystemState,
|
HostDeploymentSpec, HostDeploymentStatus, InstallState, ObservedSystemState,
|
||||||
ServiceInstanceSpec,
|
ServiceInstanceSpec,
|
||||||
};
|
};
|
||||||
|
use serde_json::Value;
|
||||||
use std::collections::{BTreeMap, HashMap, HashSet};
|
use std::collections::{BTreeMap, HashMap, HashSet};
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
use tokio::time::sleep;
|
use tokio::sync::{mpsc, oneshot};
|
||||||
|
use tokio::time::MissedTickBehavior;
|
||||||
use tracing::{info, warn};
|
use tracing::{info, warn};
|
||||||
|
|
||||||
|
use crate::watcher::{ChainfireWatcher, WatchChange};
|
||||||
|
|
||||||
fn cluster_prefix(cluster_namespace: &str, cluster_id: &str) -> String {
|
fn cluster_prefix(cluster_namespace: &str, cluster_id: &str) -> String {
|
||||||
format!("{}/clusters/{}/", cluster_namespace, cluster_id)
|
format!("{}/clusters/{}/", cluster_namespace, cluster_id)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn nodes_prefix(cluster_namespace: &str, cluster_id: &str) -> Vec<u8> {
|
||||||
|
format!("{}nodes/", cluster_prefix(cluster_namespace, cluster_id)).into_bytes()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn instances_prefix(cluster_namespace: &str, cluster_id: &str) -> Vec<u8> {
|
||||||
|
format!("{}instances/", cluster_prefix(cluster_namespace, cluster_id)).into_bytes()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn host_deployments_prefix(cluster_namespace: &str, cluster_id: &str) -> Vec<u8> {
|
||||||
|
format!(
|
||||||
|
"{}deployments/hosts/",
|
||||||
|
cluster_prefix(cluster_namespace, cluster_id)
|
||||||
|
)
|
||||||
|
.into_bytes()
|
||||||
|
}
|
||||||
|
|
||||||
fn key_node(cluster_namespace: &str, cluster_id: &str, node_id: &str) -> Vec<u8> {
|
fn key_node(cluster_namespace: &str, cluster_id: &str, node_id: &str) -> Vec<u8> {
|
||||||
format!(
|
format!(
|
||||||
"{}nodes/{}",
|
"{}nodes/{}",
|
||||||
|
|
@ -72,17 +92,8 @@ pub struct HostsCommand {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn run(command: HostsCommand) -> Result<()> {
|
pub async fn run(command: HostsCommand) -> Result<()> {
|
||||||
let controller = HostDeploymentController::new(command);
|
let mut controller = HostDeploymentController::new(command);
|
||||||
if controller.once {
|
controller.run_loop().await
|
||||||
controller.reconcile_once().await
|
|
||||||
} else {
|
|
||||||
loop {
|
|
||||||
if let Err(error) = controller.reconcile_once().await {
|
|
||||||
warn!(error = %error, "host deployment reconciliation failed");
|
|
||||||
}
|
|
||||||
sleep(controller.interval).await;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
struct HostDeploymentController {
|
struct HostDeploymentController {
|
||||||
|
|
@ -93,6 +104,102 @@ struct HostDeploymentController {
|
||||||
heartbeat_timeout_secs: u64,
|
heartbeat_timeout_secs: u64,
|
||||||
dry_run: bool,
|
dry_run: bool,
|
||||||
once: bool,
|
once: bool,
|
||||||
|
watched_node_signatures: HashMap<String, Vec<u8>>,
|
||||||
|
watched_desired_signatures: HashMap<String, Vec<u8>>,
|
||||||
|
watched_observed_signatures: HashMap<String, Vec<u8>>,
|
||||||
|
watched_instance_signatures: HashMap<String, Vec<u8>>,
|
||||||
|
watched_deployment_spec_signatures: HashMap<String, Vec<u8>>,
|
||||||
|
watched_deployment_status_signatures: HashMap<String, Vec<u8>>,
|
||||||
|
pending_kv_writes: HashMap<String, Vec<u8>>,
|
||||||
|
pending_kv_deletes: HashSet<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn spawn_watch_task(
|
||||||
|
watcher: ChainfireWatcher,
|
||||||
|
tx: mpsc::Sender<WatchChange>,
|
||||||
|
ready: oneshot::Sender<()>,
|
||||||
|
) {
|
||||||
|
let mut ready = Some(ready);
|
||||||
|
if let Err(error) = watcher
|
||||||
|
.watch_with_ready(
|
||||||
|
move || {
|
||||||
|
if let Some(ready) = ready.take() {
|
||||||
|
let _ = ready.send(());
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
},
|
||||||
|
move |change| {
|
||||||
|
tx.try_send(change).map_err(|send_error| {
|
||||||
|
anyhow::anyhow!("failed to enqueue watch change: {send_error}")
|
||||||
|
})
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
warn!(error = %error, "host deployment watch task exited");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn wait_for_watchers_ready(receivers: Vec<oneshot::Receiver<()>>) {
|
||||||
|
for receiver in receivers {
|
||||||
|
match tokio::time::timeout(Duration::from_secs(10), receiver).await {
|
||||||
|
Ok(Ok(())) => {}
|
||||||
|
Ok(Err(_)) => warn!("host deployment watch task ended before signaling readiness"),
|
||||||
|
Err(_) => warn!("timed out waiting for host deployment watch readiness"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn canonical_json_signature(bytes: &[u8], drop_keys: &[&str]) -> Option<Vec<u8>> {
|
||||||
|
let mut value: Value = serde_json::from_slice(bytes).ok()?;
|
||||||
|
strip_observed_fields(&mut value, drop_keys);
|
||||||
|
serde_json::to_vec(&sort_json_value(value)).ok()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn strip_observed_fields(value: &mut Value, drop_keys: &[&str]) {
|
||||||
|
match value {
|
||||||
|
Value::Object(map) => {
|
||||||
|
for key in drop_keys {
|
||||||
|
map.remove(*key);
|
||||||
|
}
|
||||||
|
for nested in map.values_mut() {
|
||||||
|
strip_observed_fields(nested, drop_keys);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Value::Array(items) => {
|
||||||
|
for item in items {
|
||||||
|
strip_observed_fields(item, drop_keys);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn sort_json_value(value: Value) -> Value {
|
||||||
|
match value {
|
||||||
|
Value::Array(items) => Value::Array(items.into_iter().map(sort_json_value).collect()),
|
||||||
|
Value::Object(map) => {
|
||||||
|
let mut keys = map.keys().cloned().collect::<Vec<_>>();
|
||||||
|
keys.sort();
|
||||||
|
let mut normalized = serde_json::Map::new();
|
||||||
|
for key in keys {
|
||||||
|
if let Some(value) = map.get(&key) {
|
||||||
|
normalized.insert(key, sort_json_value(value.clone()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Value::Object(normalized)
|
||||||
|
}
|
||||||
|
other => other,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn host_deployment_status_signature(bytes: &[u8]) -> Option<Vec<u8>> {
|
||||||
|
let value: Value = serde_json::from_slice(bytes).ok()?;
|
||||||
|
serde_json::to_vec(&serde_json::json!({
|
||||||
|
"paused_by_operator": value.get("paused_by_operator").and_then(Value::as_bool).unwrap_or(false),
|
||||||
|
"aborted": value.get("phase").and_then(Value::as_str) == Some("aborted"),
|
||||||
|
}))
|
||||||
|
.ok()
|
||||||
}
|
}
|
||||||
|
|
||||||
impl HostDeploymentController {
|
impl HostDeploymentController {
|
||||||
|
|
@ -105,10 +212,60 @@ impl HostDeploymentController {
|
||||||
heartbeat_timeout_secs: command.heartbeat_timeout_secs,
|
heartbeat_timeout_secs: command.heartbeat_timeout_secs,
|
||||||
dry_run: command.dry_run,
|
dry_run: command.dry_run,
|
||||||
once: command.once,
|
once: command.once,
|
||||||
|
watched_node_signatures: HashMap::new(),
|
||||||
|
watched_desired_signatures: HashMap::new(),
|
||||||
|
watched_observed_signatures: HashMap::new(),
|
||||||
|
watched_instance_signatures: HashMap::new(),
|
||||||
|
watched_deployment_spec_signatures: HashMap::new(),
|
||||||
|
watched_deployment_status_signatures: HashMap::new(),
|
||||||
|
pending_kv_writes: HashMap::new(),
|
||||||
|
pending_kv_deletes: HashSet::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn reconcile_once(&self) -> Result<()> {
|
async fn run_loop(&mut self) -> Result<()> {
|
||||||
|
if self.once {
|
||||||
|
return self.reconcile_once().await;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.reconcile_once().await?;
|
||||||
|
|
||||||
|
let (watch_tx, mut watch_rx) = mpsc::channel::<WatchChange>(128);
|
||||||
|
let ready_receivers = self.spawn_watchers(watch_tx);
|
||||||
|
wait_for_watchers_ready(ready_receivers).await;
|
||||||
|
self.reconcile_once().await?;
|
||||||
|
let mut interval = tokio::time::interval(self.interval);
|
||||||
|
interval.set_missed_tick_behavior(MissedTickBehavior::Delay);
|
||||||
|
interval.tick().await;
|
||||||
|
|
||||||
|
loop {
|
||||||
|
tokio::select! {
|
||||||
|
_ = interval.tick() => {
|
||||||
|
if let Err(error) = self.reconcile_once().await {
|
||||||
|
warn!(error = %error, "host deployment reconciliation failed");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
maybe_change = watch_rx.recv() => {
|
||||||
|
let Some(change) = maybe_change else {
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut should_reconcile = self.apply_watch_change(&change);
|
||||||
|
while let Ok(extra) = watch_rx.try_recv() {
|
||||||
|
should_reconcile |= self.apply_watch_change(&extra);
|
||||||
|
}
|
||||||
|
|
||||||
|
if should_reconcile {
|
||||||
|
if let Err(error) = self.reconcile_once().await {
|
||||||
|
warn!(error = %error, "host deployment watch-triggered reconciliation failed");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn reconcile_once(&mut self) -> Result<()> {
|
||||||
let mut client = Client::connect(self.endpoint.clone()).await?;
|
let mut client = Client::connect(self.endpoint.clone()).await?;
|
||||||
let nodes = self.load_nodes(&mut client).await?;
|
let nodes = self.load_nodes(&mut client).await?;
|
||||||
let desired_systems = self.load_desired_systems(&mut client).await?;
|
let desired_systems = self.load_desired_systems(&mut client).await?;
|
||||||
|
|
@ -149,52 +306,309 @@ impl HostDeploymentController {
|
||||||
}
|
}
|
||||||
|
|
||||||
for desired in &plan.desired_upserts {
|
for desired in &plan.desired_upserts {
|
||||||
client
|
let key = key_desired_system(&self.cluster_namespace, &self.cluster_id, &desired.node_id);
|
||||||
.put(
|
let payload = serde_json::to_vec(desired)?;
|
||||||
&key_desired_system(
|
let key_str = self.record_pending_write(&key, payload.clone());
|
||||||
&self.cluster_namespace,
|
if let Err(error) = client.put(&key, &payload).await {
|
||||||
&self.cluster_id,
|
self.pending_kv_writes.remove(&key_str);
|
||||||
&desired.node_id,
|
return Err(error.into());
|
||||||
),
|
}
|
||||||
&serde_json::to_vec(desired)?,
|
|
||||||
)
|
|
||||||
.await?;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for node_id in &plan.desired_deletes {
|
for node_id in &plan.desired_deletes {
|
||||||
client
|
let key = key_desired_system(&self.cluster_namespace, &self.cluster_id, node_id);
|
||||||
.delete(&key_desired_system(
|
let key_str = self.record_pending_delete(&key);
|
||||||
&self.cluster_namespace,
|
if !client.delete(&key).await? {
|
||||||
&self.cluster_id,
|
self.pending_kv_deletes.remove(&key_str);
|
||||||
node_id,
|
}
|
||||||
))
|
|
||||||
.await?;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for node in plan.node_updates.values() {
|
for node in plan.node_updates.values() {
|
||||||
client
|
let key = key_node(&self.cluster_namespace, &self.cluster_id, &node.node_id);
|
||||||
.put(
|
let payload = serde_json::to_vec(node)?;
|
||||||
&key_node(&self.cluster_namespace, &self.cluster_id, &node.node_id),
|
let key_str = self.record_pending_write(&key, payload.clone());
|
||||||
&serde_json::to_vec(node)?,
|
if let Err(error) = client.put(&key, &payload).await {
|
||||||
)
|
self.pending_kv_writes.remove(&key_str);
|
||||||
.await?;
|
return Err(error.into());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
client
|
let key = key_host_deployment_status(
|
||||||
.put(
|
|
||||||
&key_host_deployment_status(
|
|
||||||
&self.cluster_namespace,
|
&self.cluster_namespace,
|
||||||
&self.cluster_id,
|
&self.cluster_id,
|
||||||
&deployment.name,
|
&deployment.name,
|
||||||
),
|
);
|
||||||
&serde_json::to_vec(&plan.status)?,
|
let payload = serde_json::to_vec(&plan.status)?;
|
||||||
)
|
let key_str = self.record_pending_write(&key, payload.clone());
|
||||||
.await?;
|
if let Err(error) = client.put(&key, &payload).await {
|
||||||
|
self.pending_kv_writes.remove(&key_str);
|
||||||
|
return Err(error.into());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn spawn_watchers(&self, tx: mpsc::Sender<WatchChange>) -> Vec<oneshot::Receiver<()>> {
|
||||||
|
let mut ready_receivers = Vec::new();
|
||||||
|
|
||||||
|
let node_watcher = ChainfireWatcher::prefix(
|
||||||
|
self.endpoint.clone(),
|
||||||
|
nodes_prefix(&self.cluster_namespace, &self.cluster_id),
|
||||||
|
);
|
||||||
|
let (node_ready_tx, node_ready_rx) = oneshot::channel();
|
||||||
|
tokio::spawn(spawn_watch_task(node_watcher, tx.clone(), node_ready_tx));
|
||||||
|
ready_receivers.push(node_ready_rx);
|
||||||
|
|
||||||
|
let instance_watcher = ChainfireWatcher::prefix(
|
||||||
|
self.endpoint.clone(),
|
||||||
|
instances_prefix(&self.cluster_namespace, &self.cluster_id),
|
||||||
|
);
|
||||||
|
let (instance_ready_tx, instance_ready_rx) = oneshot::channel();
|
||||||
|
tokio::spawn(spawn_watch_task(
|
||||||
|
instance_watcher,
|
||||||
|
tx.clone(),
|
||||||
|
instance_ready_tx,
|
||||||
|
));
|
||||||
|
ready_receivers.push(instance_ready_rx);
|
||||||
|
|
||||||
|
let deployment_watcher = ChainfireWatcher::prefix(
|
||||||
|
self.endpoint.clone(),
|
||||||
|
host_deployments_prefix(&self.cluster_namespace, &self.cluster_id),
|
||||||
|
);
|
||||||
|
let (deployment_ready_tx, deployment_ready_rx) = oneshot::channel();
|
||||||
|
tokio::spawn(spawn_watch_task(deployment_watcher, tx, deployment_ready_tx));
|
||||||
|
ready_receivers.push(deployment_ready_rx);
|
||||||
|
|
||||||
|
ready_receivers
|
||||||
|
}
|
||||||
|
|
||||||
|
fn record_pending_write(&mut self, key: &[u8], payload: Vec<u8>) -> String {
|
||||||
|
let key_str = String::from_utf8_lossy(key).to_string();
|
||||||
|
self.pending_kv_deletes.remove(&key_str);
|
||||||
|
self.pending_kv_writes.insert(key_str.clone(), payload);
|
||||||
|
key_str
|
||||||
|
}
|
||||||
|
|
||||||
|
fn record_pending_delete(&mut self, key: &[u8]) -> String {
|
||||||
|
let key_str = String::from_utf8_lossy(key).to_string();
|
||||||
|
self.pending_kv_writes.remove(&key_str);
|
||||||
|
self.pending_kv_deletes.insert(key_str.clone());
|
||||||
|
key_str
|
||||||
|
}
|
||||||
|
|
||||||
|
fn consume_pending_write(&mut self, key: &str, change: &WatchChange) -> Option<Vec<u8>> {
|
||||||
|
let expected = self.pending_kv_writes.remove(key)?;
|
||||||
|
if matches!(change.event_type, chainfire_client::EventType::Put) && change.value == expected {
|
||||||
|
self.pending_kv_deletes.remove(key);
|
||||||
|
return Some(expected);
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
fn consume_pending_delete(&mut self, key: &str, change: &WatchChange) -> bool {
|
||||||
|
self.pending_kv_deletes.remove(key)
|
||||||
|
&& matches!(change.event_type, chainfire_client::EventType::Delete)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn apply_watch_change(&mut self, change: &WatchChange) -> bool {
|
||||||
|
let key = String::from_utf8_lossy(&change.key).to_string();
|
||||||
|
let nodes_prefix = String::from_utf8_lossy(&nodes_prefix(&self.cluster_namespace, &self.cluster_id))
|
||||||
|
.to_string();
|
||||||
|
let instances_prefix =
|
||||||
|
String::from_utf8_lossy(&instances_prefix(&self.cluster_namespace, &self.cluster_id))
|
||||||
|
.to_string();
|
||||||
|
let deployments_prefix = String::from_utf8_lossy(&host_deployments_prefix(
|
||||||
|
&self.cluster_namespace,
|
||||||
|
&self.cluster_id,
|
||||||
|
))
|
||||||
|
.to_string();
|
||||||
|
|
||||||
|
if let Some(suffix) = key.strip_prefix(&nodes_prefix) {
|
||||||
|
if suffix.ends_with("/desired-system") {
|
||||||
|
return self.apply_desired_system_watch_change(&key, change);
|
||||||
|
}
|
||||||
|
if suffix.ends_with("/observed-system") {
|
||||||
|
return self.apply_observed_system_watch_change(&key, change);
|
||||||
|
}
|
||||||
|
if !suffix.is_empty() && !suffix.contains('/') {
|
||||||
|
return self.apply_node_record_watch_change(&key, change);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if key.starts_with(&instances_prefix) {
|
||||||
|
return self.apply_instance_watch_change(&key, change);
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(suffix) = key.strip_prefix(&deployments_prefix) {
|
||||||
|
if suffix.ends_with("/spec") {
|
||||||
|
return self.apply_deployment_spec_watch_change(&key, change);
|
||||||
|
}
|
||||||
|
if suffix.ends_with("/status") {
|
||||||
|
return self.apply_deployment_status_watch_change(&key, change);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
fn apply_node_record_watch_change(&mut self, key: &str, change: &WatchChange) -> bool {
|
||||||
|
if self.consume_pending_delete(key, change) {
|
||||||
|
self.watched_node_signatures.remove(key);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if let Some(payload) = self.consume_pending_write(key, change) {
|
||||||
|
if let Some(signature) = canonical_json_signature(&payload, &["last_heartbeat"]) {
|
||||||
|
self.watched_node_signatures
|
||||||
|
.insert(key.to_string(), signature);
|
||||||
|
} else {
|
||||||
|
self.watched_node_signatures.remove(key);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if matches!(change.event_type, chainfire_client::EventType::Delete) {
|
||||||
|
self.watched_node_signatures.remove(key);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
let Some(signature) = canonical_json_signature(&change.value, &["last_heartbeat"]) else {
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
if self.watched_node_signatures.get(key) == Some(&signature) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
self.watched_node_signatures
|
||||||
|
.insert(key.to_string(), signature);
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
|
fn apply_desired_system_watch_change(&mut self, key: &str, change: &WatchChange) -> bool {
|
||||||
|
if self.consume_pending_delete(key, change) {
|
||||||
|
self.watched_desired_signatures.remove(key);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if let Some(payload) = self.consume_pending_write(key, change) {
|
||||||
|
if let Some(signature) = canonical_json_signature(&payload, &[]) {
|
||||||
|
self.watched_desired_signatures
|
||||||
|
.insert(key.to_string(), signature);
|
||||||
|
} else {
|
||||||
|
self.watched_desired_signatures.remove(key);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if matches!(change.event_type, chainfire_client::EventType::Delete) {
|
||||||
|
self.watched_desired_signatures.remove(key);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
let Some(signature) = canonical_json_signature(&change.value, &[]) else {
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
if self.watched_desired_signatures.get(key) == Some(&signature) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
self.watched_desired_signatures
|
||||||
|
.insert(key.to_string(), signature);
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
|
fn apply_observed_system_watch_change(&mut self, key: &str, change: &WatchChange) -> bool {
|
||||||
|
if matches!(change.event_type, chainfire_client::EventType::Delete) {
|
||||||
|
self.watched_observed_signatures.remove(key);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
let Some(signature) = canonical_json_signature(
|
||||||
|
&change.value,
|
||||||
|
&[
|
||||||
|
"flake_root",
|
||||||
|
"configured_system",
|
||||||
|
"current_system",
|
||||||
|
"booted_system",
|
||||||
|
"rollback_system",
|
||||||
|
"switch_action",
|
||||||
|
"reboot_required",
|
||||||
|
"last_attempt",
|
||||||
|
"last_success",
|
||||||
|
"last_error",
|
||||||
|
],
|
||||||
|
) else {
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
if self.watched_observed_signatures.get(key) == Some(&signature) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
self.watched_observed_signatures
|
||||||
|
.insert(key.to_string(), signature);
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
|
fn apply_instance_watch_change(&mut self, key: &str, change: &WatchChange) -> bool {
|
||||||
|
if matches!(change.event_type, chainfire_client::EventType::Delete) {
|
||||||
|
self.watched_instance_signatures.remove(key);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
let Some(signature) = canonical_json_signature(&change.value, &["last_heartbeat", "observed_at"])
|
||||||
|
else {
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
if self.watched_instance_signatures.get(key) == Some(&signature) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
self.watched_instance_signatures
|
||||||
|
.insert(key.to_string(), signature);
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
|
fn apply_deployment_spec_watch_change(&mut self, key: &str, change: &WatchChange) -> bool {
|
||||||
|
if matches!(change.event_type, chainfire_client::EventType::Delete) {
|
||||||
|
self.watched_deployment_spec_signatures.remove(key);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
let Some(signature) = canonical_json_signature(&change.value, &[]) else {
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
if self.watched_deployment_spec_signatures.get(key) == Some(&signature) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
self.watched_deployment_spec_signatures
|
||||||
|
.insert(key.to_string(), signature);
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
|
fn apply_deployment_status_watch_change(&mut self, key: &str, change: &WatchChange) -> bool {
|
||||||
|
if self.consume_pending_delete(key, change) {
|
||||||
|
self.watched_deployment_status_signatures.remove(key);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if let Some(payload) = self.consume_pending_write(key, change) {
|
||||||
|
if let Some(signature) = host_deployment_status_signature(&payload) {
|
||||||
|
self.watched_deployment_status_signatures
|
||||||
|
.insert(key.to_string(), signature);
|
||||||
|
} else {
|
||||||
|
self.watched_deployment_status_signatures.remove(key);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if matches!(change.event_type, chainfire_client::EventType::Delete) {
|
||||||
|
self.watched_deployment_status_signatures.remove(key);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
let Some(signature) = host_deployment_status_signature(&change.value) else {
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
if self.watched_deployment_status_signatures.get(key) == Some(&signature) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
self.watched_deployment_status_signatures
|
||||||
|
.insert(key.to_string(), signature);
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
async fn load_nodes(&self, client: &mut Client) -> Result<Vec<ClusterNodeRecord>> {
|
async fn load_nodes(&self, client: &mut Client) -> Result<Vec<ClusterNodeRecord>> {
|
||||||
let prefix = format!(
|
let prefix = format!(
|
||||||
"{}nodes/",
|
"{}nodes/",
|
||||||
|
|
@ -385,6 +799,9 @@ fn plan_host_deployment(
|
||||||
let operator_paused = existing_status
|
let operator_paused = existing_status
|
||||||
.map(|status| status.paused_by_operator)
|
.map(|status| status.paused_by_operator)
|
||||||
.unwrap_or(false);
|
.unwrap_or(false);
|
||||||
|
let aborted = existing_status
|
||||||
|
.and_then(|status| status.phase.as_deref())
|
||||||
|
== Some("aborted");
|
||||||
let spec_paused = deployment.paused.unwrap_or(false);
|
let spec_paused = deployment.paused.unwrap_or(false);
|
||||||
let mut desired_deletes = desired_systems
|
let mut desired_deletes = desired_systems
|
||||||
.iter()
|
.iter()
|
||||||
|
|
@ -395,6 +812,44 @@ fn plan_host_deployment(
|
||||||
.map(|(node_id, _)| node_id.clone())
|
.map(|(node_id, _)| node_id.clone())
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
if aborted {
|
||||||
|
desired_deletes = desired_systems
|
||||||
|
.iter()
|
||||||
|
.filter(|(_, desired)| {
|
||||||
|
desired.deployment_id.as_deref() == Some(deployment.name.as_str())
|
||||||
|
})
|
||||||
|
.map(|(node_id, _)| node_id.clone())
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
for node in &selector_matches {
|
||||||
|
if node.state.as_deref() == Some("draining") {
|
||||||
|
let mut updated = (*node).clone();
|
||||||
|
updated.state = Some("active".to_string());
|
||||||
|
node_updates.insert(updated.node_id.clone(), updated);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut status = existing_status.cloned().unwrap_or_default();
|
||||||
|
status.name = deployment.name.clone();
|
||||||
|
status.paused = true;
|
||||||
|
status.paused_by_operator = true;
|
||||||
|
status.phase = Some("aborted".to_string());
|
||||||
|
if status.message.is_none() {
|
||||||
|
status.message = Some("aborted by operator".to_string());
|
||||||
|
}
|
||||||
|
status.updated_at = Some(now);
|
||||||
|
|
||||||
|
desired_deletes.sort();
|
||||||
|
desired_deletes.dedup();
|
||||||
|
|
||||||
|
return HostDeploymentPlan {
|
||||||
|
status,
|
||||||
|
desired_upserts: Vec::new(),
|
||||||
|
desired_deletes,
|
||||||
|
node_updates,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
for node in &selector_matches {
|
for node in &selector_matches {
|
||||||
let desired = desired_systems.get(&node.node_id);
|
let desired = desired_systems.get(&node.node_id);
|
||||||
let observed = observed_systems.get(&node.node_id);
|
let observed = observed_systems.get(&node.node_id);
|
||||||
|
|
@ -707,6 +1162,7 @@ fn dedup_sorted(mut values: Vec<String>) -> Vec<String> {
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use chainfire_client::EventType;
|
||||||
|
|
||||||
fn test_node(node_id: &str, failure_domain: &str) -> ClusterNodeRecord {
|
fn test_node(node_id: &str, failure_domain: &str) -> ClusterNodeRecord {
|
||||||
ClusterNodeRecord {
|
ClusterNodeRecord {
|
||||||
|
|
@ -760,6 +1216,18 @@ mod tests {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn test_controller() -> HostDeploymentController {
|
||||||
|
HostDeploymentController::new(HostsCommand {
|
||||||
|
endpoint: "http://127.0.0.1:7000".to_string(),
|
||||||
|
cluster_namespace: "photoncloud".to_string(),
|
||||||
|
cluster_id: "test-cluster".to_string(),
|
||||||
|
interval_secs: 1,
|
||||||
|
heartbeat_timeout_secs: 300,
|
||||||
|
dry_run: true,
|
||||||
|
once: false,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn plan_rollout_starts_one_node_per_batch() {
|
fn plan_rollout_starts_one_node_per_batch() {
|
||||||
let deployment = test_deployment();
|
let deployment = test_deployment();
|
||||||
|
|
@ -859,4 +1327,185 @@ mod tests {
|
||||||
);
|
);
|
||||||
assert_eq!(plan.status.in_progress_nodes, vec!["node01".to_string()]);
|
assert_eq!(plan.status.in_progress_nodes, vec!["node01".to_string()]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn plan_rollout_keeps_aborted_state_and_clears_desired_system() {
|
||||||
|
let deployment = test_deployment();
|
||||||
|
let mut draining = test_node("node01", "rack-a");
|
||||||
|
draining.state = Some("draining".to_string());
|
||||||
|
let nodes = vec![draining];
|
||||||
|
let desired = HashMap::from([(
|
||||||
|
"node01".to_string(),
|
||||||
|
DesiredSystemSpec {
|
||||||
|
node_id: "node01".to_string(),
|
||||||
|
deployment_id: Some("worker-rollout".to_string()),
|
||||||
|
nixos_configuration: Some("worker-golden".to_string()),
|
||||||
|
target_system: Some("/nix/store/worker-golden".to_string()),
|
||||||
|
flake_ref: None,
|
||||||
|
switch_action: Some("switch".to_string()),
|
||||||
|
health_check_command: Vec::new(),
|
||||||
|
rollback_on_failure: Some(true),
|
||||||
|
drain_before_apply: Some(false),
|
||||||
|
},
|
||||||
|
)]);
|
||||||
|
let status = HostDeploymentStatus {
|
||||||
|
name: "worker-rollout".to_string(),
|
||||||
|
phase: Some("aborted".to_string()),
|
||||||
|
paused: true,
|
||||||
|
paused_by_operator: true,
|
||||||
|
message: Some("aborted by operator".to_string()),
|
||||||
|
..HostDeploymentStatus::default()
|
||||||
|
};
|
||||||
|
|
||||||
|
let plan = plan_host_deployment(
|
||||||
|
&deployment,
|
||||||
|
Some(&status),
|
||||||
|
&nodes,
|
||||||
|
&desired,
|
||||||
|
&HashMap::new(),
|
||||||
|
&[],
|
||||||
|
300,
|
||||||
|
);
|
||||||
|
|
||||||
|
assert!(plan.desired_upserts.is_empty());
|
||||||
|
assert_eq!(plan.desired_deletes, vec!["node01".to_string()]);
|
||||||
|
assert_eq!(plan.status.phase.as_deref(), Some("aborted"));
|
||||||
|
assert!(plan.status.paused);
|
||||||
|
assert_eq!(
|
||||||
|
plan.node_updates
|
||||||
|
.get("node01")
|
||||||
|
.and_then(|node| node.state.as_deref()),
|
||||||
|
Some("active")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn host_watch_ignores_node_heartbeat_only_changes() {
|
||||||
|
let mut controller = test_controller();
|
||||||
|
let node_key = key_node(
|
||||||
|
&controller.cluster_namespace,
|
||||||
|
&controller.cluster_id,
|
||||||
|
"node01",
|
||||||
|
);
|
||||||
|
|
||||||
|
let first = WatchChange {
|
||||||
|
event_type: EventType::Put,
|
||||||
|
key: node_key.clone(),
|
||||||
|
value: serde_json::to_vec(&serde_json::json!({
|
||||||
|
"node_id": "node01",
|
||||||
|
"hostname": "node01",
|
||||||
|
"ip": "10.0.0.11",
|
||||||
|
"roles": ["worker"],
|
||||||
|
"labels": {"tier": "general"},
|
||||||
|
"state": "active",
|
||||||
|
"last_heartbeat": "2026-04-01T00:00:00Z"
|
||||||
|
}))
|
||||||
|
.unwrap(),
|
||||||
|
revision: 1,
|
||||||
|
};
|
||||||
|
assert!(controller.apply_watch_change(&first));
|
||||||
|
|
||||||
|
let heartbeat_only = WatchChange {
|
||||||
|
revision: 2,
|
||||||
|
value: serde_json::to_vec(&serde_json::json!({
|
||||||
|
"node_id": "node01",
|
||||||
|
"hostname": "node01",
|
||||||
|
"ip": "10.0.0.11",
|
||||||
|
"roles": ["worker"],
|
||||||
|
"labels": {"tier": "general"},
|
||||||
|
"state": "active",
|
||||||
|
"last_heartbeat": "2026-04-01T00:00:05Z"
|
||||||
|
}))
|
||||||
|
.unwrap(),
|
||||||
|
..first.clone()
|
||||||
|
};
|
||||||
|
assert!(!controller.apply_watch_change(&heartbeat_only));
|
||||||
|
|
||||||
|
let drain = WatchChange {
|
||||||
|
revision: 3,
|
||||||
|
value: serde_json::to_vec(&serde_json::json!({
|
||||||
|
"node_id": "node01",
|
||||||
|
"hostname": "node01",
|
||||||
|
"ip": "10.0.0.11",
|
||||||
|
"roles": ["worker"],
|
||||||
|
"labels": {"tier": "general"},
|
||||||
|
"state": "draining",
|
||||||
|
"last_heartbeat": "2026-04-01T00:00:10Z"
|
||||||
|
}))
|
||||||
|
.unwrap(),
|
||||||
|
..first
|
||||||
|
};
|
||||||
|
assert!(controller.apply_watch_change(&drain));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn host_watch_reacts_only_to_operator_pause_or_abort_status_changes() {
|
||||||
|
let mut controller = test_controller();
|
||||||
|
let key = key_host_deployment_status(
|
||||||
|
&controller.cluster_namespace,
|
||||||
|
&controller.cluster_id,
|
||||||
|
"worker-rollout",
|
||||||
|
);
|
||||||
|
|
||||||
|
let running = WatchChange {
|
||||||
|
event_type: EventType::Put,
|
||||||
|
key: key.clone(),
|
||||||
|
value: serde_json::to_vec(&serde_json::json!({
|
||||||
|
"name": "worker-rollout",
|
||||||
|
"phase": "running",
|
||||||
|
"paused": false,
|
||||||
|
"paused_by_operator": false,
|
||||||
|
"selected_nodes": ["node01"],
|
||||||
|
"in_progress_nodes": ["node01"],
|
||||||
|
"updated_at": "2026-04-01T00:00:00Z"
|
||||||
|
}))
|
||||||
|
.unwrap(),
|
||||||
|
revision: 1,
|
||||||
|
};
|
||||||
|
assert!(controller.apply_watch_change(&running));
|
||||||
|
|
||||||
|
let completed = WatchChange {
|
||||||
|
revision: 2,
|
||||||
|
value: serde_json::to_vec(&serde_json::json!({
|
||||||
|
"name": "worker-rollout",
|
||||||
|
"phase": "completed",
|
||||||
|
"paused": false,
|
||||||
|
"paused_by_operator": false,
|
||||||
|
"selected_nodes": ["node01"],
|
||||||
|
"completed_nodes": ["node01"],
|
||||||
|
"updated_at": "2026-04-01T00:00:10Z"
|
||||||
|
}))
|
||||||
|
.unwrap(),
|
||||||
|
..running.clone()
|
||||||
|
};
|
||||||
|
assert!(!controller.apply_watch_change(&completed));
|
||||||
|
|
||||||
|
let paused = WatchChange {
|
||||||
|
revision: 3,
|
||||||
|
value: serde_json::to_vec(&serde_json::json!({
|
||||||
|
"name": "worker-rollout",
|
||||||
|
"phase": "paused",
|
||||||
|
"paused": true,
|
||||||
|
"paused_by_operator": true,
|
||||||
|
"updated_at": "2026-04-01T00:00:20Z"
|
||||||
|
}))
|
||||||
|
.unwrap(),
|
||||||
|
..running.clone()
|
||||||
|
};
|
||||||
|
assert!(controller.apply_watch_change(&paused));
|
||||||
|
|
||||||
|
let aborted = WatchChange {
|
||||||
|
revision: 4,
|
||||||
|
value: serde_json::to_vec(&serde_json::json!({
|
||||||
|
"name": "worker-rollout",
|
||||||
|
"phase": "aborted",
|
||||||
|
"paused": true,
|
||||||
|
"paused_by_operator": true,
|
||||||
|
"updated_at": "2026-04-01T00:00:30Z"
|
||||||
|
}))
|
||||||
|
.unwrap(),
|
||||||
|
..running
|
||||||
|
};
|
||||||
|
assert!(controller.apply_watch_change(&aborted));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -40,6 +40,7 @@ use flashdns_api::proto::{
|
||||||
};
|
};
|
||||||
|
|
||||||
mod hosts;
|
mod hosts;
|
||||||
|
mod watcher;
|
||||||
|
|
||||||
#[derive(Parser)]
|
#[derive(Parser)]
|
||||||
#[command(author, version, about)]
|
#[command(author, version, about)]
|
||||||
|
|
|
||||||
122
deployer/crates/plasmacloud-reconciler/src/watcher.rs
Normal file
122
deployer/crates/plasmacloud-reconciler/src/watcher.rs
Normal file
|
|
@ -0,0 +1,122 @@
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use anyhow::Result;
|
||||||
|
use chainfire_client::{Client, EventType};
|
||||||
|
use tracing::{info, warn};
|
||||||
|
|
||||||
|
#[allow(dead_code)]
|
||||||
|
#[derive(Debug, Clone, Copy)]
|
||||||
|
pub enum WatchScope {
|
||||||
|
Key,
|
||||||
|
Prefix,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct WatchChange {
|
||||||
|
pub event_type: EventType,
|
||||||
|
pub key: Vec<u8>,
|
||||||
|
pub value: Vec<u8>,
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub revision: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct ChainfireWatcher {
|
||||||
|
endpoint: String,
|
||||||
|
scope: WatchScope,
|
||||||
|
target: Vec<u8>,
|
||||||
|
reconnect_backoff: Duration,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ChainfireWatcher {
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub fn key(endpoint: String, key: Vec<u8>) -> Self {
|
||||||
|
Self {
|
||||||
|
endpoint,
|
||||||
|
scope: WatchScope::Key,
|
||||||
|
target: key,
|
||||||
|
reconnect_backoff: Duration::from_secs(1),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn prefix(endpoint: String, prefix: Vec<u8>) -> Self {
|
||||||
|
Self {
|
||||||
|
endpoint,
|
||||||
|
scope: WatchScope::Prefix,
|
||||||
|
target: prefix,
|
||||||
|
reconnect_backoff: Duration::from_secs(1),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub async fn watch<F>(&self, mut callback: F) -> Result<()>
|
||||||
|
where
|
||||||
|
F: FnMut(WatchChange) -> Result<()>,
|
||||||
|
{
|
||||||
|
self.watch_with_ready(|| Ok(()), move |change| callback(change))
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn watch_with_ready<F, G>(&self, mut on_connected: G, mut callback: F) -> Result<()>
|
||||||
|
where
|
||||||
|
F: FnMut(WatchChange) -> Result<()>,
|
||||||
|
G: FnMut() -> Result<()>,
|
||||||
|
{
|
||||||
|
loop {
|
||||||
|
match Client::connect(self.endpoint.clone()).await {
|
||||||
|
Ok(mut client) => {
|
||||||
|
let watch_result = match self.scope {
|
||||||
|
WatchScope::Key => client.watch(&self.target).await,
|
||||||
|
WatchScope::Prefix => client.watch_prefix(&self.target).await,
|
||||||
|
};
|
||||||
|
|
||||||
|
match watch_result {
|
||||||
|
Ok(mut handle) => {
|
||||||
|
info!(
|
||||||
|
scope = ?self.scope,
|
||||||
|
target = %String::from_utf8_lossy(&self.target),
|
||||||
|
watch_id = handle.id(),
|
||||||
|
"connected ChainFire watch"
|
||||||
|
);
|
||||||
|
on_connected()?;
|
||||||
|
|
||||||
|
while let Some(event) = handle.recv().await {
|
||||||
|
if let Err(error) = callback(WatchChange {
|
||||||
|
event_type: event.event_type,
|
||||||
|
key: event.key,
|
||||||
|
value: event.value,
|
||||||
|
revision: event.revision,
|
||||||
|
}) {
|
||||||
|
warn!(error = %error, "watch callback failed");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
warn!(
|
||||||
|
scope = ?self.scope,
|
||||||
|
target = %String::from_utf8_lossy(&self.target),
|
||||||
|
"ChainFire watch stream ended; reconnecting"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Err(error) => {
|
||||||
|
warn!(
|
||||||
|
error = %error,
|
||||||
|
scope = ?self.scope,
|
||||||
|
target = %String::from_utf8_lossy(&self.target),
|
||||||
|
"failed to create ChainFire watch"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(error) => {
|
||||||
|
warn!(
|
||||||
|
error = %error,
|
||||||
|
scope = ?self.scope,
|
||||||
|
target = %String::from_utf8_lossy(&self.target),
|
||||||
|
"failed to connect ChainFire watch"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
tokio::time::sleep(self.reconnect_backoff).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -41,10 +41,16 @@ run_fleet_scheduler_bin() {
|
||||||
|
|
||||||
tmp_dir="$(mktemp -d)"
|
tmp_dir="$(mktemp -d)"
|
||||||
cf_pid=""
|
cf_pid=""
|
||||||
|
controller_pids=()
|
||||||
|
|
||||||
cleanup() {
|
cleanup() {
|
||||||
set +e
|
set +e
|
||||||
|
|
||||||
|
for pid in "${controller_pids[@]}"; do
|
||||||
|
kill "$pid" 2>/dev/null || true
|
||||||
|
wait "$pid" 2>/dev/null || true
|
||||||
|
done
|
||||||
|
|
||||||
if [[ -d "$tmp_dir/pids" ]]; then
|
if [[ -d "$tmp_dir/pids" ]]; then
|
||||||
while IFS= read -r -d '' pid_file; do
|
while IFS= read -r -d '' pid_file; do
|
||||||
[[ -f "$pid_file" ]] || continue
|
[[ -f "$pid_file" ]] || continue
|
||||||
|
|
@ -57,7 +63,11 @@ cleanup() {
|
||||||
wait "$cf_pid" 2>/dev/null || true
|
wait "$cf_pid" 2>/dev/null || true
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [[ "${PHOTONCLOUD_KEEP_TMP:-}" == "1" ]]; then
|
||||||
|
echo "Keeping temporary directory: $tmp_dir" >&2
|
||||||
|
else
|
||||||
rm -rf "$tmp_dir"
|
rm -rf "$tmp_dir"
|
||||||
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
trap cleanup EXIT
|
trap cleanup EXIT
|
||||||
|
|
@ -364,7 +374,7 @@ run_deployer_ctl() {
|
||||||
"$@"
|
"$@"
|
||||||
}
|
}
|
||||||
|
|
||||||
run_node_agent_once() {
|
run_node_agent_bg() {
|
||||||
local node_id="$1"
|
local node_id="$1"
|
||||||
local pid_dir="$tmp_dir/pids/$node_id"
|
local pid_dir="$tmp_dir/pids/$node_id"
|
||||||
mkdir -p "$pid_dir"
|
mkdir -p "$pid_dir"
|
||||||
|
|
@ -375,26 +385,111 @@ run_node_agent_once() {
|
||||||
--pid-dir "$pid_dir" \
|
--pid-dir "$pid_dir" \
|
||||||
--interval-secs 1 \
|
--interval-secs 1 \
|
||||||
--apply \
|
--apply \
|
||||||
--once
|
>"$tmp_dir/node-agent-${node_id}.log" 2>&1 &
|
||||||
|
controller_pids+=("$!")
|
||||||
}
|
}
|
||||||
|
|
||||||
run_scheduler_once() {
|
run_scheduler_bg() {
|
||||||
run_fleet_scheduler_bin \
|
run_fleet_scheduler_bin \
|
||||||
--chainfire-endpoint "$endpoint" \
|
--chainfire-endpoint "$endpoint" \
|
||||||
--cluster-id test-cluster \
|
--cluster-id test-cluster \
|
||||||
--interval-secs 1 \
|
--interval-secs 300 \
|
||||||
--once
|
>"$tmp_dir/fleet-scheduler.log" 2>&1 &
|
||||||
|
controller_pids+=("$!")
|
||||||
|
}
|
||||||
|
|
||||||
|
wait_for_service_state() {
|
||||||
|
local service_name="$1"
|
||||||
|
local expected_phase="$2"
|
||||||
|
local expected_instances="$3"
|
||||||
|
local expected_state="${4:--}"
|
||||||
|
local timeout_secs="${5:-120}"
|
||||||
|
local inspect_path="$tmp_dir/${service_name}-wait.json"
|
||||||
|
local deadline=$((SECONDS + timeout_secs))
|
||||||
|
|
||||||
|
while (( SECONDS < deadline )); do
|
||||||
|
if run_deployer_ctl service inspect --name "$service_name" --include-instances >"$inspect_path" 2>/dev/null; then
|
||||||
|
if python3 - "$inspect_path" "$expected_phase" "$expected_instances" "$expected_state" <<'PY'
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
|
||||||
|
payload = json.load(open(sys.argv[1], "r", encoding="utf-8"))
|
||||||
|
expected_phase = sys.argv[2]
|
||||||
|
expected_instances = int(sys.argv[3])
|
||||||
|
expected_state = sys.argv[4]
|
||||||
|
|
||||||
|
status = payload.get("status") or {}
|
||||||
|
instances = payload.get("instances") or []
|
||||||
|
|
||||||
|
if status.get("phase") != expected_phase:
|
||||||
|
raise SystemExit(1)
|
||||||
|
if len(instances) != expected_instances:
|
||||||
|
raise SystemExit(1)
|
||||||
|
if expected_state != "-":
|
||||||
|
actual_states = sorted(instance.get("state") for instance in instances)
|
||||||
|
if actual_states != [expected_state] * expected_instances:
|
||||||
|
raise SystemExit(1)
|
||||||
|
PY
|
||||||
|
then
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
|
||||||
|
echo "timed out waiting for ${service_name} phase=${expected_phase} instances=${expected_instances}" >&2
|
||||||
|
[[ -f "$inspect_path" ]] && cat "$inspect_path" >&2
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
wait_for_endpoint_convergence() {
|
||||||
|
local timeout_secs="${1:-60}"
|
||||||
|
local deadline=$((SECONDS + timeout_secs))
|
||||||
|
|
||||||
|
while (( SECONDS < deadline )); do
|
||||||
|
if python3 - <<'PY'
|
||||||
|
import socket
|
||||||
|
import urllib.request
|
||||||
|
|
||||||
|
with urllib.request.urlopen("http://127.0.0.2:18080/", timeout=5) as response:
|
||||||
|
if response.status != 200:
|
||||||
|
raise SystemExit(f"node01 endpoint returned {response.status}")
|
||||||
|
with urllib.request.urlopen("http://127.0.0.2:18081/", timeout=5) as response:
|
||||||
|
if response.status != 200:
|
||||||
|
raise SystemExit(f"node01 worker endpoint returned {response.status}")
|
||||||
|
|
||||||
|
for port, label in ((18080, "api"), (18081, "worker")):
|
||||||
|
sock = socket.socket()
|
||||||
|
sock.settimeout(1.5)
|
||||||
|
try:
|
||||||
|
sock.connect(("127.0.0.3", port))
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
raise SystemExit(f"node02 {label} endpoint still accepts connections after scale-down")
|
||||||
|
finally:
|
||||||
|
sock.close()
|
||||||
|
PY
|
||||||
|
then
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
|
||||||
|
echo "timed out waiting for endpoint convergence after scale-down" >&2
|
||||||
|
return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
echo "Applying cluster declaration"
|
echo "Applying cluster declaration"
|
||||||
run_deployer_ctl apply --config "$tmp_dir/cluster.yaml"
|
run_deployer_ctl apply --config "$tmp_dir/cluster.yaml"
|
||||||
|
|
||||||
echo "Activating nodes through node-agent"
|
echo "Starting watch-driven controllers"
|
||||||
run_node_agent_once node01
|
run_scheduler_bg
|
||||||
run_node_agent_once node02
|
run_node_agent_bg node01
|
||||||
|
run_node_agent_bg node02
|
||||||
|
|
||||||
echo "Scheduling managed instances"
|
echo "Waiting for worker to remain blocked until api becomes healthy"
|
||||||
run_scheduler_once
|
wait_for_service_state worker blocked 0 - 120
|
||||||
|
|
||||||
echo "Validating dependency block before api is healthy"
|
echo "Validating dependency block before api is healthy"
|
||||||
run_deployer_ctl dump --prefix "photoncloud/clusters/test-cluster/instances/worker/" >"$tmp_dir/worker-blocked.dump"
|
run_deployer_ctl dump --prefix "photoncloud/clusters/test-cluster/instances/worker/" >"$tmp_dir/worker-blocked.dump"
|
||||||
|
|
@ -455,11 +550,7 @@ print("service inspect reports blocked dependency state")
|
||||||
PY
|
PY
|
||||||
|
|
||||||
echo "Reconciling processes and health for api"
|
echo "Reconciling processes and health for api"
|
||||||
for _ in 1 2 3; do
|
wait_for_service_state api healthy 2 healthy 120
|
||||||
run_node_agent_once node01
|
|
||||||
run_node_agent_once node02
|
|
||||||
sleep 1
|
|
||||||
done
|
|
||||||
|
|
||||||
run_deployer_ctl service inspect --name api --include-instances >"$tmp_dir/api-inspect-healthy.json"
|
run_deployer_ctl service inspect --name api --include-instances >"$tmp_dir/api-inspect-healthy.json"
|
||||||
python3 - "$tmp_dir/api-inspect-healthy.json" <<'PY'
|
python3 - "$tmp_dir/api-inspect-healthy.json" <<'PY'
|
||||||
|
|
@ -478,15 +569,8 @@ if len(instances) != 2:
|
||||||
print("api service inspect refreshed to healthy from node-agent updates")
|
print("api service inspect refreshed to healthy from node-agent updates")
|
||||||
PY
|
PY
|
||||||
|
|
||||||
echo "Re-running scheduler after api became healthy"
|
echo "Waiting for dependent worker service to converge automatically"
|
||||||
run_scheduler_once
|
wait_for_service_state worker healthy 2 healthy 120
|
||||||
|
|
||||||
echo "Reconciling processes and health for dependent worker service"
|
|
||||||
for _ in 1 2 3; do
|
|
||||||
run_node_agent_once node01
|
|
||||||
run_node_agent_once node02
|
|
||||||
sleep 1
|
|
||||||
done
|
|
||||||
|
|
||||||
echo "Validating HTTP endpoints"
|
echo "Validating HTTP endpoints"
|
||||||
python3 - <<'PY'
|
python3 - <<'PY'
|
||||||
|
|
@ -594,25 +678,9 @@ PY
|
||||||
echo "Applying scaled declaration"
|
echo "Applying scaled declaration"
|
||||||
run_deployer_ctl apply --config "$tmp_dir/cluster-scaled.yaml" --prune
|
run_deployer_ctl apply --config "$tmp_dir/cluster-scaled.yaml" --prune
|
||||||
|
|
||||||
echo "Re-running scheduler after scale-down"
|
echo "Waiting for scaled services to converge automatically"
|
||||||
run_scheduler_once
|
wait_for_service_state api healthy 1 healthy 120
|
||||||
|
wait_for_service_state worker healthy 1 healthy 120
|
||||||
echo "Reconciling api after scale-down"
|
|
||||||
for _ in 1 2 3; do
|
|
||||||
run_node_agent_once node01
|
|
||||||
run_node_agent_once node02
|
|
||||||
sleep 1
|
|
||||||
done
|
|
||||||
|
|
||||||
echo "Re-running scheduler after scaled api became healthy"
|
|
||||||
run_scheduler_once
|
|
||||||
|
|
||||||
echo "Reconciling dependent worker service after scale-down"
|
|
||||||
for _ in 1 2 3; do
|
|
||||||
run_node_agent_once node01
|
|
||||||
run_node_agent_once node02
|
|
||||||
sleep 1
|
|
||||||
done
|
|
||||||
|
|
||||||
echo "Inspecting scaled instance state in ChainFire"
|
echo "Inspecting scaled instance state in ChainFire"
|
||||||
run_deployer_ctl dump --prefix "photoncloud/clusters/test-cluster/instances/api/" >"$tmp_dir/instances-scaled.dump"
|
run_deployer_ctl dump --prefix "photoncloud/clusters/test-cluster/instances/api/" >"$tmp_dir/instances-scaled.dump"
|
||||||
|
|
@ -695,30 +763,7 @@ print("service inspect reports scaled healthy worker state without waiting for s
|
||||||
PY
|
PY
|
||||||
|
|
||||||
echo "Validating endpoint convergence after scale-down"
|
echo "Validating endpoint convergence after scale-down"
|
||||||
python3 - <<'PY'
|
wait_for_endpoint_convergence 60
|
||||||
import socket
|
echo "Endpoint convergence validated"
|
||||||
import urllib.request
|
|
||||||
|
|
||||||
with urllib.request.urlopen("http://127.0.0.2:18080/", timeout=5) as response:
|
|
||||||
if response.status != 200:
|
|
||||||
raise SystemExit(f"node01 endpoint returned {response.status}")
|
|
||||||
with urllib.request.urlopen("http://127.0.0.2:18081/", timeout=5) as response:
|
|
||||||
if response.status != 200:
|
|
||||||
raise SystemExit(f"node01 worker endpoint returned {response.status}")
|
|
||||||
|
|
||||||
for port, label in ((18080, "api"), (18081, "worker")):
|
|
||||||
sock = socket.socket()
|
|
||||||
sock.settimeout(1.5)
|
|
||||||
try:
|
|
||||||
sock.connect(("127.0.0.3", port))
|
|
||||||
except OSError:
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
raise SystemExit(f"node02 {label} endpoint still accepts connections after scale-down")
|
|
||||||
finally:
|
|
||||||
sock.close()
|
|
||||||
|
|
||||||
print("Endpoint convergence validated")
|
|
||||||
PY
|
|
||||||
|
|
||||||
echo "Fleet scheduler E2E verification passed"
|
echo "Fleet scheduler E2E verification passed"
|
||||||
|
|
|
||||||
|
|
@ -34,9 +34,14 @@ run_plasmacloud_reconciler_bin() {
|
||||||
tmp_dir="$(mktemp -d)"
|
tmp_dir="$(mktemp -d)"
|
||||||
cf_pid=""
|
cf_pid=""
|
||||||
redfish_pid=""
|
redfish_pid=""
|
||||||
|
controller_pids=()
|
||||||
|
|
||||||
cleanup() {
|
cleanup() {
|
||||||
set +e
|
set +e
|
||||||
|
for pid in "${controller_pids[@]}"; do
|
||||||
|
kill "$pid" 2>/dev/null || true
|
||||||
|
wait "$pid" 2>/dev/null || true
|
||||||
|
done
|
||||||
if [[ -n "$redfish_pid" ]]; then
|
if [[ -n "$redfish_pid" ]]; then
|
||||||
kill "$redfish_pid" 2>/dev/null || true
|
kill "$redfish_pid" 2>/dev/null || true
|
||||||
wait "$redfish_pid" 2>/dev/null || true
|
wait "$redfish_pid" 2>/dev/null || true
|
||||||
|
|
@ -45,7 +50,11 @@ cleanup() {
|
||||||
kill "$cf_pid" 2>/dev/null || true
|
kill "$cf_pid" 2>/dev/null || true
|
||||||
wait "$cf_pid" 2>/dev/null || true
|
wait "$cf_pid" 2>/dev/null || true
|
||||||
fi
|
fi
|
||||||
|
if [[ "${PHOTONCLOUD_KEEP_TMP:-}" == "1" ]]; then
|
||||||
|
echo "Keeping temporary directory: $tmp_dir" >&2
|
||||||
|
else
|
||||||
rm -rf "$tmp_dir"
|
rm -rf "$tmp_dir"
|
||||||
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
trap cleanup EXIT
|
trap cleanup EXIT
|
||||||
|
|
@ -248,21 +257,80 @@ run_deployer_ctl() {
|
||||||
"$@"
|
"$@"
|
||||||
}
|
}
|
||||||
|
|
||||||
run_hosts_once() {
|
run_hosts_bg() {
|
||||||
run_plasmacloud_reconciler_bin \
|
run_plasmacloud_reconciler_bin \
|
||||||
hosts \
|
hosts \
|
||||||
--endpoint "$chainfire_endpoint" \
|
--endpoint "$chainfire_endpoint" \
|
||||||
--cluster-namespace photoncloud \
|
--cluster-namespace photoncloud \
|
||||||
--cluster-id test-cluster \
|
--cluster-id test-cluster \
|
||||||
--heartbeat-timeout-secs 300 \
|
--heartbeat-timeout-secs 300 \
|
||||||
--once
|
--interval-secs 300 \
|
||||||
|
>"$tmp_dir/hosts-controller.log" 2>&1 &
|
||||||
|
controller_pids+=("$!")
|
||||||
|
}
|
||||||
|
|
||||||
|
wait_for_deployment_state() {
|
||||||
|
local expected_phase="$1"
|
||||||
|
local expected_paused="$2"
|
||||||
|
local expected_completed="$3"
|
||||||
|
local expected_in_progress="$4"
|
||||||
|
local expected_failed="$5"
|
||||||
|
local timeout_secs="${6:-120}"
|
||||||
|
local inspect_path="$tmp_dir/deployment-wait.json"
|
||||||
|
local deadline=$((SECONDS + timeout_secs))
|
||||||
|
|
||||||
|
while (( SECONDS < deadline )); do
|
||||||
|
if run_deployer_ctl deployment inspect --name worker-rollout --format json >"$inspect_path" 2>/dev/null; then
|
||||||
|
if python3 - "$inspect_path" "$expected_phase" "$expected_paused" "$expected_completed" "$expected_in_progress" "$expected_failed" <<'PY'
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
|
||||||
|
payload = json.load(open(sys.argv[1], "r", encoding="utf-8"))
|
||||||
|
status = payload.get("status") or {}
|
||||||
|
|
||||||
|
def parse_csv(value):
|
||||||
|
if value == "-":
|
||||||
|
return None
|
||||||
|
if not value:
|
||||||
|
return []
|
||||||
|
return value.split(",")
|
||||||
|
|
||||||
|
expected_phase = sys.argv[2]
|
||||||
|
expected_paused = sys.argv[3].lower() == "true"
|
||||||
|
expected_completed = parse_csv(sys.argv[4])
|
||||||
|
expected_in_progress = parse_csv(sys.argv[5])
|
||||||
|
expected_failed = parse_csv(sys.argv[6])
|
||||||
|
|
||||||
|
if status.get("phase") != expected_phase:
|
||||||
|
raise SystemExit(1)
|
||||||
|
if bool(status.get("paused")) != expected_paused:
|
||||||
|
raise SystemExit(1)
|
||||||
|
if expected_completed is not None and status.get("completed_nodes", []) != expected_completed:
|
||||||
|
raise SystemExit(1)
|
||||||
|
if expected_in_progress is not None and status.get("in_progress_nodes", []) != expected_in_progress:
|
||||||
|
raise SystemExit(1)
|
||||||
|
if expected_failed is not None and status.get("failed_nodes", []) != expected_failed:
|
||||||
|
raise SystemExit(1)
|
||||||
|
PY
|
||||||
|
then
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
|
||||||
|
echo "timed out waiting for deployment state ${expected_phase}" >&2
|
||||||
|
[[ -f "$inspect_path" ]] && cat "$inspect_path" >&2
|
||||||
|
return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
echo "Applying host lifecycle cluster config"
|
echo "Applying host lifecycle cluster config"
|
||||||
run_deployer_ctl apply --config "$tmp_dir/cluster.yaml" --prune
|
run_deployer_ctl apply --config "$tmp_dir/cluster.yaml" --prune
|
||||||
|
|
||||||
echo "Running host rollout controller"
|
echo "Starting watch-driven host rollout controller"
|
||||||
run_hosts_once
|
run_hosts_bg
|
||||||
|
|
||||||
|
wait_for_deployment_state running false "" "node01" "" 120
|
||||||
|
|
||||||
run_deployer_ctl deployment inspect --name worker-rollout --format json >"$tmp_dir/deployment-1.json"
|
run_deployer_ctl deployment inspect --name worker-rollout --format json >"$tmp_dir/deployment-1.json"
|
||||||
python3 - "$tmp_dir/deployment-1.json" <<'PY'
|
python3 - "$tmp_dir/deployment-1.json" <<'PY'
|
||||||
|
|
@ -325,7 +393,7 @@ run_deployer_ctl node set-observed \
|
||||||
--node-id node01 \
|
--node-id node01 \
|
||||||
--status active \
|
--status active \
|
||||||
--nixos-configuration worker-next >/dev/null
|
--nixos-configuration worker-next >/dev/null
|
||||||
run_hosts_once
|
wait_for_deployment_state running false "node01" "node02" "" 120
|
||||||
|
|
||||||
run_deployer_ctl deployment inspect --name worker-rollout --format json >"$tmp_dir/deployment-2.json"
|
run_deployer_ctl deployment inspect --name worker-rollout --format json >"$tmp_dir/deployment-2.json"
|
||||||
python3 - "$tmp_dir/deployment-2.json" <<'PY'
|
python3 - "$tmp_dir/deployment-2.json" <<'PY'
|
||||||
|
|
@ -344,7 +412,7 @@ run_deployer_ctl node set-observed \
|
||||||
--node-id node02 \
|
--node-id node02 \
|
||||||
--status rolled-back \
|
--status rolled-back \
|
||||||
--nixos-configuration worker-next >/dev/null
|
--nixos-configuration worker-next >/dev/null
|
||||||
run_hosts_once
|
wait_for_deployment_state paused true "node01" "" "node02" 120
|
||||||
|
|
||||||
run_deployer_ctl deployment inspect --name worker-rollout --format json >"$tmp_dir/deployment-3.json"
|
run_deployer_ctl deployment inspect --name worker-rollout --format json >"$tmp_dir/deployment-3.json"
|
||||||
python3 - "$tmp_dir/deployment-3.json" <<'PY'
|
python3 - "$tmp_dir/deployment-3.json" <<'PY'
|
||||||
|
|
@ -407,6 +475,8 @@ assert payload["paused"] is True, payload
|
||||||
print("abort command validated")
|
print("abort command validated")
|
||||||
PY
|
PY
|
||||||
|
|
||||||
|
wait_for_deployment_state aborted true - - - 120
|
||||||
|
|
||||||
run_deployer_ctl dump --prefix "photoncloud/clusters/test-cluster/nodes/" >"$tmp_dir/nodes-2.dump"
|
run_deployer_ctl dump --prefix "photoncloud/clusters/test-cluster/nodes/" >"$tmp_dir/nodes-2.dump"
|
||||||
python3 - "$tmp_dir/nodes-2.dump" <<'PY'
|
python3 - "$tmp_dir/nodes-2.dump" <<'PY'
|
||||||
import json
|
import json
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue