Add service inspection to deployer ctl

This commit is contained in:
centra 2026-04-01 22:36:14 +09:00
parent 7450832bff
commit 0a5c823134
Signed by: centra
GPG key ID: 0C09689D20B25ACA
3 changed files with 209 additions and 7 deletions

View file

@ -8,7 +8,7 @@ use chrono::Utc;
use deployer_types::{
ClusterNodeRecord, ClusterStateSpec, CommissionState, DesiredSystemSpec, HostDeploymentSpec,
HostDeploymentStatus, InstallPlan, InstallState, NodeConfig, NodeSpec, ObservedSystemState,
PowerState,
PowerState, ServiceInstanceSpec, ServicePublicationState, ServiceSpec, ServiceStatusRecord,
};
use serde::de::DeserializeOwned;
use serde_json::{json, Value};
@ -140,6 +140,24 @@ fn key_service(cluster_namespace: &str, cluster_id: &str, svc: &str) -> Vec<u8>
.into_bytes()
}
fn key_service_status(cluster_namespace: &str, cluster_id: &str, service: &str) -> Vec<u8> {
format!(
"{}service-statuses/{}",
cluster_prefix(cluster_namespace, cluster_id),
service
)
.into_bytes()
}
fn key_publication(cluster_namespace: &str, cluster_id: &str, service: &str) -> Vec<u8> {
format!(
"{}publications/{}",
cluster_prefix(cluster_namespace, cluster_id),
service
)
.into_bytes()
}
fn key_instance(cluster_namespace: &str, cluster_id: &str, svc: &str, inst: &str) -> Vec<u8> {
format!(
"{}instances/{}/{}",
@ -1077,6 +1095,111 @@ pub async fn inspect_host_deployment(
.await
}
pub async fn inspect_service(
endpoint: &str,
cluster_namespace: &str,
cluster_id: &str,
service_name: &str,
include_instances: bool,
json_output: bool,
) -> Result<()> {
let endpoints = chainfire_endpoints(endpoint);
with_chainfire_endpoint_failover(&endpoints, "inspect service", |endpoint| {
let endpoint = endpoint.to_string();
let cluster_namespace = cluster_namespace.to_string();
let cluster_id = cluster_id.to_string();
let service_name = service_name.to_string();
async move {
let mut client = Client::connect(endpoint).await?;
let spec = get_json_key::<ServiceSpec>(
&mut client,
&key_service(&cluster_namespace, &cluster_id, &service_name),
)
.await?
.with_context(|| format!("service {} not found", service_name))?;
let status = get_json_key::<ServiceStatusRecord>(
&mut client,
&key_service_status(&cluster_namespace, &cluster_id, &service_name),
)
.await?;
let publication = get_json_key::<ServicePublicationState>(
&mut client,
&key_publication(&cluster_namespace, &cluster_id, &service_name),
)
.await?;
let instances = if include_instances {
let prefix = format!(
"{}instances/{}/",
cluster_prefix(&cluster_namespace, &cluster_id),
service_name
);
let kvs = client.get_prefix(prefix.as_bytes()).await?;
let mut instances = Vec::new();
for (_key, value) in kvs {
match serde_json::from_slice::<ServiceInstanceSpec>(&value) {
Ok(instance) => instances.push(instance),
Err(error) => warn!(
error = %error,
service = %service_name,
"failed to decode service instance"
),
}
}
instances.sort_by(|lhs, rhs| lhs.instance_id.cmp(&rhs.instance_id));
Some(instances)
} else {
None
};
if json_output {
println!(
"{}",
serde_json::to_string_pretty(&json!({
"spec": spec,
"status": status,
"publication": publication,
"instances": instances,
}))?
);
} else {
println!("name={}", spec.name);
println!(
"schedule_mode={}",
spec.schedule
.as_ref()
.map(|schedule| serde_json::to_string(&schedule.mode).unwrap_or_default())
.unwrap_or_else(|| "\"none\"".to_string())
);
println!("depends_on={}", spec.depends_on.len());
if let Some(status) = status {
println!("phase={}", status.phase);
println!("healthy_instances={}", status.healthy_instances);
println!("scheduled_instances={}", status.scheduled_instances);
println!("desired_instances={}", status.desired_instances);
println!("dependencies_ready={}", status.dependencies_ready);
println!("publish_ready={}", status.publish_ready);
if !status.blockers.is_empty() {
println!("blockers={}", status.blockers.join(";"));
}
}
if let Some(publication) = publication {
println!(
"published={}",
publication.dns.is_some() || publication.load_balancer.is_some()
);
}
if let Some(instances) = instances {
println!("instances={}", instances.len());
}
}
Ok(())
}
})
.await
}
pub async fn set_host_deployment_paused(
endpoint: &str,
cluster_namespace: &str,

View file

@ -95,6 +95,12 @@ enum Command {
#[command(subcommand)]
command: DeploymentCommand,
},
/// Service spec/status/publication/instances を表示する
Service {
#[command(subcommand)]
command: ServiceCommand,
},
}
#[derive(Subcommand, Debug)]
@ -211,6 +217,21 @@ enum DeploymentCommand {
},
}
#[derive(Subcommand, Debug)]
enum ServiceCommand {
/// Service の spec/status/publication を表示する
Inspect {
#[arg(long)]
name: String,
#[arg(long, default_value_t = false)]
include_instances: bool,
#[arg(long, value_enum, default_value_t = DumpFormat::Json)]
format: DumpFormat,
},
}
#[derive(Clone, Copy, Debug, ValueEnum)]
enum DumpFormat {
Text,
@ -316,8 +337,7 @@ impl PowerActionArg {
#[tokio::main]
async fn main() -> Result<()> {
let env_filter =
EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));
let env_filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));
tracing_subscriber::fmt()
.with_writer(std::io::stderr)
.with_env_filter(env_filter)
@ -459,10 +479,9 @@ async fn main() -> Result<()> {
}
}
Command::Deployment { command } => {
let cluster_id = cli
.cluster_id
.as_deref()
.ok_or_else(|| anyhow::anyhow!("--cluster-id is required for deployment commands"))?;
let cluster_id = cli.cluster_id.as_deref().ok_or_else(|| {
anyhow::anyhow!("--cluster-id is required for deployment commands")
})?;
match command {
DeploymentCommand::Inspect { name, format } => {
@ -506,6 +525,30 @@ async fn main() -> Result<()> {
}
}
}
Command::Service { command } => {
let cluster_id = cli
.cluster_id
.as_deref()
.ok_or_else(|| anyhow::anyhow!("--cluster-id is required for service commands"))?;
match command {
ServiceCommand::Inspect {
name,
include_instances,
format,
} => {
chainfire::inspect_service(
&cli.chainfire_endpoint,
&cli.cluster_namespace,
cluster_id,
&name,
include_instances,
matches!(format, DumpFormat::Json),
)
.await?;
}
}
}
}
Ok(())

View file

@ -438,6 +438,22 @@ if not blockers or "dependency api has 0/2 healthy instance(s)" not in blockers[
print("worker service status reports dependency block")
PY
run_deployer_ctl service inspect --name worker >"$tmp_dir/worker-inspect-blocked.json"
python3 - "$tmp_dir/worker-inspect-blocked.json" <<'PY'
import json
import sys
payload = json.load(open(sys.argv[1], "r", encoding="utf-8"))
status = payload.get("status") or {}
if payload.get("spec", {}).get("name") != "worker":
raise SystemExit("service inspect did not return worker spec")
if status.get("phase") != "blocked":
raise SystemExit(f"expected worker inspect phase=blocked, found {status.get('phase')}")
print("service inspect reports blocked dependency state")
PY
echo "Reconciling processes and health for api"
for _ in 1 2 3; do
run_node_agent_once node01
@ -541,6 +557,26 @@ if states != ["healthy", "healthy"]:
print("Observed two healthy dependent worker instances across node01 and node02")
PY
echo "Refreshing aggregated service status after worker became healthy"
run_scheduler_once
run_deployer_ctl service inspect --name worker --include-instances >"$tmp_dir/worker-inspect-healthy.json"
python3 - "$tmp_dir/worker-inspect-healthy.json" <<'PY'
import json
import sys
payload = json.load(open(sys.argv[1], "r", encoding="utf-8"))
status = payload.get("status") or {}
instances = payload.get("instances") or []
if status.get("phase") != "healthy":
raise SystemExit(f"expected worker inspect phase=healthy, found {status.get('phase')}")
if len(instances) != 2:
raise SystemExit(f"expected 2 worker instances from service inspect, found {len(instances)}")
print("service inspect reports healthy dependent instances")
PY
echo "Applying scaled declaration"
run_deployer_ctl apply --config "$tmp_dir/cluster-scaled.yaml" --prune