//! photoncloud-monorepo/deployer/crates/fleet-scheduler/src/publish.rs
//!
//! Service publication: reconciles scheduled services into FiberLB load
//! balancers and FlashDNS record sets.

use std::collections::{BTreeMap, HashMap, HashSet};
use anyhow::{Context, Result};
use chainfire_client::Client;
use chrono::Utc;
use deployer_types::{
DnsPublicationSpec, DnsPublishMode, LoadBalancerPublicationSpec, PublishedDnsRecordState,
PublishedLoadBalancerState, ServiceInstanceSpec, ServicePublicationState, ServiceSpec,
};
use fiberlb_api::backend_service_client::BackendServiceClient;
use fiberlb_api::listener_service_client::ListenerServiceClient;
use fiberlb_api::load_balancer_service_client::LoadBalancerServiceClient;
use fiberlb_api::pool_service_client::PoolServiceClient;
use fiberlb_api::{
BackendAdminState, CreateBackendRequest, CreateListenerRequest, CreateLoadBalancerRequest,
CreatePoolRequest, DeleteBackendRequest, DeleteListenerRequest, DeleteLoadBalancerRequest,
DeletePoolRequest, ListBackendsRequest, ListListenersRequest, ListLoadBalancersRequest,
ListPoolsRequest, Listener, ListenerProtocol, Pool, PoolAlgorithm, PoolProtocol,
UpdateBackendRequest,
};
use flashdns_api::proto::record_data;
use flashdns_api::proto::{
ARecord, CreateRecordRequest, CreateZoneRequest, DeleteRecordRequest, ListRecordsRequest,
ListZonesRequest, RecordData, RecordInfo, UpdateRecordRequest, ZoneInfo,
};
use flashdns_api::{RecordServiceClient, ZoneServiceClient};
use tonic::Code;
use tracing::{info, warn};
use crate::auth::{authorized_request, issue_controller_token};
/// Configuration for the publication subsystem: cluster identity, control
/// plane endpoints, default IAM scope, and operational switches.
pub struct PublicationConfig {
    /// Cluster identifier, used as a prefix when deriving load-balancer names.
    pub cluster_id: String,
    /// Maximum heartbeat age (seconds) for an instance to be published;
    /// `0` disables the freshness check.
    pub heartbeat_timeout_secs: u64,
    /// IAM endpoint for minting controller tokens; publication and cleanup
    /// are skipped (or fail) without it.
    pub iam_endpoint: Option<String>,
    /// FiberLB endpoint; load-balancer publication is skipped without it.
    pub fiberlb_endpoint: Option<String>,
    /// FlashDNS endpoint; DNS publication is skipped without it.
    pub flashdns_endpoint: Option<String>,
    /// Fallback address to publish when the load balancer reports no VIP.
    pub publish_address: Option<String>,
    /// Principal the controller authenticates as when requesting tokens.
    pub controller_principal_id: String,
    /// Org used when neither the spec nor prior state provides one.
    pub default_org_id: String,
    /// Project used when neither the spec nor prior state provides one.
    pub default_project_id: String,
    /// When true, log intended changes without calling remote services.
    pub dry_run: bool,
}
/// Top-level reconciler that sweeps every service in a cluster and keeps the
/// persisted publication state in sync with the remote control planes.
pub struct PublicationReconciler {
    // Performs the per-service reconcile/cleanup work.
    controller: PublicationController,
}
/// Per-service publication logic: talks to IAM, FiberLB, and FlashDNS to
/// create, update, and tear down published resources.
pub struct PublicationController {
    // Endpoints, defaults, and the dry-run switch governing publication.
    config: PublicationConfig,
}
impl PublicationReconciler {
    /// Wraps a [`PublicationController`] so callers can drive full-cluster
    /// reconciliation passes.
    pub fn new(config: PublicationConfig) -> Self {
        Self {
            controller: PublicationController::new(config),
        }
    }

    /// Reconciles publication state for every service in the cluster.
    ///
    /// For each service with a `publish` spec this loads its instances,
    /// reconciles the load-balancer / DNS resources, and writes the resulting
    /// [`ServicePublicationState`] back to the KV store (or deletes the key
    /// when reconciliation yields no state). Afterwards, publications recorded
    /// for services that no longer request publishing are cleaned up remotely
    /// and removed from the store.
    ///
    /// NOTE(review): `_dry_run` is ignored here; dry-run behavior is driven by
    /// `PublicationConfig::dry_run` inside the controller — confirm the
    /// parameter is intentionally vestigial.
    pub async fn reconcile_all(
        &self,
        client: &mut Client,
        cluster_namespace: &str,
        cluster_id: &str,
        services: &[ServiceSpec],
        _dry_run: bool,
    ) -> Result<()> {
        let existing = load_publication_states(client, cluster_namespace, cluster_id).await?;
        // Track which services still want publishing so stale state can be
        // swept afterwards.
        let mut desired_services = HashSet::new();
        for service in services.iter().filter(|service| service.publish.is_some()) {
            desired_services.insert(service.name.clone());
            let instances =
                load_service_instances(client, cluster_namespace, cluster_id, &service.name)
                    .await?;
            let next = self
                .controller
                .reconcile(service, &instances, existing.get(&service.name))
                .await?;
            let key = publication_key(cluster_namespace, cluster_id, &service.name);
            match next {
                Some(state) => {
                    client.put(&key, &serde_json::to_vec(&state)?).await?;
                }
                None => {
                    client.delete(&key).await?;
                }
            }
        }
        // Services that disappeared or stopped requesting publication: tear
        // down their remote resources first, then drop the stored state.
        for (service_name, state) in existing {
            if desired_services.contains(&service_name) {
                continue;
            }
            self.controller.cleanup(&state).await?;
            client
                .delete(&publication_key(
                    cluster_namespace,
                    cluster_id,
                    &service_name,
                ))
                .await?;
        }
        Ok(())
    }
}
impl PublicationController {
    /// Creates a controller bound to the given publication configuration.
    pub fn new(config: PublicationConfig) -> Self {
        Self { config }
    }

    /// Reconciles one service's publication and returns the state to persist.
    ///
    /// Returns `Ok(None)` when the service has no `publish` spec. A controller
    /// token is only minted when a load-balancer or DNS publication is
    /// actually requested. Only instances passing `instance_is_publishable`
    /// are fed into the load-balancer / DNS reconcilers.
    pub async fn reconcile(
        &self,
        service: &ServiceSpec,
        instances: &[ServiceInstanceSpec],
        existing: Option<&ServicePublicationState>,
    ) -> Result<Option<ServicePublicationState>> {
        let Some(publication) = service.publish.as_ref() else {
            return Ok(None);
        };
        let (org_id, project_id) = self.publication_scope(service, existing);
        let token = if publication.load_balancer.is_some() || publication.dns.is_some() {
            Some(self.issue_token(&org_id, &project_id).await?)
        } else {
            None
        };
        let healthy_instances: Vec<_> = instances
            .iter()
            .filter(|instance| {
                instance_is_publishable(instance, self.config.heartbeat_timeout_secs)
            })
            .cloned()
            .collect();
        // The load balancer is reconciled first so DNS can publish its VIP.
        let load_balancer = if let Some(lb_spec) = publication.load_balancer.as_ref() {
            Some(
                self.reconcile_load_balancer(
                    service,
                    lb_spec,
                    &org_id,
                    &project_id,
                    token
                        .as_deref()
                        .context("publication requested without controller token")?,
                    &healthy_instances,
                    existing.and_then(|state| state.load_balancer.as_ref()),
                )
                .await?,
            )
        } else {
            None
        };
        let dns = if let Some(dns_spec) = publication.dns.as_ref() {
            self.reconcile_dns(
                service,
                dns_spec,
                &org_id,
                &project_id,
                token
                    .as_deref()
                    .context("publication requested without controller token")?,
                &healthy_instances,
                load_balancer.as_ref(),
                existing.and_then(|state| state.dns.as_ref()),
            )
            .await?
        } else {
            None
        };
        Ok(Some(ServicePublicationState {
            service: service.name.clone(),
            org_id,
            project_id,
            load_balancer,
            dns,
            observed_at: Some(Utc::now()),
        }))
    }

    /// Tears down all remote resources recorded in a publication state.
    ///
    /// Without an IAM endpoint no token can be minted, so cleanup is skipped
    /// with a warning rather than failing the sweep. DNS records are removed
    /// before the load-balancer resources.
    pub async fn cleanup(&self, state: &ServicePublicationState) -> Result<()> {
        let Some(iam_endpoint) = self.config.iam_endpoint.as_deref() else {
            warn!(service = %state.service, "skipping publication cleanup without IAM endpoint");
            return Ok(());
        };
        let token = issue_controller_token(
            iam_endpoint,
            &self.config.controller_principal_id,
            &state.org_id,
            &state.project_id,
        )
        .await?;
        if let Some(dns_state) = state.dns.as_ref() {
            self.cleanup_dns(&token, dns_state).await?;
        }
        if let Some(lb_state) = state.load_balancer.as_ref() {
            self.cleanup_load_balancer(&token, lb_state).await?;
        }
        Ok(())
    }

    /// Resolves the (org, project) scope for a service's publication.
    ///
    /// Precedence: the publish spec's own ids, then the load-balancer
    /// sub-spec's ids, then whatever a previous publication recorded, and
    /// finally the configured defaults.
    fn publication_scope(
        &self,
        service: &ServiceSpec,
        existing: Option<&ServicePublicationState>,
    ) -> (String, String) {
        let publish = service.publish.as_ref();
        let org_id = publish
            .and_then(|spec| spec.org_id.clone())
            .or_else(|| {
                publish
                    .and_then(|spec| spec.load_balancer.as_ref())
                    .and_then(|spec| spec.org_id.clone())
            })
            .or_else(|| existing.map(|state| state.org_id.clone()))
            .unwrap_or_else(|| self.config.default_org_id.clone());
        let project_id = publish
            .and_then(|spec| spec.project_id.clone())
            .or_else(|| {
                publish
                    .and_then(|spec| spec.load_balancer.as_ref())
                    .and_then(|spec| spec.project_id.clone())
            })
            .or_else(|| existing.map(|state| state.project_id.clone()))
            .unwrap_or_else(|| self.config.default_project_id.clone());
        (org_id, project_id)
    }

    /// Mints a controller token scoped to `org_id` / `project_id`.
    ///
    /// Fails with context when no IAM endpoint was configured.
    async fn issue_token(&self, org_id: &str, project_id: &str) -> Result<String> {
        let iam_endpoint = self
            .config
            .iam_endpoint
            .as_deref()
            .context("publication requires --iam-endpoint")?;
        issue_controller_token(
            iam_endpoint,
            &self.config.controller_principal_id,
            org_id,
            project_id,
        )
        .await
    }

    /// Ensures the FiberLB load balancer, pool, listener, and backends exist
    /// and match the spec, returning the resulting state to persist.
    ///
    /// Without a FiberLB endpoint only previously-recorded state can be
    /// returned; a first-time publication errors. In dry-run mode the intended
    /// change is logged and the existing (or a placeholder) state is returned.
    async fn reconcile_load_balancer(
        &self,
        service: &ServiceSpec,
        spec: &LoadBalancerPublicationSpec,
        org_id: &str,
        project_id: &str,
        auth_token: &str,
        healthy_instances: &[ServiceInstanceSpec],
        existing: Option<&PublishedLoadBalancerState>,
    ) -> Result<PublishedLoadBalancerState> {
        let Some(endpoint) = self.config.fiberlb_endpoint.as_ref() else {
            warn!(service = %service.name, "publication requested without FiberLB endpoint");
            return existing
                .cloned()
                .context("missing FiberLB endpoint for load balancer publication");
        };
        let listener_port = resolve_target_port(service, spec)
            .context("load balancer publication requires listener_port or target port")?;
        // Derived names: LB from cluster + service unless the spec names it.
        let lb_name = spec.name.clone().unwrap_or_else(|| {
            sanitize_name(&format!("{}-{}", self.config.cluster_id, service.name))
        });
        let pool_name = format!("{lb_name}-pool");
        let listener_name = format!("{lb_name}-listener-{listener_port}");
        if self.config.dry_run {
            info!(service = %service.name, load_balancer = %lb_name, "would reconcile native load balancer");
            return Ok(existing.cloned().unwrap_or(PublishedLoadBalancerState {
                id: String::new(),
                pool_id: String::new(),
                listener_id: String::new(),
                vip_address: None,
            }));
        }
        let mut lb_client = LoadBalancerServiceClient::connect(endpoint.clone()).await?;
        let mut pool_client = PoolServiceClient::connect(endpoint.clone()).await?;
        let mut listener_client = ListenerServiceClient::connect(endpoint.clone()).await?;
        let mut backend_client = BackendServiceClient::connect(endpoint.clone()).await?;
        // Build the object graph top-down: LB -> pool -> listener -> backends.
        let load_balancer = ensure_load_balancer(
            &mut lb_client,
            auth_token,
            existing,
            org_id,
            project_id,
            &lb_name,
        )
        .await?;
        let pool = ensure_pool(
            &mut pool_client,
            auth_token,
            existing,
            &load_balancer.id,
            &pool_name,
            spec,
            service,
        )
        .await?;
        let listener = ensure_listener(
            &mut listener_client,
            auth_token,
            existing,
            &load_balancer.id,
            &listener_name,
            listener_port,
            &pool.id,
            spec,
            service,
        )
        .await?;
        reconcile_backends(
            &mut backend_client,
            auth_token,
            &pool.id,
            service,
            healthy_instances,
        )
        .await?;
        Ok(PublishedLoadBalancerState {
            id: load_balancer.id,
            pool_id: pool.id,
            listener_id: listener.id,
            // Fall back to the configured publish address when the LB has no VIP.
            vip_address: empty_to_none(load_balancer.vip_address)
                .or_else(|| self.config.publish_address.clone()),
        })
    }

    /// Ensures the FlashDNS zone and A-record set reflect the desired values,
    /// returning the record state to persist (or `None` when nothing should
    /// be published).
    ///
    /// Without a FlashDNS endpoint the previous state is retained unchanged.
    /// When no values are publishable, any previously-created records are
    /// deleted. Dry-run logs the intent and fabricates/keeps state locally.
    async fn reconcile_dns(
        &self,
        service: &ServiceSpec,
        spec: &DnsPublicationSpec,
        org_id: &str,
        project_id: &str,
        auth_token: &str,
        healthy_instances: &[ServiceInstanceSpec],
        load_balancer: Option<&PublishedLoadBalancerState>,
        existing: Option<&PublishedDnsRecordState>,
    ) -> Result<Option<PublishedDnsRecordState>> {
        let Some(endpoint) = self.config.flashdns_endpoint.as_ref() else {
            warn!(service = %service.name, "DNS publication requested without FlashDNS endpoint");
            return Ok(existing.cloned());
        };
        let desired_values = desired_dns_values(spec, healthy_instances, load_balancer);
        if desired_values.is_empty() {
            // Nothing publishable: remove whatever we created previously.
            if let Some(existing) = existing {
                self.cleanup_dns(auth_token, existing).await?;
            }
            return Ok(None);
        }
        let zone_name = normalize_zone_name(&spec.zone);
        let record_name = record_name_for_service(spec, service);
        let fqdn = format!("{}.{}", record_name, zone_name);
        let primary_value = desired_values.first().cloned().unwrap_or_default();
        if self.config.dry_run {
            info!(
                service = %service.name,
                fqdn = %fqdn,
                values = ?desired_values,
                "would reconcile native DNS record set"
            );
            return Ok(existing.cloned().or(Some(PublishedDnsRecordState {
                zone_id: String::new(),
                record_id: String::new(),
                record_ids: Vec::new(),
                fqdn,
                value: primary_value,
                values: desired_values,
            })));
        }
        let mut zone_client = ZoneServiceClient::connect(endpoint.clone()).await?;
        let mut record_client = RecordServiceClient::connect(endpoint.clone()).await?;
        let zone =
            ensure_zone(&mut zone_client, auth_token, &zone_name, org_id, project_id).await?;
        let records = ensure_records(
            &mut record_client,
            auth_token,
            existing,
            &zone.id,
            &record_name,
            spec.ttl,
            &desired_values,
        )
        .await?;
        let record_ids = records
            .iter()
            .map(|record| record.id.clone())
            .collect::<Vec<_>>();
        Ok(Some(PublishedDnsRecordState {
            zone_id: zone.id,
            // First id mirrored into the legacy single-record field.
            record_id: record_ids.first().cloned().unwrap_or_default(),
            record_ids,
            fqdn,
            value: primary_value,
            values: desired_values,
        }))
    }

    /// Deletes every DNS record tracked in `dns_state`, tolerating records
    /// that are already gone. No-op without a FlashDNS endpoint.
    async fn cleanup_dns(
        &self,
        auth_token: &str,
        dns_state: &PublishedDnsRecordState,
    ) -> Result<()> {
        let Some(endpoint) = self.config.flashdns_endpoint.as_ref() else {
            return Ok(());
        };
        let mut record_client = RecordServiceClient::connect(endpoint.clone()).await?;
        // Fold the legacy single `record_id` into the id list, then dedup.
        let mut record_ids = dns_state.record_ids.clone();
        if record_ids.is_empty() && !dns_state.record_id.is_empty() {
            record_ids.push(dns_state.record_id.clone());
        }
        record_ids.sort();
        record_ids.dedup();
        for record_id in record_ids {
            match record_client
                .delete_record(authorized_request(
                    DeleteRecordRequest { id: record_id },
                    auth_token,
                ))
                .await
            {
                Ok(_) => {}
                // Already deleted: treat as success.
                Err(status) if status.code() == Code::NotFound => {}
                Err(status) => return Err(status.into()),
            }
        }
        Ok(())
    }

    /// Deletes the load-balancer resources recorded in `lb_state` in
    /// dependency order: backends, listener, pool, then the load balancer.
    /// `NotFound` responses are tolerated at every step. No-op without a
    /// FiberLB endpoint.
    async fn cleanup_load_balancer(
        &self,
        auth_token: &str,
        lb_state: &PublishedLoadBalancerState,
    ) -> Result<()> {
        let Some(endpoint) = self.config.fiberlb_endpoint.as_ref() else {
            return Ok(());
        };
        let mut backend_client = BackendServiceClient::connect(endpoint.clone()).await?;
        let mut listener_client = ListenerServiceClient::connect(endpoint.clone()).await?;
        let mut pool_client = PoolServiceClient::connect(endpoint.clone()).await?;
        let mut lb_client = LoadBalancerServiceClient::connect(endpoint.clone()).await?;
        if !lb_state.pool_id.is_empty() {
            // Best-effort backend sweep: a listing failure is ignored so
            // teardown of the remaining resources can proceed.
            if let Ok(response) = backend_client
                .list_backends(authorized_request(
                    ListBackendsRequest {
                        pool_id: lb_state.pool_id.clone(),
                        page_size: 256,
                        page_token: String::new(),
                    },
                    auth_token,
                ))
                .await
            {
                for backend in response.into_inner().backends {
                    delete_backend(&mut backend_client, auth_token, &backend.id).await?;
                }
            }
        }
        if !lb_state.listener_id.is_empty() {
            match listener_client
                .delete_listener(authorized_request(
                    DeleteListenerRequest {
                        id: lb_state.listener_id.clone(),
                    },
                    auth_token,
                ))
                .await
            {
                Ok(_) => {}
                Err(status) if status.code() == Code::NotFound => {}
                Err(status) => return Err(status.into()),
            }
        }
        if !lb_state.pool_id.is_empty() {
            match pool_client
                .delete_pool(authorized_request(
                    DeletePoolRequest {
                        id: lb_state.pool_id.clone(),
                    },
                    auth_token,
                ))
                .await
            {
                Ok(_) => {}
                Err(status) if status.code() == Code::NotFound => {}
                Err(status) => return Err(status.into()),
            }
        }
        if !lb_state.id.is_empty() {
            match lb_client
                .delete_load_balancer(authorized_request(
                    DeleteLoadBalancerRequest {
                        id: lb_state.id.clone(),
                    },
                    auth_token,
                ))
                .await
            {
                Ok(_) => {}
                Err(status) if status.code() == Code::NotFound => {}
                Err(status) => return Err(status.into()),
            }
        }
        Ok(())
    }
}
/// Finds the load balancer for this publication — matched by the id stored
/// in prior state or by name — creating it in the given scope when absent.
async fn ensure_load_balancer(
    client: &mut LoadBalancerServiceClient<tonic::transport::Channel>,
    auth_token: &str,
    existing: Option<&PublishedLoadBalancerState>,
    org_id: &str,
    project_id: &str,
    name: &str,
) -> Result<fiberlb_api::LoadBalancer> {
    let known_id = existing.map(|state| state.id.as_str());
    let listed = client
        .list_load_balancers(authorized_request(
            ListLoadBalancersRequest {
                org_id: org_id.to_string(),
                project_id: project_id.to_string(),
                page_size: 256,
                page_token: String::new(),
            },
            auth_token,
        ))
        .await?
        .into_inner()
        .loadbalancers;
    // Reuse the first load balancer matched by tracked id or by name.
    for lb in listed {
        if known_id == Some(lb.id.as_str()) || lb.name == name {
            return Ok(lb);
        }
    }
    let created = client
        .create_load_balancer(authorized_request(
            CreateLoadBalancerRequest {
                name: name.to_string(),
                org_id: org_id.to_string(),
                project_id: project_id.to_string(),
                description: format!("native runtime service {name}"),
                vip_address: String::new(),
            },
            auth_token,
        ))
        .await?
        .into_inner()
        .loadbalancer;
    created.context("FiberLB returned empty CreateLoadBalancer response")
}
/// Finds the backend pool for this publication — matched by the id stored in
/// prior state or by name — creating a round-robin pool when absent.
async fn ensure_pool(
    client: &mut PoolServiceClient<tonic::transport::Channel>,
    auth_token: &str,
    existing: Option<&PublishedLoadBalancerState>,
    load_balancer_id: &str,
    name: &str,
    spec: &LoadBalancerPublicationSpec,
    service: &ServiceSpec,
) -> Result<Pool> {
    let known_pool_id = existing.map(|state| state.pool_id.as_str());
    let listed = client
        .list_pools(authorized_request(
            ListPoolsRequest {
                loadbalancer_id: load_balancer_id.to_string(),
                page_size: 256,
                page_token: String::new(),
            },
            auth_token,
        ))
        .await?
        .into_inner()
        .pools;
    // Reuse the first pool matched by tracked id or by name.
    for pool in listed {
        if known_pool_id == Some(pool.id.as_str()) || pool.name == name {
            return Ok(pool);
        }
    }
    let created = client
        .create_pool(authorized_request(
            CreatePoolRequest {
                name: name.to_string(),
                loadbalancer_id: load_balancer_id.to_string(),
                algorithm: PoolAlgorithm::RoundRobin as i32,
                protocol: pool_protocol(spec, service) as i32,
                session_persistence: None,
            },
            auth_token,
        ))
        .await?
        .into_inner()
        .pool;
    created.context("FiberLB returned empty CreatePool response")
}
/// Ensures a listener exists on the load balancer with the desired settings.
///
/// A listener matched by tracked id or name is reused only when its port,
/// protocol, and default pool already agree; otherwise it is deleted and a
/// replacement is created (drifted listeners are recreated rather than
/// updated — presumably no suitable update RPC exists; confirm).
async fn ensure_listener(
    client: &mut ListenerServiceClient<tonic::transport::Channel>,
    auth_token: &str,
    existing: Option<&PublishedLoadBalancerState>,
    load_balancer_id: &str,
    name: &str,
    port: u16,
    default_pool_id: &str,
    spec: &LoadBalancerPublicationSpec,
    service: &ServiceSpec,
) -> Result<Listener> {
    let listeners = client
        .list_listeners(authorized_request(
            ListListenersRequest {
                loadbalancer_id: load_balancer_id.to_string(),
                page_size: 256,
                page_token: String::new(),
            },
            auth_token,
        ))
        .await?
        .into_inner()
        .listeners;
    if let Some(listener) = listeners.iter().find(|listener| {
        existing.map(|state| state.listener_id.as_str()) == Some(listener.id.as_str())
            || listener.name == name
    }) {
        let listener = listener.clone();
        // Reuse only if the listener already matches the desired settings.
        if listener.port == port as u32
            && listener.protocol == listener_protocol(spec, service) as i32
            && listener.default_pool_id == default_pool_id
        {
            return Ok(listener);
        }
        // Drifted: delete here, then fall through to recreate below.
        client
            .delete_listener(authorized_request(
                DeleteListenerRequest {
                    id: listener.id.clone(),
                },
                auth_token,
            ))
            .await?;
    }
    Ok(client
        .create_listener(authorized_request(
            CreateListenerRequest {
                name: name.to_string(),
                loadbalancer_id: load_balancer_id.to_string(),
                protocol: listener_protocol(spec, service) as i32,
                port: port as u32,
                default_pool_id: default_pool_id.to_string(),
                tls_config: None,
                connection_limit: 0,
            },
            auth_token,
        ))
        .await?
        .into_inner()
        .listener
        .context("FiberLB returned empty CreateListener response")?)
}
/// Aligns the pool's backend set with the healthy instances.
///
/// Backends are matched to instances by a sanitized `<service>-<instance>`
/// name. Unmatched backends are deleted. Matched backends whose address or
/// port changed are deleted and recreated in the same pass; address-stable
/// backends that are merely disabled are re-enabled via update. Remaining
/// instances get fresh backends with weight 1.
async fn reconcile_backends(
    client: &mut BackendServiceClient<tonic::transport::Channel>,
    auth_token: &str,
    pool_id: &str,
    service: &ServiceSpec,
    healthy_instances: &[ServiceInstanceSpec],
) -> Result<()> {
    let existing = client
        .list_backends(authorized_request(
            ListBackendsRequest {
                pool_id: pool_id.to_string(),
                page_size: 256,
                page_token: String::new(),
            },
            auth_token,
        ))
        .await?
        .into_inner()
        .backends;
    let desired_names: HashSet<String> = healthy_instances
        .iter()
        .map(|instance| backend_name_for_instance(service, instance))
        .collect();
    // Drop backends that no longer correspond to a healthy instance.
    for backend in &existing {
        if !desired_names.contains(&backend.name) {
            delete_backend(client, auth_token, &backend.id).await?;
        }
    }
    for instance in healthy_instances {
        let backend_name = backend_name_for_instance(service, instance);
        let matching = existing.iter().find(|backend| backend.name == backend_name);
        if let Some(backend) = matching {
            if backend.address == instance.ip
                && backend.port == instance.port as u32
                && backend.admin_state == BackendAdminState::Enabled as i32
            {
                // Already correct and enabled: nothing to do.
                continue;
            }
            if backend.address != instance.ip || backend.port != instance.port as u32 {
                // Target changed: delete now, recreate below.
                delete_backend(client, auth_token, &backend.id).await?;
            } else {
                // Same target but disabled: flip it back to Enabled.
                client
                    .update_backend(authorized_request(
                        UpdateBackendRequest {
                            id: backend.id.clone(),
                            name: backend.name.clone(),
                            weight: backend.weight,
                            admin_state: BackendAdminState::Enabled as i32,
                        },
                        auth_token,
                    ))
                    .await?;
                continue;
            }
        }
        client
            .create_backend(authorized_request(
                CreateBackendRequest {
                    name: backend_name,
                    pool_id: pool_id.to_string(),
                    address: instance.ip.clone(),
                    port: instance.port as u32,
                    weight: 1,
                },
                auth_token,
            ))
            .await?;
    }
    Ok(())
}
/// Deletes a pool backend, treating `NotFound` as already deleted.
async fn delete_backend(
    client: &mut BackendServiceClient<tonic::transport::Channel>,
    auth_token: &str,
    backend_id: &str,
) -> Result<()> {
    let request = authorized_request(
        DeleteBackendRequest {
            id: backend_id.to_string(),
        },
        auth_token,
    );
    if let Err(status) = client.delete_backend(request).await {
        // A missing backend means someone beat us to it — not an error.
        if status.code() != Code::NotFound {
            return Err(status.into());
        }
    }
    Ok(())
}
/// Finds the DNS zone named `zone_name` in the given scope, creating it with
/// placeholder primary-NS / admin-email values when absent. Zone names are
/// compared after trailing-dot normalization.
async fn ensure_zone(
    client: &mut ZoneServiceClient<tonic::transport::Channel>,
    auth_token: &str,
    zone_name: &str,
    org_id: &str,
    project_id: &str,
) -> Result<ZoneInfo> {
    let listed = client
        .list_zones(authorized_request(
            ListZonesRequest {
                org_id: org_id.to_string(),
                project_id: project_id.to_string(),
                name_filter: zone_name.to_string(),
                page_size: 256,
                page_token: String::new(),
            },
            auth_token,
        ))
        .await?
        .into_inner()
        .zones;
    for zone in listed {
        if normalize_zone_name(&zone.name) == zone_name {
            return Ok(zone);
        }
    }
    let created = client
        .create_zone(authorized_request(
            CreateZoneRequest {
                name: zone_name.to_string(),
                org_id: org_id.to_string(),
                project_id: project_id.to_string(),
                primary_ns: "ns1.native.cluster".to_string(),
                admin_email: "admin@native.cluster".to_string(),
            },
            auth_token,
        ))
        .await?
        .into_inner()
        .zone;
    created.context("FlashDNS returned empty CreateZone response")
}
/// Ensures exactly one A record exists per desired value for `name` in the
/// zone.
///
/// Existing records are matched by name or by the ids stored in prior state,
/// grouped by their current A value, and reused — updating TTL / re-enabling
/// when drifted. Missing values are created, leftover matched records are
/// deleted, and the result is returned sorted by (value, id) so persisted
/// state is deterministic.
async fn ensure_records(
    client: &mut RecordServiceClient<tonic::transport::Channel>,
    auth_token: &str,
    existing: Option<&PublishedDnsRecordState>,
    zone_id: &str,
    name: &str,
    ttl: u32,
    desired_values: &[String],
) -> Result<Vec<RecordInfo>> {
    let records = client
        .list_records(authorized_request(
            ListRecordsRequest {
                zone_id: zone_id.to_string(),
                name_filter: name.to_string(),
                type_filter: "A".to_string(),
                page_size: 256,
                page_token: String::new(),
            },
            auth_token,
        ))
        .await?
        .into_inner()
        .records;
    // Records owned by this publication: exact name matches plus any ids
    // recorded in earlier state.
    let mut matching = records
        .iter()
        .filter(|record| {
            record.name == name
                || existing.map(|state| state.record_id.as_str()) == Some(record.id.as_str())
                || existing
                    .map(|state| state.record_ids.iter().any(|id| id == &record.id))
                    .unwrap_or(false)
        })
        .cloned()
        .collect::<Vec<_>>();
    // Deterministic order so desired values pair with records stably.
    matching.sort_by(|lhs, rhs| {
        record_a_value(lhs)
            .cmp(&record_a_value(rhs))
            .then_with(|| lhs.id.cmp(&rhs.id))
    });
    // Bucket candidates by current A value; records without A data are
    // skipped here (and therefore never deleted below).
    let mut records_by_value: BTreeMap<String, Vec<RecordInfo>> = BTreeMap::new();
    for record in matching {
        let Some(value) = record_a_value(&record) else {
            continue;
        };
        records_by_value.entry(value).or_default().push(record);
    }
    let mut ensured = Vec::new();
    for desired_value in desired_values {
        // Reuse the first record already carrying this value, if any.
        if let Some(record) = records_by_value.get_mut(desired_value).and_then(|records| {
            if records.is_empty() {
                None
            } else {
                Some(records.remove(0))
            }
        }) {
            if record.ttl != ttl || !record.enabled {
                // TTL or enabled-flag drift: patch the record in place.
                let updated = client
                    .update_record(authorized_request(
                        UpdateRecordRequest {
                            id: record.id.clone(),
                            ttl: Some(ttl),
                            data: Some(RecordData {
                                data: Some(record_data::Data::A(ARecord {
                                    address: desired_value.to_string(),
                                })),
                            }),
                            enabled: Some(true),
                        },
                        auth_token,
                    ))
                    .await?
                    .into_inner()
                    .record
                    .context("FlashDNS returned empty UpdateRecord response")?;
                ensured.push(updated);
            } else {
                ensured.push(record);
            }
            continue;
        }
        // No record for this value yet: create one.
        ensured.push(
            client
                .create_record(authorized_request(
                    CreateRecordRequest {
                        zone_id: zone_id.to_string(),
                        name: name.to_string(),
                        record_type: "A".to_string(),
                        ttl,
                        data: Some(RecordData {
                            data: Some(record_data::Data::A(ARecord {
                                address: desired_value.to_string(),
                            })),
                        }),
                    },
                    auth_token,
                ))
                .await?
                .into_inner()
                .record
                .context("FlashDNS returned empty CreateRecord response")?,
        );
    }
    // Anything left in the buckets is a stale value or duplicate.
    for extra in records_by_value.into_values().flatten() {
        delete_record(client, auth_token, &extra.id).await?;
    }
    ensured.sort_by(|lhs, rhs| {
        record_a_value(lhs)
            .cmp(&record_a_value(rhs))
            .then_with(|| lhs.id.cmp(&rhs.id))
    });
    Ok(ensured)
}
/// Deletes a DNS record, treating `NotFound` as already deleted.
async fn delete_record(
    client: &mut RecordServiceClient<tonic::transport::Channel>,
    auth_token: &str,
    record_id: &str,
) -> Result<()> {
    let request = authorized_request(
        DeleteRecordRequest {
            id: record_id.to_string(),
        },
        auth_token,
    );
    if let Err(status) = client.delete_record(request).await {
        // A missing record means someone beat us to it — not an error.
        if status.code() != Code::NotFound {
            return Err(status.into());
        }
    }
    Ok(())
}
/// Picks the listener/target port for a load-balancer publication.
///
/// Resolution order: the spec's explicit `listener_port`, the schedule's
/// instance port, the service's HTTP port, then its gRPC port.
fn resolve_target_port(service: &ServiceSpec, spec: &LoadBalancerPublicationSpec) -> Option<u16> {
    if let Some(port) = spec.listener_port {
        return Some(port);
    }
    let ports = service.ports.as_ref();
    service
        .schedule
        .as_ref()
        .and_then(|schedule| schedule.instance_port)
        .or_else(|| ports.and_then(|ports| ports.http))
        .or_else(|| ports.and_then(|ports| ports.grpc))
}
/// Extracts the IPv4 address from an A record, or `None` for any other
/// record payload (or missing data).
fn record_a_value(record: &RecordInfo) -> Option<String> {
    match record.data.as_ref()?.data.as_ref()? {
        record_data::Data::A(a_record) => Some(a_record.address.clone()),
        _ => None,
    }
}
/// Normalizes a set of DNS values: trims whitespace, drops empties, and
/// returns them sorted with duplicates removed.
fn normalize_dns_values(values: impl IntoIterator<Item = String>) -> Vec<String> {
    let mut normalized: Vec<String> = values
        .into_iter()
        .filter_map(|raw| {
            let trimmed = raw.trim();
            (!trimmed.is_empty()).then(|| trimmed.to_string())
        })
        .collect();
    normalized.sort();
    normalized.dedup();
    normalized
}
/// Computes the set of IPs to publish for a DNS spec.
///
/// - `LoadBalancer`: the LB VIP when present and usable (non-empty and not
///   the `0.0.0.0` placeholder), otherwise the first healthy instance IP.
/// - `Direct`: the first healthy instance IP.
/// - `DirectMulti`: every healthy instance IP.
///
/// All results are trimmed, de-duplicated, and sorted via
/// `normalize_dns_values`.
fn desired_dns_values(
    spec: &DnsPublicationSpec,
    healthy_instances: &[ServiceInstanceSpec],
    load_balancer: Option<&PublishedLoadBalancerState>,
) -> Vec<String> {
    match spec.mode {
        DnsPublishMode::LoadBalancer => normalize_dns_values(
            load_balancer
                .and_then(|state| state.vip_address.clone())
                .filter(|value| !value.is_empty() && value != "0.0.0.0")
                .or_else(|| {
                    healthy_instances
                        .first()
                        .map(|instance| instance.ip.clone())
                })
                .into_iter(),
        ),
        DnsPublishMode::Direct => normalize_dns_values(
            healthy_instances
                .first()
                .map(|instance| instance.ip.clone())
                .into_iter(),
        ),
        DnsPublishMode::DirectMulti => {
            normalize_dns_values(healthy_instances.iter().map(|instance| instance.ip.clone()))
        }
    }
}
/// Returns whether an instance should be published.
///
/// The instance must report a "healthy" state; when `heartbeat_timeout_secs`
/// is non-zero, its last heartbeat (falling back to `observed_at`) must also
/// be within the timeout. Instances with no timestamp at all are rejected
/// once the freshness check is enabled.
fn instance_is_publishable(instance: &ServiceInstanceSpec, heartbeat_timeout_secs: u64) -> bool {
    if instance.state.as_deref() != Some("healthy") {
        return false;
    }
    // A zero timeout disables the freshness check entirely.
    if heartbeat_timeout_secs == 0 {
        return true;
    }
    let Some(last_heartbeat) = instance.last_heartbeat.or(instance.observed_at) else {
        return false;
    };
    Utc::now()
        .signed_duration_since(last_heartbeat)
        .num_seconds()
        <= heartbeat_timeout_secs as i64
}
/// Derives the zone-relative record name for a service.
///
/// Uses the spec's explicit name (falling back to the service name), drops
/// any trailing dots, and strips a redundant `.{zone}` suffix so a fully
/// qualified input collapses to its relative form.
fn record_name_for_service(spec: &DnsPublicationSpec, service: &ServiceSpec) -> String {
    let zone = normalize_zone_name(&spec.zone);
    let candidate = spec
        .name
        .clone()
        .unwrap_or_else(|| service.name.clone());
    let candidate = candidate.trim_end_matches('.');
    let zone_suffix = format!(".{zone}");
    match candidate.strip_suffix(&zone_suffix) {
        Some(relative) => relative.to_string(),
        None => candidate.to_string(),
    }
}
/// Canonicalizes a zone name for comparison and FQDN construction.
///
/// Strips surrounding whitespace (so accidental padding in a spec cannot
/// produce a malformed zone name) and then removes any trailing root dots,
/// e.g. `" example.com. "` -> `"example.com"`.
fn normalize_zone_name(zone: &str) -> String {
    zone.trim().trim_end_matches('.').to_string()
}
/// Replaces every character outside `[A-Za-z0-9_-]` with `-`, producing a
/// name safe for load-balancer / backend identifiers.
fn sanitize_name(value: &str) -> String {
    value
        .chars()
        .map(|ch| match ch {
            'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' => ch,
            _ => '-',
        })
        .collect()
}
/// Builds the deterministic backend name `<service>-<instance_id>`, sanitized
/// for use as a FiberLB identifier.
fn backend_name_for_instance(service: &ServiceSpec, instance: &ServiceInstanceSpec) -> String {
    let raw = [service.name.as_str(), instance.instance_id.as_str()].join("-");
    sanitize_name(&raw)
}
/// Maps the configured protocol string to a FiberLB listener protocol.
///
/// The spec's protocol wins over the service's; anything missing or
/// unrecognized falls back to TCP.
fn listener_protocol(
    spec: &LoadBalancerPublicationSpec,
    service: &ServiceSpec,
) -> ListenerProtocol {
    let protocol = spec
        .protocol
        .as_deref()
        .or(service.protocol.as_deref())
        .unwrap_or("tcp");
    if protocol == "http" {
        ListenerProtocol::Http
    } else if protocol == "https" {
        ListenerProtocol::Https
    } else if protocol == "terminated_https" {
        ListenerProtocol::TerminatedHttps
    } else if protocol == "udp" {
        ListenerProtocol::Udp
    } else {
        ListenerProtocol::Tcp
    }
}
/// Maps the configured protocol string to a FiberLB pool protocol.
///
/// Precedence: the spec's pool-specific protocol, then its listener protocol,
/// then the service protocol; anything missing or unrecognized falls back to
/// TCP.
fn pool_protocol(spec: &LoadBalancerPublicationSpec, service: &ServiceSpec) -> PoolProtocol {
    let protocol = spec
        .pool_protocol
        .as_deref()
        .or(spec.protocol.as_deref())
        .or(service.protocol.as_deref())
        .unwrap_or("tcp");
    if protocol == "http" {
        PoolProtocol::Http
    } else if protocol == "https" {
        PoolProtocol::Https
    } else if protocol == "udp" {
        PoolProtocol::Udp
    } else {
        PoolProtocol::Tcp
    }
}
/// Converts a blank (empty or whitespace-only) string to `None`; otherwise
/// returns the original, untrimmed value.
fn empty_to_none(value: String) -> Option<String> {
    let has_content = !value.trim().is_empty();
    has_content.then_some(value)
}
/// KV key under which a service's publication state is stored:
/// `<namespace>/clusters/<cluster>/publications/<service>`.
fn publication_key(cluster_namespace: &str, cluster_id: &str, service: &str) -> Vec<u8> {
    [
        cluster_namespace,
        "clusters",
        cluster_id,
        "publications",
        service,
    ]
    .join("/")
    .into_bytes()
}
/// Loads every persisted publication state for the cluster, keyed by service
/// name.
///
/// Entries that fail to decode are logged and skipped rather than failing the
/// whole sweep.
async fn load_publication_states(
    client: &mut Client,
    cluster_namespace: &str,
    cluster_id: &str,
) -> Result<HashMap<String, ServicePublicationState>> {
    let prefix = format!(
        "{}/clusters/{}/publications/",
        cluster_namespace, cluster_id
    );
    let kvs = client.get_prefix(prefix.as_bytes()).await?;
    let mut states = HashMap::with_capacity(kvs.len());
    for (_key, value) in kvs {
        match serde_json::from_slice::<ServicePublicationState>(&value) {
            Ok(state) => {
                // Keyed by the service name stored inside the state itself.
                states.insert(state.service.clone(), state);
            }
            Err(error) => warn!(error = %error, "failed to decode service publication state"),
        }
    }
    Ok(states)
}
/// Loads every instance record for a service in the cluster, sorted by
/// instance id for deterministic downstream processing.
///
/// Entries that fail to decode are logged and skipped rather than failing the
/// whole sweep.
async fn load_service_instances(
    client: &mut Client,
    cluster_namespace: &str,
    cluster_id: &str,
    service: &str,
) -> Result<Vec<ServiceInstanceSpec>> {
    let prefix = format!(
        "{}/clusters/{}/instances/{}/",
        cluster_namespace, cluster_id, service
    );
    let kvs = client.get_prefix(prefix.as_bytes()).await?;
    let mut instances = Vec::with_capacity(kvs.len());
    for (_key, value) in kvs {
        match serde_json::from_slice::<ServiceInstanceSpec>(&value) {
            Ok(instance) => instances.push(instance),
            Err(error) => {
                warn!(service = %service, error = %error, "failed to decode service instance")
            }
        }
    }
    // Stable ordering so "first instance" semantics are deterministic.
    instances.sort_by(|lhs, rhs| lhs.instance_id.cmp(&rhs.instance_id));
    Ok(instances)
}
#[cfg(test)]
mod tests {
    use super::*;
    use deployer_types::{ServicePorts, ServicePublicationSpec, ServiceScheduleSpec};

    /// Builds a minimal HTTP service with a LoadBalancer-mode DNS publication
    /// in `zone`.
    fn service_with_dns(name: &str, zone: &str) -> ServiceSpec {
        ServiceSpec {
            name: name.to_string(),
            ports: Some(ServicePorts {
                http: Some(8080),
                grpc: None,
            }),
            protocol: Some("http".to_string()),
            mtls_required: None,
            mesh_mode: None,
            schedule: Some(ServiceScheduleSpec::default()),
            publish: Some(ServicePublicationSpec {
                org_id: Some("default-org".to_string()),
                project_id: Some("default-project".to_string()),
                dns: Some(DnsPublicationSpec {
                    zone: zone.to_string(),
                    name: None,
                    ttl: 60,
                    mode: DnsPublishMode::LoadBalancer,
                }),
                load_balancer: None,
            }),
        }
    }

    // A record name that already carries the zone suffix is reduced to its
    // zone-relative form.
    #[test]
    fn test_record_name_strips_zone_suffix() {
        let spec = DnsPublicationSpec {
            zone: "native.cluster.test".to_string(),
            name: Some("api.native.cluster.test".to_string()),
            ttl: 60,
            mode: DnsPublishMode::LoadBalancer,
        };
        let service = service_with_dns("api", "native.cluster.test");
        assert_eq!(record_name_for_service(&spec, &service), "api");
    }

    // With no load-balancer VIP available, LoadBalancer-mode DNS falls back
    // to the first healthy instance IP.
    #[test]
    fn test_dns_values_fall_back_to_healthy_instance_when_vip_missing() {
        let spec = DnsPublicationSpec {
            zone: "native.cluster.test".to_string(),
            name: Some("api".to_string()),
            ttl: 60,
            mode: DnsPublishMode::LoadBalancer,
        };
        let instances = vec![ServiceInstanceSpec {
            instance_id: "api-node01".to_string(),
            service: "api".to_string(),
            node_id: "node01".to_string(),
            ip: "10.0.0.11".to_string(),
            port: 8080,
            mesh_port: None,
            version: None,
            health_check: None,
            process: None,
            container: None,
            managed_by: None,
            state: Some("healthy".to_string()),
            last_heartbeat: None,
            observed_at: None,
        }];
        assert_eq!(
            desired_dns_values(&spec, &instances, None),
            vec!["10.0.0.11".to_string()]
        );
    }

    // DirectMulti publishes every healthy instance IP, sorted with
    // duplicates removed.
    #[test]
    fn test_direct_multi_dns_publishes_all_healthy_instance_ips() {
        let spec = DnsPublicationSpec {
            zone: "native.cluster.test".to_string(),
            name: Some("daemon".to_string()),
            ttl: 60,
            mode: DnsPublishMode::DirectMulti,
        };
        let instances = vec![
            ServiceInstanceSpec {
                instance_id: "daemon-node02".to_string(),
                service: "daemon".to_string(),
                node_id: "node02".to_string(),
                ip: "10.0.0.12".to_string(),
                port: 8080,
                mesh_port: None,
                version: None,
                health_check: None,
                process: None,
                container: None,
                managed_by: None,
                state: Some("healthy".to_string()),
                last_heartbeat: None,
                observed_at: None,
            },
            ServiceInstanceSpec {
                instance_id: "daemon-node01".to_string(),
                service: "daemon".to_string(),
                node_id: "node01".to_string(),
                ip: "10.0.0.11".to_string(),
                port: 8080,
                mesh_port: None,
                version: None,
                health_check: None,
                process: None,
                container: None,
                managed_by: None,
                state: Some("healthy".to_string()),
                last_heartbeat: None,
                observed_at: None,
            },
            // Duplicate IP on a third node: must be collapsed in the output.
            ServiceInstanceSpec {
                instance_id: "daemon-node03".to_string(),
                service: "daemon".to_string(),
                node_id: "node03".to_string(),
                ip: "10.0.0.11".to_string(),
                port: 8080,
                mesh_port: None,
                version: None,
                health_check: None,
                process: None,
                container: None,
                managed_by: None,
                state: Some("healthy".to_string()),
                last_heartbeat: None,
                observed_at: None,
            },
        ];
        assert_eq!(
            desired_dns_values(&spec, &instances, None),
            vec!["10.0.0.11".to_string(), "10.0.0.12".to_string()]
        );
    }

    // Both heartbeat freshness and the reported health state gate
    // publishability.
    #[test]
    fn test_publishable_instance_requires_fresh_heartbeat() {
        let now = Utc::now();
        let mut fresh = ServiceInstanceSpec {
            instance_id: "api-node01".to_string(),
            service: "api".to_string(),
            node_id: "node01".to_string(),
            ip: "10.0.0.11".to_string(),
            port: 8080,
            mesh_port: None,
            version: None,
            health_check: None,
            process: None,
            container: None,
            managed_by: None,
            state: Some("healthy".to_string()),
            last_heartbeat: Some(now),
            observed_at: None,
        };
        assert!(instance_is_publishable(&fresh, 60));
        // Heartbeat older than the 60s window: rejected.
        fresh.last_heartbeat = Some(now - chrono::Duration::seconds(120));
        assert!(!instance_is_publishable(&fresh, 60));
        // Fresh heartbeat but unhealthy state: still rejected.
        fresh.last_heartbeat = Some(now);
        fresh.state = Some("unhealthy".to_string());
        assert!(!instance_is_publishable(&fresh, 60));
    }
}