1356 lines
43 KiB
Rust
1356 lines
43 KiB
Rust
use std::collections::{BTreeMap, HashMap, HashSet};
|
|
|
|
use anyhow::{Context, Result};
|
|
use chainfire_client::Client;
|
|
use chrono::Utc;
|
|
use deployer_types::{
|
|
DnsPublicationSpec, DnsPublishMode, LoadBalancerPublicationSpec, PublishedDnsRecordState,
|
|
PublishedLoadBalancerState, ServiceInstanceSpec, ServicePublicationState, ServiceSpec,
|
|
};
|
|
use fiberlb_api::backend_service_client::BackendServiceClient;
|
|
use fiberlb_api::listener_service_client::ListenerServiceClient;
|
|
use fiberlb_api::load_balancer_service_client::LoadBalancerServiceClient;
|
|
use fiberlb_api::pool_service_client::PoolServiceClient;
|
|
use fiberlb_api::{
|
|
BackendAdminState, CreateBackendRequest, CreateListenerRequest, CreateLoadBalancerRequest,
|
|
CreatePoolRequest, DeleteBackendRequest, DeleteListenerRequest, DeleteLoadBalancerRequest,
|
|
DeletePoolRequest, ListBackendsRequest, ListListenersRequest, ListLoadBalancersRequest,
|
|
ListPoolsRequest, Listener, ListenerProtocol, Pool, PoolAlgorithm, PoolProtocol,
|
|
UpdateBackendRequest,
|
|
};
|
|
use flashdns_api::proto::record_data;
|
|
use flashdns_api::proto::{
|
|
ARecord, CreateRecordRequest, CreateZoneRequest, DeleteRecordRequest, ListRecordsRequest,
|
|
ListZonesRequest, RecordData, RecordInfo, UpdateRecordRequest, ZoneInfo,
|
|
};
|
|
use flashdns_api::{RecordServiceClient, ZoneServiceClient};
|
|
use tonic::Code;
|
|
use tracing::{info, warn};
|
|
|
|
use crate::auth::{authorized_request, issue_controller_token};
|
|
|
|
/// Configuration for the service publication controller, scoped to a single
/// cluster. Optional endpoints gate which publication paths are active.
pub struct PublicationConfig {
    /// Cluster being reconciled; combined with the service name to derive a
    /// default load-balancer name when the spec does not provide one.
    pub cluster_id: String,
    /// Maximum heartbeat age in seconds before an otherwise-healthy instance
    /// is excluded from publication; `0` disables the freshness check.
    pub heartbeat_timeout_secs: u64,
    /// IAM endpoint used to mint controller tokens; publication fails and
    /// cleanup is skipped (with a warning) when unset.
    pub iam_endpoint: Option<String>,
    /// FiberLB gRPC endpoint; load-balancer publication is not performed
    /// when unset.
    pub fiberlb_endpoint: Option<String>,
    /// FlashDNS gRPC endpoint; DNS publication is not performed when unset.
    pub flashdns_endpoint: Option<String>,
    /// Fallback address recorded as the published VIP when FiberLB reports
    /// an empty `vip_address`.
    pub publish_address: Option<String>,
    /// Principal id the controller authenticates as when requesting tokens.
    pub controller_principal_id: String,
    /// Org used when neither the publish spec nor prior state names one.
    pub default_org_id: String,
    /// Project used when neither the publish spec nor prior state names one.
    pub default_project_id: String,
    /// When true, log intended changes instead of calling FiberLB/FlashDNS.
    pub dry_run: bool,
}
|
|
|
|
/// Drives publication reconciliation across all services of a cluster,
/// delegating per-service work to the wrapped [`PublicationController`].
pub struct PublicationReconciler {
    // Performs the actual per-service reconcile/cleanup against FiberLB and
    // FlashDNS.
    controller: PublicationController,
}
|
|
|
|
/// Reconciles the external publication (FiberLB load balancer and FlashDNS
/// records) of a single service against its desired spec.
pub struct PublicationController {
    config: PublicationConfig,
}
|
|
|
|
impl PublicationReconciler {
|
|
pub fn new(config: PublicationConfig) -> Self {
|
|
Self {
|
|
controller: PublicationController::new(config),
|
|
}
|
|
}
|
|
|
|
pub async fn reconcile_all(
|
|
&self,
|
|
client: &mut Client,
|
|
cluster_namespace: &str,
|
|
cluster_id: &str,
|
|
services: &[ServiceSpec],
|
|
_dry_run: bool,
|
|
) -> Result<()> {
|
|
let existing = load_publication_states(client, cluster_namespace, cluster_id).await?;
|
|
let mut desired_services = HashSet::new();
|
|
|
|
for service in services.iter().filter(|service| service.publish.is_some()) {
|
|
desired_services.insert(service.name.clone());
|
|
let instances =
|
|
load_service_instances(client, cluster_namespace, cluster_id, &service.name)
|
|
.await?;
|
|
let next = self
|
|
.controller
|
|
.reconcile(service, &instances, existing.get(&service.name))
|
|
.await?;
|
|
let key = publication_key(cluster_namespace, cluster_id, &service.name);
|
|
match next {
|
|
Some(state) => {
|
|
client.put(&key, &serde_json::to_vec(&state)?).await?;
|
|
}
|
|
None => {
|
|
client.delete(&key).await?;
|
|
}
|
|
}
|
|
}
|
|
|
|
for (service_name, state) in existing {
|
|
if desired_services.contains(&service_name) {
|
|
continue;
|
|
}
|
|
self.controller.cleanup(&state).await?;
|
|
client
|
|
.delete(&publication_key(
|
|
cluster_namespace,
|
|
cluster_id,
|
|
&service_name,
|
|
))
|
|
.await?;
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
impl PublicationController {
    /// Create a controller that publishes services according to `config`.
    pub fn new(config: PublicationConfig) -> Self {
        Self { config }
    }

    /// Reconcile the external publication (load balancer and/or DNS) of one
    /// service and return the state to persist, or `None` when the service
    /// carries no `publish` spec.
    ///
    /// Only instances accepted by `instance_is_publishable` are used as load
    /// balancer backends and DNS targets.
    pub async fn reconcile(
        &self,
        service: &ServiceSpec,
        instances: &[ServiceInstanceSpec],
        existing: Option<&ServicePublicationState>,
    ) -> Result<Option<ServicePublicationState>> {
        let Some(publication) = service.publish.as_ref() else {
            return Ok(None);
        };

        let (org_id, project_id) = self.publication_scope(service, existing);
        // Only mint an IAM token when there is publication work that needs it.
        let token = if publication.load_balancer.is_some() || publication.dns.is_some() {
            Some(self.issue_token(&org_id, &project_id).await?)
        } else {
            None
        };

        let healthy_instances: Vec<_> = instances
            .iter()
            .filter(|instance| {
                instance_is_publishable(instance, self.config.heartbeat_timeout_secs)
            })
            .cloned()
            .collect();

        // Load balancer first: DNS reconciliation below may publish its VIP.
        let load_balancer = if let Some(lb_spec) = publication.load_balancer.as_ref() {
            Some(
                self.reconcile_load_balancer(
                    service,
                    lb_spec,
                    &org_id,
                    &project_id,
                    token
                        .as_deref()
                        .context("publication requested without controller token")?,
                    &healthy_instances,
                    existing.and_then(|state| state.load_balancer.as_ref()),
                )
                .await?,
            )
        } else {
            None
        };

        let dns = if let Some(dns_spec) = publication.dns.as_ref() {
            self.reconcile_dns(
                service,
                dns_spec,
                &org_id,
                &project_id,
                token
                    .as_deref()
                    .context("publication requested without controller token")?,
                &healthy_instances,
                load_balancer.as_ref(),
                existing.and_then(|state| state.dns.as_ref()),
            )
            .await?
        } else {
            None
        };

        Ok(Some(ServicePublicationState {
            service: service.name.clone(),
            org_id,
            project_id,
            load_balancer,
            dns,
            observed_at: Some(Utc::now()),
        }))
    }

    /// Tear down everything recorded in `state`: DNS records first, then the
    /// load balancer stack. Requires an IAM endpoint to mint a token scoped
    /// to the state's org/project; without one, cleanup is skipped with a
    /// warning rather than failing.
    pub async fn cleanup(&self, state: &ServicePublicationState) -> Result<()> {
        let Some(iam_endpoint) = self.config.iam_endpoint.as_deref() else {
            warn!(service = %state.service, "skipping publication cleanup without IAM endpoint");
            return Ok(());
        };
        let token = issue_controller_token(
            iam_endpoint,
            &self.config.controller_principal_id,
            &state.org_id,
            &state.project_id,
        )
        .await?;

        if let Some(dns_state) = state.dns.as_ref() {
            self.cleanup_dns(&token, dns_state).await?;
        }
        if let Some(lb_state) = state.load_balancer.as_ref() {
            self.cleanup_load_balancer(&token, lb_state).await?;
        }

        Ok(())
    }

    /// Resolve the (org, project) scope for a service publication.
    ///
    /// Precedence: publish spec, then the nested load-balancer spec, then the
    /// previously recorded state, then the configured defaults.
    fn publication_scope(
        &self,
        service: &ServiceSpec,
        existing: Option<&ServicePublicationState>,
    ) -> (String, String) {
        let publish = service.publish.as_ref();
        let org_id = publish
            .and_then(|spec| spec.org_id.clone())
            .or_else(|| {
                publish
                    .and_then(|spec| spec.load_balancer.as_ref())
                    .and_then(|spec| spec.org_id.clone())
            })
            .or_else(|| existing.map(|state| state.org_id.clone()))
            .unwrap_or_else(|| self.config.default_org_id.clone());
        let project_id = publish
            .and_then(|spec| spec.project_id.clone())
            .or_else(|| {
                publish
                    .and_then(|spec| spec.load_balancer.as_ref())
                    .and_then(|spec| spec.project_id.clone())
            })
            .or_else(|| existing.map(|state| state.project_id.clone()))
            .unwrap_or_else(|| self.config.default_project_id.clone());
        (org_id, project_id)
    }

    /// Mint a controller token scoped to `org_id`/`project_id` via IAM.
    async fn issue_token(&self, org_id: &str, project_id: &str) -> Result<String> {
        let iam_endpoint = self
            .config
            .iam_endpoint
            .as_deref()
            .context("publication requires --iam-endpoint")?;
        issue_controller_token(
            iam_endpoint,
            &self.config.controller_principal_id,
            org_id,
            project_id,
        )
        .await
    }

    /// Ensure the FiberLB load balancer, pool, listener and backends for a
    /// published service exist and match the spec.
    ///
    /// In dry-run mode nothing is sent to FiberLB; the previous state (or an
    /// empty placeholder) is returned instead.
    async fn reconcile_load_balancer(
        &self,
        service: &ServiceSpec,
        spec: &LoadBalancerPublicationSpec,
        org_id: &str,
        project_id: &str,
        auth_token: &str,
        healthy_instances: &[ServiceInstanceSpec],
        existing: Option<&PublishedLoadBalancerState>,
    ) -> Result<PublishedLoadBalancerState> {
        // Without an endpoint we can only keep whatever state already exists;
        // requesting LB publication with neither is an error.
        let Some(endpoint) = self.config.fiberlb_endpoint.as_ref() else {
            warn!(service = %service.name, "publication requested without FiberLB endpoint");
            return existing
                .cloned()
                .context("missing FiberLB endpoint for load balancer publication");
        };

        let listener_port = resolve_target_port(service, spec)
            .context("load balancer publication requires listener_port or target port")?;
        // Derive deterministic resource names so re-runs find what they made.
        let lb_name = spec.name.clone().unwrap_or_else(|| {
            sanitize_name(&format!("{}-{}", self.config.cluster_id, service.name))
        });
        let pool_name = format!("{lb_name}-pool");
        let listener_name = format!("{lb_name}-listener-{listener_port}");

        if self.config.dry_run {
            info!(service = %service.name, load_balancer = %lb_name, "would reconcile native load balancer");
            return Ok(existing.cloned().unwrap_or(PublishedLoadBalancerState {
                id: String::new(),
                pool_id: String::new(),
                listener_id: String::new(),
                vip_address: None,
            }));
        }

        let mut lb_client = LoadBalancerServiceClient::connect(endpoint.clone()).await?;
        let mut pool_client = PoolServiceClient::connect(endpoint.clone()).await?;
        let mut listener_client = ListenerServiceClient::connect(endpoint.clone()).await?;
        let mut backend_client = BackendServiceClient::connect(endpoint.clone()).await?;

        // Create/find resources top-down: LB -> pool -> listener -> backends.
        let load_balancer = ensure_load_balancer(
            &mut lb_client,
            auth_token,
            existing,
            org_id,
            project_id,
            &lb_name,
        )
        .await?;
        let pool = ensure_pool(
            &mut pool_client,
            auth_token,
            existing,
            &load_balancer.id,
            &pool_name,
            spec,
            service,
        )
        .await?;
        let listener = ensure_listener(
            &mut listener_client,
            auth_token,
            existing,
            &load_balancer.id,
            &listener_name,
            listener_port,
            &pool.id,
            spec,
            service,
        )
        .await?;

        reconcile_backends(
            &mut backend_client,
            auth_token,
            &pool.id,
            service,
            healthy_instances,
        )
        .await?;

        Ok(PublishedLoadBalancerState {
            id: load_balancer.id,
            pool_id: pool.id,
            listener_id: listener.id,
            // Prefer the VIP FiberLB reports; fall back to the configured
            // publish address when it is empty.
            vip_address: empty_to_none(load_balancer.vip_address)
                .or_else(|| self.config.publish_address.clone()),
        })
    }

    /// Ensure the FlashDNS zone and A records for a published service match
    /// the desired value set.
    ///
    /// When no values are publishable (no healthy instances / no VIP), any
    /// previously published records are deleted and `None` is returned.
    async fn reconcile_dns(
        &self,
        service: &ServiceSpec,
        spec: &DnsPublicationSpec,
        org_id: &str,
        project_id: &str,
        auth_token: &str,
        healthy_instances: &[ServiceInstanceSpec],
        load_balancer: Option<&PublishedLoadBalancerState>,
        existing: Option<&PublishedDnsRecordState>,
    ) -> Result<Option<PublishedDnsRecordState>> {
        let Some(endpoint) = self.config.flashdns_endpoint.as_ref() else {
            warn!(service = %service.name, "DNS publication requested without FlashDNS endpoint");
            return Ok(existing.cloned());
        };

        let desired_values = desired_dns_values(spec, healthy_instances, load_balancer);
        if desired_values.is_empty() {
            // Nothing to point at: remove stale records instead of serving
            // dead addresses.
            if let Some(existing) = existing {
                self.cleanup_dns(auth_token, existing).await?;
            }
            return Ok(None);
        }

        let zone_name = normalize_zone_name(&spec.zone);
        let record_name = record_name_for_service(spec, service);
        let fqdn = format!("{}.{}", record_name, zone_name);
        let primary_value = desired_values.first().cloned().unwrap_or_default();

        if self.config.dry_run {
            info!(
                service = %service.name,
                fqdn = %fqdn,
                values = ?desired_values,
                "would reconcile native DNS record set"
            );
            return Ok(existing.cloned().or(Some(PublishedDnsRecordState {
                zone_id: String::new(),
                record_id: String::new(),
                record_ids: Vec::new(),
                fqdn,
                value: primary_value,
                values: desired_values,
            })));
        }

        let mut zone_client = ZoneServiceClient::connect(endpoint.clone()).await?;
        let mut record_client = RecordServiceClient::connect(endpoint.clone()).await?;

        let zone =
            ensure_zone(&mut zone_client, auth_token, &zone_name, org_id, project_id).await?;
        let records = ensure_records(
            &mut record_client,
            auth_token,
            existing,
            &zone.id,
            &record_name,
            spec.ttl,
            &desired_values,
        )
        .await?;
        let record_ids = records
            .iter()
            .map(|record| record.id.clone())
            .collect::<Vec<_>>();

        Ok(Some(PublishedDnsRecordState {
            zone_id: zone.id,
            // `record_id` keeps the legacy single-record field populated for
            // older state readers; `record_ids` carries the full set.
            record_id: record_ids.first().cloned().unwrap_or_default(),
            record_ids,
            fqdn,
            value: primary_value,
            values: desired_values,
        }))
    }

    /// Delete the DNS records recorded in `dns_state`, tolerating records
    /// that are already gone. No-op when no FlashDNS endpoint is configured.
    async fn cleanup_dns(
        &self,
        auth_token: &str,
        dns_state: &PublishedDnsRecordState,
    ) -> Result<()> {
        let Some(endpoint) = self.config.flashdns_endpoint.as_ref() else {
            return Ok(());
        };
        let mut record_client = RecordServiceClient::connect(endpoint.clone()).await?;
        let mut record_ids = dns_state.record_ids.clone();
        // Older state may only carry the single legacy `record_id` field.
        if record_ids.is_empty() && !dns_state.record_id.is_empty() {
            record_ids.push(dns_state.record_id.clone());
        }
        record_ids.sort();
        record_ids.dedup();

        for record_id in record_ids {
            match record_client
                .delete_record(authorized_request(
                    DeleteRecordRequest { id: record_id },
                    auth_token,
                ))
                .await
            {
                Ok(_) => {}
                // Already deleted: cleanup stays idempotent.
                Err(status) if status.code() == Code::NotFound => {}
                Err(status) => return Err(status.into()),
            }
        }
        Ok(())
    }

    /// Delete the published FiberLB resources in reverse dependency order:
    /// backends, listener, pool, then the load balancer itself. NotFound
    /// responses are tolerated so cleanup is idempotent. No-op when no
    /// FiberLB endpoint is configured.
    async fn cleanup_load_balancer(
        &self,
        auth_token: &str,
        lb_state: &PublishedLoadBalancerState,
    ) -> Result<()> {
        let Some(endpoint) = self.config.fiberlb_endpoint.as_ref() else {
            return Ok(());
        };

        let mut backend_client = BackendServiceClient::connect(endpoint.clone()).await?;
        let mut listener_client = ListenerServiceClient::connect(endpoint.clone()).await?;
        let mut pool_client = PoolServiceClient::connect(endpoint.clone()).await?;
        let mut lb_client = LoadBalancerServiceClient::connect(endpoint.clone()).await?;

        // Backend listing is best-effort: a failed list is ignored and the
        // remaining deletions still run.
        if !lb_state.pool_id.is_empty() {
            if let Ok(response) = backend_client
                .list_backends(authorized_request(
                    ListBackendsRequest {
                        pool_id: lb_state.pool_id.clone(),
                        page_size: 256,
                        page_token: String::new(),
                    },
                    auth_token,
                ))
                .await
            {
                for backend in response.into_inner().backends {
                    delete_backend(&mut backend_client, auth_token, &backend.id).await?;
                }
            }
        }

        if !lb_state.listener_id.is_empty() {
            match listener_client
                .delete_listener(authorized_request(
                    DeleteListenerRequest {
                        id: lb_state.listener_id.clone(),
                    },
                    auth_token,
                ))
                .await
            {
                Ok(_) => {}
                Err(status) if status.code() == Code::NotFound => {}
                Err(status) => return Err(status.into()),
            }
        }

        if !lb_state.pool_id.is_empty() {
            match pool_client
                .delete_pool(authorized_request(
                    DeletePoolRequest {
                        id: lb_state.pool_id.clone(),
                    },
                    auth_token,
                ))
                .await
            {
                Ok(_) => {}
                Err(status) if status.code() == Code::NotFound => {}
                Err(status) => return Err(status.into()),
            }
        }

        if !lb_state.id.is_empty() {
            match lb_client
                .delete_load_balancer(authorized_request(
                    DeleteLoadBalancerRequest {
                        id: lb_state.id.clone(),
                    },
                    auth_token,
                ))
                .await
            {
                Ok(_) => {}
                Err(status) if status.code() == Code::NotFound => {}
                Err(status) => return Err(status.into()),
            }
        }

        Ok(())
    }
}
|
|
|
|
async fn ensure_load_balancer(
|
|
client: &mut LoadBalancerServiceClient<tonic::transport::Channel>,
|
|
auth_token: &str,
|
|
existing: Option<&PublishedLoadBalancerState>,
|
|
org_id: &str,
|
|
project_id: &str,
|
|
name: &str,
|
|
) -> Result<fiberlb_api::LoadBalancer> {
|
|
let current = client
|
|
.list_load_balancers(authorized_request(
|
|
ListLoadBalancersRequest {
|
|
org_id: org_id.to_string(),
|
|
project_id: project_id.to_string(),
|
|
page_size: 256,
|
|
page_token: String::new(),
|
|
},
|
|
auth_token,
|
|
))
|
|
.await?
|
|
.into_inner()
|
|
.loadbalancers
|
|
.into_iter()
|
|
.find(|lb| {
|
|
existing.map(|state| state.id.as_str()) == Some(lb.id.as_str()) || lb.name == name
|
|
});
|
|
|
|
if let Some(load_balancer) = current {
|
|
return Ok(load_balancer);
|
|
}
|
|
|
|
Ok(client
|
|
.create_load_balancer(authorized_request(
|
|
CreateLoadBalancerRequest {
|
|
name: name.to_string(),
|
|
org_id: org_id.to_string(),
|
|
project_id: project_id.to_string(),
|
|
description: format!("native runtime service {name}"),
|
|
vip_address: String::new(),
|
|
},
|
|
auth_token,
|
|
))
|
|
.await?
|
|
.into_inner()
|
|
.loadbalancer
|
|
.context("FiberLB returned empty CreateLoadBalancer response")?)
|
|
}
|
|
|
|
async fn ensure_pool(
|
|
client: &mut PoolServiceClient<tonic::transport::Channel>,
|
|
auth_token: &str,
|
|
existing: Option<&PublishedLoadBalancerState>,
|
|
load_balancer_id: &str,
|
|
name: &str,
|
|
spec: &LoadBalancerPublicationSpec,
|
|
service: &ServiceSpec,
|
|
) -> Result<Pool> {
|
|
let current = client
|
|
.list_pools(authorized_request(
|
|
ListPoolsRequest {
|
|
loadbalancer_id: load_balancer_id.to_string(),
|
|
page_size: 256,
|
|
page_token: String::new(),
|
|
},
|
|
auth_token,
|
|
))
|
|
.await?
|
|
.into_inner()
|
|
.pools
|
|
.into_iter()
|
|
.find(|pool| {
|
|
existing.map(|state| state.pool_id.as_str()) == Some(pool.id.as_str())
|
|
|| pool.name == name
|
|
});
|
|
|
|
if let Some(pool) = current {
|
|
return Ok(pool);
|
|
}
|
|
|
|
Ok(client
|
|
.create_pool(authorized_request(
|
|
CreatePoolRequest {
|
|
name: name.to_string(),
|
|
loadbalancer_id: load_balancer_id.to_string(),
|
|
algorithm: PoolAlgorithm::RoundRobin as i32,
|
|
protocol: pool_protocol(spec, service) as i32,
|
|
session_persistence: None,
|
|
},
|
|
auth_token,
|
|
))
|
|
.await?
|
|
.into_inner()
|
|
.pool
|
|
.context("FiberLB returned empty CreatePool response")?)
|
|
}
|
|
|
|
async fn ensure_listener(
|
|
client: &mut ListenerServiceClient<tonic::transport::Channel>,
|
|
auth_token: &str,
|
|
existing: Option<&PublishedLoadBalancerState>,
|
|
load_balancer_id: &str,
|
|
name: &str,
|
|
port: u16,
|
|
default_pool_id: &str,
|
|
spec: &LoadBalancerPublicationSpec,
|
|
service: &ServiceSpec,
|
|
) -> Result<Listener> {
|
|
let listeners = client
|
|
.list_listeners(authorized_request(
|
|
ListListenersRequest {
|
|
loadbalancer_id: load_balancer_id.to_string(),
|
|
page_size: 256,
|
|
page_token: String::new(),
|
|
},
|
|
auth_token,
|
|
))
|
|
.await?
|
|
.into_inner()
|
|
.listeners;
|
|
|
|
if let Some(listener) = listeners.iter().find(|listener| {
|
|
existing.map(|state| state.listener_id.as_str()) == Some(listener.id.as_str())
|
|
|| listener.name == name
|
|
}) {
|
|
let listener = listener.clone();
|
|
if listener.port == port as u32
|
|
&& listener.protocol == listener_protocol(spec, service) as i32
|
|
&& listener.default_pool_id == default_pool_id
|
|
{
|
|
return Ok(listener);
|
|
}
|
|
|
|
client
|
|
.delete_listener(authorized_request(
|
|
DeleteListenerRequest {
|
|
id: listener.id.clone(),
|
|
},
|
|
auth_token,
|
|
))
|
|
.await?;
|
|
}
|
|
|
|
Ok(client
|
|
.create_listener(authorized_request(
|
|
CreateListenerRequest {
|
|
name: name.to_string(),
|
|
loadbalancer_id: load_balancer_id.to_string(),
|
|
protocol: listener_protocol(spec, service) as i32,
|
|
port: port as u32,
|
|
default_pool_id: default_pool_id.to_string(),
|
|
tls_config: None,
|
|
connection_limit: 0,
|
|
},
|
|
auth_token,
|
|
))
|
|
.await?
|
|
.into_inner()
|
|
.listener
|
|
.context("FiberLB returned empty CreateListener response")?)
|
|
}
|
|
|
|
/// Reconcile the pool's backend set against the healthy instance list.
///
/// Stale backends (names no longer desired) are deleted; existing backends
/// are kept as-is when address/port/admin-state already match, re-enabled
/// in place when only the admin state differs, or deleted and recreated
/// when the address or port changed.
async fn reconcile_backends(
    client: &mut BackendServiceClient<tonic::transport::Channel>,
    auth_token: &str,
    pool_id: &str,
    service: &ServiceSpec,
    healthy_instances: &[ServiceInstanceSpec],
) -> Result<()> {
    let existing = client
        .list_backends(authorized_request(
            ListBackendsRequest {
                pool_id: pool_id.to_string(),
                page_size: 256,
                page_token: String::new(),
            },
            auth_token,
        ))
        .await?
        .into_inner()
        .backends;

    // Backends are correlated to instances by their sanitized name.
    let desired_names: HashSet<String> = healthy_instances
        .iter()
        .map(|instance| backend_name_for_instance(service, instance))
        .collect();

    // Drop backends whose instance is gone or no longer healthy.
    for backend in &existing {
        if !desired_names.contains(&backend.name) {
            delete_backend(client, auth_token, &backend.id).await?;
        }
    }

    for instance in healthy_instances {
        let backend_name = backend_name_for_instance(service, instance);
        let matching = existing.iter().find(|backend| backend.name == backend_name);

        if let Some(backend) = matching {
            // Fully up to date: nothing to do.
            if backend.address == instance.ip
                && backend.port == instance.port as u32
                && backend.admin_state == BackendAdminState::Enabled as i32
            {
                continue;
            }

            if backend.address != instance.ip || backend.port != instance.port as u32 {
                // Address/port changed: delete and fall through to recreate.
                delete_backend(client, auth_token, &backend.id).await?;
            } else {
                // Same endpoint but disabled: re-enable in place.
                client
                    .update_backend(authorized_request(
                        UpdateBackendRequest {
                            id: backend.id.clone(),
                            name: backend.name.clone(),
                            weight: backend.weight,
                            admin_state: BackendAdminState::Enabled as i32,
                        },
                        auth_token,
                    ))
                    .await?;
                continue;
            }
        }

        client
            .create_backend(authorized_request(
                CreateBackendRequest {
                    name: backend_name,
                    pool_id: pool_id.to_string(),
                    address: instance.ip.clone(),
                    port: instance.port as u32,
                    weight: 1,
                },
                auth_token,
            ))
            .await?;
    }

    Ok(())
}
|
|
|
|
async fn delete_backend(
|
|
client: &mut BackendServiceClient<tonic::transport::Channel>,
|
|
auth_token: &str,
|
|
backend_id: &str,
|
|
) -> Result<()> {
|
|
match client
|
|
.delete_backend(authorized_request(
|
|
DeleteBackendRequest {
|
|
id: backend_id.to_string(),
|
|
},
|
|
auth_token,
|
|
))
|
|
.await
|
|
{
|
|
Ok(_) => Ok(()),
|
|
Err(status) if status.code() == Code::NotFound => Ok(()),
|
|
Err(status) => Err(status.into()),
|
|
}
|
|
}
|
|
|
|
async fn ensure_zone(
|
|
client: &mut ZoneServiceClient<tonic::transport::Channel>,
|
|
auth_token: &str,
|
|
zone_name: &str,
|
|
org_id: &str,
|
|
project_id: &str,
|
|
) -> Result<ZoneInfo> {
|
|
let response = client
|
|
.list_zones(authorized_request(
|
|
ListZonesRequest {
|
|
org_id: org_id.to_string(),
|
|
project_id: project_id.to_string(),
|
|
name_filter: zone_name.to_string(),
|
|
page_size: 256,
|
|
page_token: String::new(),
|
|
},
|
|
auth_token,
|
|
))
|
|
.await?;
|
|
|
|
if let Some(zone) = response
|
|
.into_inner()
|
|
.zones
|
|
.into_iter()
|
|
.find(|zone| normalize_zone_name(&zone.name) == zone_name)
|
|
{
|
|
return Ok(zone);
|
|
}
|
|
|
|
Ok(client
|
|
.create_zone(authorized_request(
|
|
CreateZoneRequest {
|
|
name: zone_name.to_string(),
|
|
org_id: org_id.to_string(),
|
|
project_id: project_id.to_string(),
|
|
primary_ns: "ns1.native.cluster".to_string(),
|
|
admin_email: "admin@native.cluster".to_string(),
|
|
},
|
|
auth_token,
|
|
))
|
|
.await?
|
|
.into_inner()
|
|
.zone
|
|
.context("FlashDNS returned empty CreateZone response")?)
|
|
}
|
|
|
|
/// Converge the zone's A records for `name` onto exactly `desired_values`.
///
/// Existing records are matched by name or by the ids previously recorded in
/// `existing`, grouped by their A-record address, reused/updated per desired
/// value, and any leftovers deleted. Records without A data are ignored.
/// Returns the resulting record set sorted by (address, id).
async fn ensure_records(
    client: &mut RecordServiceClient<tonic::transport::Channel>,
    auth_token: &str,
    existing: Option<&PublishedDnsRecordState>,
    zone_id: &str,
    name: &str,
    ttl: u32,
    desired_values: &[String],
) -> Result<Vec<RecordInfo>> {
    let records = client
        .list_records(authorized_request(
            ListRecordsRequest {
                zone_id: zone_id.to_string(),
                name_filter: name.to_string(),
                type_filter: "A".to_string(),
                page_size: 256,
                page_token: String::new(),
            },
            auth_token,
        ))
        .await?
        .into_inner()
        .records;

    // Consider a record "ours" if its name matches, or we created it before
    // (tracked via the legacy single record_id or the record_ids list).
    let mut matching = records
        .iter()
        .filter(|record| {
            record.name == name
                || existing.map(|state| state.record_id.as_str()) == Some(record.id.as_str())
                || existing
                    .map(|state| state.record_ids.iter().any(|id| id == &record.id))
                    .unwrap_or(false)
        })
        .cloned()
        .collect::<Vec<_>>();
    // Deterministic ordering so the same record is reused run over run.
    matching.sort_by(|lhs, rhs| {
        record_a_value(lhs)
            .cmp(&record_a_value(rhs))
            .then_with(|| lhs.id.cmp(&rhs.id))
    });

    // Bucket by A-record address; records without A data are skipped (and
    // therefore never reused nor deleted below).
    let mut records_by_value: BTreeMap<String, Vec<RecordInfo>> = BTreeMap::new();
    for record in matching {
        let Some(value) = record_a_value(&record) else {
            continue;
        };
        records_by_value.entry(value).or_default().push(record);
    }

    let mut ensured = Vec::new();
    for desired_value in desired_values {
        // Reuse one existing record per desired address, if available.
        if let Some(record) = records_by_value.get_mut(desired_value).and_then(|records| {
            if records.is_empty() {
                None
            } else {
                Some(records.remove(0))
            }
        }) {
            // Refresh TTL / re-enable when the reused record drifted.
            if record.ttl != ttl || !record.enabled {
                let updated = client
                    .update_record(authorized_request(
                        UpdateRecordRequest {
                            id: record.id.clone(),
                            ttl: Some(ttl),
                            data: Some(RecordData {
                                data: Some(record_data::Data::A(ARecord {
                                    address: desired_value.to_string(),
                                })),
                            }),
                            enabled: Some(true),
                        },
                        auth_token,
                    ))
                    .await?
                    .into_inner()
                    .record
                    .context("FlashDNS returned empty UpdateRecord response")?;
                ensured.push(updated);
            } else {
                ensured.push(record);
            }
            continue;
        }

        // No reusable record for this address: create one.
        ensured.push(
            client
                .create_record(authorized_request(
                    CreateRecordRequest {
                        zone_id: zone_id.to_string(),
                        name: name.to_string(),
                        record_type: "A".to_string(),
                        ttl,
                        data: Some(RecordData {
                            data: Some(record_data::Data::A(ARecord {
                                address: desired_value.to_string(),
                            })),
                        }),
                    },
                    auth_token,
                ))
                .await?
                .into_inner()
                .record
                .context("FlashDNS returned empty CreateRecord response")?,
        );
    }

    // Everything left in the buckets is stale (undesired address, or a
    // duplicate for a desired one) — remove it.
    for extra in records_by_value.into_values().flatten() {
        delete_record(client, auth_token, &extra.id).await?;
    }

    ensured.sort_by(|lhs, rhs| {
        record_a_value(lhs)
            .cmp(&record_a_value(rhs))
            .then_with(|| lhs.id.cmp(&rhs.id))
    });
    Ok(ensured)
}
|
|
|
|
async fn delete_record(
|
|
client: &mut RecordServiceClient<tonic::transport::Channel>,
|
|
auth_token: &str,
|
|
record_id: &str,
|
|
) -> Result<()> {
|
|
match client
|
|
.delete_record(authorized_request(
|
|
DeleteRecordRequest {
|
|
id: record_id.to_string(),
|
|
},
|
|
auth_token,
|
|
))
|
|
.await
|
|
{
|
|
Ok(_) => Ok(()),
|
|
Err(status) if status.code() == Code::NotFound => Ok(()),
|
|
Err(status) => Err(status.into()),
|
|
}
|
|
}
|
|
|
|
/// Pick the listener/target port for a load-balancer publication.
///
/// Precedence: explicit `listener_port` on the spec, then the schedule's
/// `instance_port`, then the service's HTTP port, then its gRPC port.
fn resolve_target_port(service: &ServiceSpec, spec: &LoadBalancerPublicationSpec) -> Option<u16> {
    if let Some(explicit) = spec.listener_port {
        return Some(explicit);
    }
    if let Some(scheduled) = service
        .schedule
        .as_ref()
        .and_then(|schedule| schedule.instance_port)
    {
        return Some(scheduled);
    }
    let ports = service.ports.as_ref();
    ports
        .and_then(|ports| ports.http)
        .or_else(|| ports.and_then(|ports| ports.grpc))
}
|
|
|
|
fn record_a_value(record: &RecordInfo) -> Option<String> {
|
|
record
|
|
.data
|
|
.as_ref()
|
|
.and_then(|data| data.data.as_ref())
|
|
.and_then(|data| match data {
|
|
record_data::Data::A(record) => Some(record.address.clone()),
|
|
_ => None,
|
|
})
|
|
}
|
|
|
|
/// Trim, drop empties, sort and dedup a set of DNS values so comparisons and
/// record reconciliation are deterministic.
fn normalize_dns_values(values: impl IntoIterator<Item = String>) -> Vec<String> {
    let mut values = values
        .into_iter()
        .filter_map(|value| {
            // Trim first and only allocate for values we actually keep —
            // the original allocated a String for every input, including
            // those discarded as empty.
            let trimmed = value.trim();
            if trimmed.is_empty() {
                None
            } else {
                Some(trimmed.to_string())
            }
        })
        .collect::<Vec<_>>();
    // Stability is irrelevant when deduplicating equal strings, so the
    // faster, non-allocating unstable sort suffices.
    values.sort_unstable();
    values.dedup();
    values
}
|
|
|
|
fn desired_dns_values(
|
|
spec: &DnsPublicationSpec,
|
|
healthy_instances: &[ServiceInstanceSpec],
|
|
load_balancer: Option<&PublishedLoadBalancerState>,
|
|
) -> Vec<String> {
|
|
match spec.mode {
|
|
DnsPublishMode::LoadBalancer => normalize_dns_values(
|
|
load_balancer
|
|
.and_then(|state| state.vip_address.clone())
|
|
.filter(|value| !value.is_empty() && value != "0.0.0.0")
|
|
.or_else(|| {
|
|
healthy_instances
|
|
.first()
|
|
.map(|instance| instance.ip.clone())
|
|
})
|
|
.into_iter(),
|
|
),
|
|
DnsPublishMode::Direct => normalize_dns_values(
|
|
healthy_instances
|
|
.first()
|
|
.map(|instance| instance.ip.clone())
|
|
.into_iter(),
|
|
),
|
|
DnsPublishMode::DirectMulti => {
|
|
normalize_dns_values(healthy_instances.iter().map(|instance| instance.ip.clone()))
|
|
}
|
|
}
|
|
}
|
|
|
|
fn instance_is_publishable(instance: &ServiceInstanceSpec, heartbeat_timeout_secs: u64) -> bool {
|
|
if instance.state.as_deref() != Some("healthy") {
|
|
return false;
|
|
}
|
|
|
|
if heartbeat_timeout_secs == 0 {
|
|
return true;
|
|
}
|
|
|
|
let Some(last_heartbeat) = instance.last_heartbeat.or(instance.observed_at) else {
|
|
return false;
|
|
};
|
|
|
|
Utc::now()
|
|
.signed_duration_since(last_heartbeat)
|
|
.num_seconds()
|
|
<= heartbeat_timeout_secs as i64
|
|
}
|
|
|
|
fn record_name_for_service(spec: &DnsPublicationSpec, service: &ServiceSpec) -> String {
|
|
let zone_name = normalize_zone_name(&spec.zone);
|
|
let raw_name = spec.name.clone().unwrap_or_else(|| service.name.clone());
|
|
let trimmed = raw_name.trim_end_matches('.').to_string();
|
|
let suffix = format!(".{}", zone_name);
|
|
trimmed
|
|
.strip_suffix(&suffix)
|
|
.unwrap_or(trimmed.as_str())
|
|
.to_string()
|
|
}
|
|
|
|
/// Normalize a zone name by removing any trailing dots ("example.com." and
/// "example.com" compare equal after normalization).
fn normalize_zone_name(zone: &str) -> String {
    let mut normalized = zone.to_string();
    while normalized.ends_with('.') {
        normalized.pop();
    }
    normalized
}
|
|
|
|
/// Make a string safe for use as a FiberLB resource name: ASCII letters,
/// digits, '-' and '_' pass through; every other character becomes '-'.
fn sanitize_name(value: &str) -> String {
    let mut sanitized = String::with_capacity(value.len());
    for ch in value.chars() {
        let allowed = ch.is_ascii_alphanumeric() || ch == '-' || ch == '_';
        sanitized.push(if allowed { ch } else { '-' });
    }
    sanitized
}
|
|
|
|
fn backend_name_for_instance(service: &ServiceSpec, instance: &ServiceInstanceSpec) -> String {
|
|
sanitize_name(&format!("{}-{}", service.name, instance.instance_id))
|
|
}
|
|
|
|
fn listener_protocol(
|
|
spec: &LoadBalancerPublicationSpec,
|
|
service: &ServiceSpec,
|
|
) -> ListenerProtocol {
|
|
match spec
|
|
.protocol
|
|
.as_deref()
|
|
.or(service.protocol.as_deref())
|
|
.unwrap_or("tcp")
|
|
{
|
|
"http" => ListenerProtocol::Http,
|
|
"https" => ListenerProtocol::Https,
|
|
"terminated_https" => ListenerProtocol::TerminatedHttps,
|
|
"udp" => ListenerProtocol::Udp,
|
|
_ => ListenerProtocol::Tcp,
|
|
}
|
|
}
|
|
|
|
fn pool_protocol(spec: &LoadBalancerPublicationSpec, service: &ServiceSpec) -> PoolProtocol {
|
|
match spec
|
|
.pool_protocol
|
|
.as_deref()
|
|
.or(spec.protocol.as_deref())
|
|
.or(service.protocol.as_deref())
|
|
.unwrap_or("tcp")
|
|
{
|
|
"http" => PoolProtocol::Http,
|
|
"https" => PoolProtocol::Https,
|
|
"udp" => PoolProtocol::Udp,
|
|
_ => PoolProtocol::Tcp,
|
|
}
|
|
}
|
|
|
|
/// Map a blank (empty or whitespace-only) string to `None`; a non-blank
/// string is returned unchanged — trimming is only used for the test, the
/// original value (including any surrounding whitespace) is preserved.
fn empty_to_none(value: String) -> Option<String> {
    let is_blank = value.trim().is_empty();
    (!is_blank).then_some(value)
}
|
|
|
|
/// Storage key for one service's publication state:
/// `<namespace>/clusters/<cluster>/publications/<service>`.
fn publication_key(cluster_namespace: &str, cluster_id: &str, service: &str) -> Vec<u8> {
    let key = format!("{cluster_namespace}/clusters/{cluster_id}/publications/{service}");
    key.into_bytes()
}
|
|
|
|
async fn load_publication_states(
|
|
client: &mut Client,
|
|
cluster_namespace: &str,
|
|
cluster_id: &str,
|
|
) -> Result<HashMap<String, ServicePublicationState>> {
|
|
let prefix = format!(
|
|
"{}/clusters/{}/publications/",
|
|
cluster_namespace, cluster_id
|
|
);
|
|
let kvs = client.get_prefix(prefix.as_bytes()).await?;
|
|
let mut states = HashMap::with_capacity(kvs.len());
|
|
|
|
for (_key, value) in kvs {
|
|
match serde_json::from_slice::<ServicePublicationState>(&value) {
|
|
Ok(state) => {
|
|
states.insert(state.service.clone(), state);
|
|
}
|
|
Err(error) => warn!(error = %error, "failed to decode service publication state"),
|
|
}
|
|
}
|
|
|
|
Ok(states)
|
|
}
|
|
|
|
async fn load_service_instances(
|
|
client: &mut Client,
|
|
cluster_namespace: &str,
|
|
cluster_id: &str,
|
|
service: &str,
|
|
) -> Result<Vec<ServiceInstanceSpec>> {
|
|
let prefix = format!(
|
|
"{}/clusters/{}/instances/{}/",
|
|
cluster_namespace, cluster_id, service
|
|
);
|
|
let kvs = client.get_prefix(prefix.as_bytes()).await?;
|
|
let mut instances = Vec::with_capacity(kvs.len());
|
|
|
|
for (_key, value) in kvs {
|
|
match serde_json::from_slice::<ServiceInstanceSpec>(&value) {
|
|
Ok(instance) => instances.push(instance),
|
|
Err(error) => {
|
|
warn!(service = %service, error = %error, "failed to decode service instance")
|
|
}
|
|
}
|
|
}
|
|
|
|
instances.sort_by(|lhs, rhs| lhs.instance_id.cmp(&rhs.instance_id));
|
|
Ok(instances)
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use deployer_types::{ServicePorts, ServicePublicationSpec, ServiceScheduleSpec};

    /// Builds a healthy `ServiceInstanceSpec` on port 8080 with every optional
    /// field unset; tests override individual fields afterwards as needed.
    /// Extracted to remove three near-identical 15-field literals.
    fn instance(id: &str, service: &str, node: &str, ip: &str) -> ServiceInstanceSpec {
        ServiceInstanceSpec {
            instance_id: id.to_string(),
            service: service.to_string(),
            node_id: node.to_string(),
            ip: ip.to_string(),
            port: 8080,
            mesh_port: None,
            version: None,
            health_check: None,
            process: None,
            container: None,
            managed_by: None,
            state: Some("healthy".to_string()),
            last_heartbeat: None,
            observed_at: None,
        }
    }

    /// Builds a `DnsPublicationSpec` with the TTL (60) shared by every test.
    fn dns_spec(zone: &str, name: Option<&str>, mode: DnsPublishMode) -> DnsPublicationSpec {
        DnsPublicationSpec {
            zone: zone.to_string(),
            name: name.map(str::to_string),
            ttl: 60,
            mode,
        }
    }

    /// Builds a `ServiceSpec` that publishes an HTTP port via a
    /// load-balancer-mode DNS record in `zone`.
    fn service_with_dns(name: &str, zone: &str) -> ServiceSpec {
        ServiceSpec {
            name: name.to_string(),
            ports: Some(ServicePorts {
                http: Some(8080),
                grpc: None,
            }),
            protocol: Some("http".to_string()),
            mtls_required: None,
            mesh_mode: None,
            schedule: Some(ServiceScheduleSpec::default()),
            publish: Some(ServicePublicationSpec {
                org_id: Some("default-org".to_string()),
                project_id: Some("default-project".to_string()),
                dns: Some(dns_spec(zone, None, DnsPublishMode::LoadBalancer)),
                load_balancer: None,
            }),
        }
    }

    #[test]
    fn test_record_name_strips_zone_suffix() {
        // A fully-qualified record name should be reduced to its relative form.
        let spec = dns_spec(
            "native.cluster.test",
            Some("api.native.cluster.test"),
            DnsPublishMode::LoadBalancer,
        );
        let service = service_with_dns("api", "native.cluster.test");
        assert_eq!(record_name_for_service(&spec, &service), "api");
    }

    #[test]
    fn test_dns_values_fall_back_to_healthy_instance_when_vip_missing() {
        // With no VIP (third arg None), load-balancer mode falls back to the
        // healthy instance's address.
        let spec = dns_spec("native.cluster.test", Some("api"), DnsPublishMode::LoadBalancer);
        let instances = vec![instance("api-node01", "api", "node01", "10.0.0.11")];

        assert_eq!(
            desired_dns_values(&spec, &instances, None),
            vec!["10.0.0.11".to_string()]
        );
    }

    #[test]
    fn test_direct_multi_dns_publishes_all_healthy_instance_ips() {
        let spec = dns_spec("native.cluster.test", Some("daemon"), DnsPublishMode::DirectMulti);
        // node01 and node03 deliberately share an IP to exercise deduplication;
        // input order is shuffled to exercise sorting.
        let instances = vec![
            instance("daemon-node02", "daemon", "node02", "10.0.0.12"),
            instance("daemon-node01", "daemon", "node01", "10.0.0.11"),
            instance("daemon-node03", "daemon", "node03", "10.0.0.11"),
        ];

        // Expect sorted, deduplicated addresses of all healthy instances.
        assert_eq!(
            desired_dns_values(&spec, &instances, None),
            vec!["10.0.0.11".to_string(), "10.0.0.12".to_string()]
        );
    }

    #[test]
    fn test_publishable_instance_requires_fresh_heartbeat() {
        let now = Utc::now();
        let mut fresh = instance("api-node01", "api", "node01", "10.0.0.11");
        fresh.last_heartbeat = Some(now);

        // Healthy + heartbeat within the 60s window => publishable.
        assert!(instance_is_publishable(&fresh, 60));

        // Stale heartbeat (120s old, 60s window) => not publishable.
        fresh.last_heartbeat = Some(now - chrono::Duration::seconds(120));
        assert!(!instance_is_publishable(&fresh, 60));

        // Fresh heartbeat but unhealthy state => not publishable.
        fresh.last_heartbeat = Some(now);
        fresh.state = Some("unhealthy".to_string());
        assert!(!instance_is_publishable(&fresh, 60));
    }
}
|