use std::collections::{BTreeMap, HashMap, HashSet}; use anyhow::{Context, Result}; use chainfire_client::Client; use chrono::Utc; use deployer_types::{ DnsPublicationSpec, DnsPublishMode, LoadBalancerPublicationSpec, PublishedDnsRecordState, PublishedLoadBalancerState, ServiceInstanceSpec, ServicePublicationState, ServiceSpec, }; use fiberlb_api::backend_service_client::BackendServiceClient; use fiberlb_api::listener_service_client::ListenerServiceClient; use fiberlb_api::load_balancer_service_client::LoadBalancerServiceClient; use fiberlb_api::pool_service_client::PoolServiceClient; use fiberlb_api::{ BackendAdminState, CreateBackendRequest, CreateListenerRequest, CreateLoadBalancerRequest, CreatePoolRequest, DeleteBackendRequest, DeleteListenerRequest, DeleteLoadBalancerRequest, DeletePoolRequest, ListBackendsRequest, ListListenersRequest, ListLoadBalancersRequest, ListPoolsRequest, Listener, ListenerProtocol, Pool, PoolAlgorithm, PoolProtocol, UpdateBackendRequest, }; use flashdns_api::proto::record_data; use flashdns_api::proto::{ ARecord, CreateRecordRequest, CreateZoneRequest, DeleteRecordRequest, ListRecordsRequest, ListZonesRequest, RecordData, RecordInfo, UpdateRecordRequest, ZoneInfo, }; use flashdns_api::{RecordServiceClient, ZoneServiceClient}; use tonic::Code; use tracing::{info, warn}; use crate::auth::{authorized_request, issue_controller_token}; pub struct PublicationConfig { pub cluster_id: String, pub heartbeat_timeout_secs: u64, pub iam_endpoint: Option, pub fiberlb_endpoint: Option, pub flashdns_endpoint: Option, pub publish_address: Option, pub controller_principal_id: String, pub default_org_id: String, pub default_project_id: String, pub dry_run: bool, } pub struct PublicationReconciler { controller: PublicationController, } pub struct PublicationController { config: PublicationConfig, } impl PublicationReconciler { pub fn new(config: PublicationConfig) -> Self { Self { controller: PublicationController::new(config), } } pub async fn reconcile_all( &self, client: &mut Client, cluster_namespace: &str, cluster_id: &str, services: &[ServiceSpec], _dry_run: bool, ) -> Result<()> { let existing = load_publication_states(client, cluster_namespace, cluster_id).await?; let mut desired_services = HashSet::new(); for service in services.iter().filter(|service| service.publish.is_some()) { desired_services.insert(service.name.clone()); let instances = load_service_instances(client, cluster_namespace, cluster_id, &service.name) .await?; let next = self .controller .reconcile(service, &instances, existing.get(&service.name)) .await?; let key = publication_key(cluster_namespace, cluster_id, &service.name); match next { Some(state) => { client.put(&key, &serde_json::to_vec(&state)?).await?; } None => { client.delete(&key).await?; } } } for (service_name, state) in existing { if desired_services.contains(&service_name) { continue; } self.controller.cleanup(&state).await?; client .delete(&publication_key( cluster_namespace, cluster_id, &service_name, )) .await?; } Ok(()) } } impl PublicationController { pub fn new(config: PublicationConfig) -> Self { Self { config } } pub async fn reconcile( &self, service: &ServiceSpec, instances: &[ServiceInstanceSpec], existing: Option<&ServicePublicationState>, ) -> Result> { let Some(publication) = service.publish.as_ref() else { return Ok(None); }; let (org_id, project_id) = self.publication_scope(service, existing); let token = if publication.load_balancer.is_some() || publication.dns.is_some() { Some(self.issue_token(&org_id, &project_id).await?) } else { None }; let healthy_instances: Vec<_> = instances .iter() .filter(|instance| { instance_is_publishable(instance, self.config.heartbeat_timeout_secs) }) .cloned() .collect(); let load_balancer = if let Some(lb_spec) = publication.load_balancer.as_ref() { Some( self.reconcile_load_balancer( service, lb_spec, &org_id, &project_id, token .as_deref() .context("publication requested without controller token")?, &healthy_instances, existing.and_then(|state| state.load_balancer.as_ref()), ) .await?, ) } else { None }; let dns = if let Some(dns_spec) = publication.dns.as_ref() { self.reconcile_dns( service, dns_spec, &org_id, &project_id, token .as_deref() .context("publication requested without controller token")?, &healthy_instances, load_balancer.as_ref(), existing.and_then(|state| state.dns.as_ref()), ) .await? } else { None }; Ok(Some(ServicePublicationState { service: service.name.clone(), org_id, project_id, load_balancer, dns, observed_at: Some(Utc::now()), })) } pub async fn cleanup(&self, state: &ServicePublicationState) -> Result<()> { let Some(iam_endpoint) = self.config.iam_endpoint.as_deref() else { warn!(service = %state.service, "skipping publication cleanup without IAM endpoint"); return Ok(()); }; let token = issue_controller_token( iam_endpoint, &self.config.controller_principal_id, &state.org_id, &state.project_id, ) .await?; if let Some(dns_state) = state.dns.as_ref() { self.cleanup_dns(&token, dns_state).await?; } if let Some(lb_state) = state.load_balancer.as_ref() { self.cleanup_load_balancer(&token, lb_state).await?; } Ok(()) } fn publication_scope( &self, service: &ServiceSpec, existing: Option<&ServicePublicationState>, ) -> (String, String) { let publish = service.publish.as_ref(); let org_id = publish .and_then(|spec| spec.org_id.clone()) .or_else(|| { publish .and_then(|spec| spec.load_balancer.as_ref()) .and_then(|spec| spec.org_id.clone()) }) .or_else(|| existing.map(|state| state.org_id.clone())) .unwrap_or_else(|| self.config.default_org_id.clone()); let project_id = publish .and_then(|spec| spec.project_id.clone()) .or_else(|| { publish .and_then(|spec| spec.load_balancer.as_ref()) .and_then(|spec| spec.project_id.clone()) }) .or_else(|| existing.map(|state| state.project_id.clone())) .unwrap_or_else(|| self.config.default_project_id.clone()); (org_id, project_id) } async fn issue_token(&self, org_id: &str, project_id: &str) -> Result { let iam_endpoint = self .config .iam_endpoint .as_deref() .context("publication requires --iam-endpoint")?; issue_controller_token( iam_endpoint, &self.config.controller_principal_id, org_id, project_id, ) .await } async fn reconcile_load_balancer( &self, service: &ServiceSpec, spec: &LoadBalancerPublicationSpec, org_id: &str, project_id: &str, auth_token: &str, healthy_instances: &[ServiceInstanceSpec], existing: Option<&PublishedLoadBalancerState>, ) -> Result { let Some(endpoint) = self.config.fiberlb_endpoint.as_ref() else { warn!(service = %service.name, "publication requested without FiberLB endpoint"); return existing .cloned() .context("missing FiberLB endpoint for load balancer publication"); }; let listener_port = resolve_target_port(service, spec) .context("load balancer publication requires listener_port or target port")?; let lb_name = spec.name.clone().unwrap_or_else(|| { sanitize_name(&format!("{}-{}", self.config.cluster_id, service.name)) }); let pool_name = format!("{lb_name}-pool"); let listener_name = format!("{lb_name}-listener-{listener_port}"); if self.config.dry_run { info!(service = %service.name, load_balancer = %lb_name, "would reconcile native load balancer"); return Ok(existing.cloned().unwrap_or(PublishedLoadBalancerState { id: String::new(), pool_id: String::new(), listener_id: String::new(), vip_address: None, })); } let mut lb_client = LoadBalancerServiceClient::connect(endpoint.clone()).await?; let mut pool_client = PoolServiceClient::connect(endpoint.clone()).await?; let mut listener_client = ListenerServiceClient::connect(endpoint.clone()).await?; let mut backend_client = BackendServiceClient::connect(endpoint.clone()).await?; let load_balancer = ensure_load_balancer( &mut lb_client, auth_token, existing, org_id, project_id, &lb_name, ) .await?; let pool = ensure_pool( &mut pool_client, auth_token, existing, &load_balancer.id, &pool_name, spec, service, ) .await?; let listener = ensure_listener( &mut listener_client, auth_token, existing, &load_balancer.id, &listener_name, listener_port, &pool.id, spec, service, ) .await?; reconcile_backends( &mut backend_client, auth_token, &pool.id, service, healthy_instances, ) .await?; Ok(PublishedLoadBalancerState { id: load_balancer.id, pool_id: pool.id, listener_id: listener.id, vip_address: empty_to_none(load_balancer.vip_address) .or_else(|| self.config.publish_address.clone()), }) } async fn reconcile_dns( &self, service: &ServiceSpec, spec: &DnsPublicationSpec, org_id: &str, project_id: &str, auth_token: &str, healthy_instances: &[ServiceInstanceSpec], load_balancer: Option<&PublishedLoadBalancerState>, existing: Option<&PublishedDnsRecordState>, ) -> Result> { let Some(endpoint) = self.config.flashdns_endpoint.as_ref() else { warn!(service = %service.name, "DNS publication requested without FlashDNS endpoint"); return Ok(existing.cloned()); }; let desired_values = desired_dns_values(spec, healthy_instances, load_balancer); if desired_values.is_empty() { if let Some(existing) = existing { self.cleanup_dns(auth_token, existing).await?; } return Ok(None); } let zone_name = normalize_zone_name(&spec.zone); let record_name = record_name_for_service(spec, service); let fqdn = format!("{}.{}", record_name, zone_name); let primary_value = desired_values.first().cloned().unwrap_or_default(); if self.config.dry_run { info!( service = %service.name, fqdn = %fqdn, values = ?desired_values, "would reconcile native DNS record set" ); return Ok(existing.cloned().or(Some(PublishedDnsRecordState { zone_id: String::new(), record_id: String::new(), record_ids: Vec::new(), fqdn, value: primary_value, values: desired_values, }))); } let mut zone_client = ZoneServiceClient::connect(endpoint.clone()).await?; let mut record_client = RecordServiceClient::connect(endpoint.clone()).await?; let zone = ensure_zone(&mut zone_client, auth_token, &zone_name, org_id, project_id).await?; let records = ensure_records( &mut record_client, auth_token, existing, &zone.id, &record_name, spec.ttl, &desired_values, ) .await?; let record_ids = records .iter() .map(|record| record.id.clone()) .collect::>(); Ok(Some(PublishedDnsRecordState { zone_id: zone.id, record_id: record_ids.first().cloned().unwrap_or_default(), record_ids, fqdn, value: primary_value, values: desired_values, })) } async fn cleanup_dns( &self, auth_token: &str, dns_state: &PublishedDnsRecordState, ) -> Result<()> { let Some(endpoint) = self.config.flashdns_endpoint.as_ref() else { return Ok(()); }; let mut record_client = RecordServiceClient::connect(endpoint.clone()).await?; let mut record_ids = dns_state.record_ids.clone(); if record_ids.is_empty() && !dns_state.record_id.is_empty() { record_ids.push(dns_state.record_id.clone()); } record_ids.sort(); record_ids.dedup(); for record_id in record_ids { match record_client .delete_record(authorized_request( DeleteRecordRequest { id: record_id }, auth_token, )) .await { Ok(_) => {} Err(status) if status.code() == Code::NotFound => {} Err(status) => return Err(status.into()), } } Ok(()) } async fn cleanup_load_balancer( &self, auth_token: &str, lb_state: &PublishedLoadBalancerState, ) -> Result<()> { let Some(endpoint) = self.config.fiberlb_endpoint.as_ref() else { return Ok(()); }; let mut backend_client = BackendServiceClient::connect(endpoint.clone()).await?; let mut listener_client = ListenerServiceClient::connect(endpoint.clone()).await?; let mut pool_client = PoolServiceClient::connect(endpoint.clone()).await?; let mut lb_client = LoadBalancerServiceClient::connect(endpoint.clone()).await?; if !lb_state.pool_id.is_empty() { if let Ok(response) = backend_client .list_backends(authorized_request( ListBackendsRequest { pool_id: lb_state.pool_id.clone(), page_size: 256, page_token: String::new(), }, auth_token, )) .await { for backend in response.into_inner().backends { delete_backend(&mut backend_client, auth_token, &backend.id).await?; } } } if !lb_state.listener_id.is_empty() { match listener_client .delete_listener(authorized_request( DeleteListenerRequest { id: lb_state.listener_id.clone(), }, auth_token, )) .await { Ok(_) => {} Err(status) if status.code() == Code::NotFound => {} Err(status) => return Err(status.into()), } } if !lb_state.pool_id.is_empty() { match pool_client .delete_pool(authorized_request( DeletePoolRequest { id: lb_state.pool_id.clone(), }, auth_token, )) .await { Ok(_) => {} Err(status) if status.code() == Code::NotFound => {} Err(status) => return Err(status.into()), } } if !lb_state.id.is_empty() { match lb_client .delete_load_balancer(authorized_request( DeleteLoadBalancerRequest { id: lb_state.id.clone(), }, auth_token, )) .await { Ok(_) => {} Err(status) if status.code() == Code::NotFound => {} Err(status) => return Err(status.into()), } } Ok(()) } } async fn ensure_load_balancer( client: &mut LoadBalancerServiceClient, auth_token: &str, existing: Option<&PublishedLoadBalancerState>, org_id: &str, project_id: &str, name: &str, ) -> Result { let current = client .list_load_balancers(authorized_request( ListLoadBalancersRequest { org_id: org_id.to_string(), project_id: project_id.to_string(), page_size: 256, page_token: String::new(), }, auth_token, )) .await? .into_inner() .loadbalancers .into_iter() .find(|lb| { existing.map(|state| state.id.as_str()) == Some(lb.id.as_str()) || lb.name == name }); if let Some(load_balancer) = current { return Ok(load_balancer); } Ok(client .create_load_balancer(authorized_request( CreateLoadBalancerRequest { name: name.to_string(), org_id: org_id.to_string(), project_id: project_id.to_string(), description: format!("native runtime service {name}"), vip_address: String::new(), }, auth_token, )) .await? .into_inner() .loadbalancer .context("FiberLB returned empty CreateLoadBalancer response")?) } async fn ensure_pool( client: &mut PoolServiceClient, auth_token: &str, existing: Option<&PublishedLoadBalancerState>, load_balancer_id: &str, name: &str, spec: &LoadBalancerPublicationSpec, service: &ServiceSpec, ) -> Result { let current = client .list_pools(authorized_request( ListPoolsRequest { loadbalancer_id: load_balancer_id.to_string(), page_size: 256, page_token: String::new(), }, auth_token, )) .await? .into_inner() .pools .into_iter() .find(|pool| { existing.map(|state| state.pool_id.as_str()) == Some(pool.id.as_str()) || pool.name == name }); if let Some(pool) = current { return Ok(pool); } Ok(client .create_pool(authorized_request( CreatePoolRequest { name: name.to_string(), loadbalancer_id: load_balancer_id.to_string(), algorithm: PoolAlgorithm::RoundRobin as i32, protocol: pool_protocol(spec, service) as i32, session_persistence: None, }, auth_token, )) .await? .into_inner() .pool .context("FiberLB returned empty CreatePool response")?) } async fn ensure_listener( client: &mut ListenerServiceClient, auth_token: &str, existing: Option<&PublishedLoadBalancerState>, load_balancer_id: &str, name: &str, port: u16, default_pool_id: &str, spec: &LoadBalancerPublicationSpec, service: &ServiceSpec, ) -> Result { let listeners = client .list_listeners(authorized_request( ListListenersRequest { loadbalancer_id: load_balancer_id.to_string(), page_size: 256, page_token: String::new(), }, auth_token, )) .await? .into_inner() .listeners; if let Some(listener) = listeners.iter().find(|listener| { existing.map(|state| state.listener_id.as_str()) == Some(listener.id.as_str()) || listener.name == name }) { let listener = listener.clone(); if listener.port == port as u32 && listener.protocol == listener_protocol(spec, service) as i32 && listener.default_pool_id == default_pool_id { return Ok(listener); } client .delete_listener(authorized_request( DeleteListenerRequest { id: listener.id.clone(), }, auth_token, )) .await?; } Ok(client .create_listener(authorized_request( CreateListenerRequest { name: name.to_string(), loadbalancer_id: load_balancer_id.to_string(), protocol: listener_protocol(spec, service) as i32, port: port as u32, default_pool_id: default_pool_id.to_string(), tls_config: None, connection_limit: 0, }, auth_token, )) .await? .into_inner() .listener .context("FiberLB returned empty CreateListener response")?) } async fn reconcile_backends( client: &mut BackendServiceClient, auth_token: &str, pool_id: &str, service: &ServiceSpec, healthy_instances: &[ServiceInstanceSpec], ) -> Result<()> { let existing = client .list_backends(authorized_request( ListBackendsRequest { pool_id: pool_id.to_string(), page_size: 256, page_token: String::new(), }, auth_token, )) .await? .into_inner() .backends; let desired_names: HashSet = healthy_instances .iter() .map(|instance| backend_name_for_instance(service, instance)) .collect(); for backend in &existing { if !desired_names.contains(&backend.name) { delete_backend(client, auth_token, &backend.id).await?; } } for instance in healthy_instances { let backend_name = backend_name_for_instance(service, instance); let matching = existing.iter().find(|backend| backend.name == backend_name); if let Some(backend) = matching { if backend.address == instance.ip && backend.port == instance.port as u32 && backend.admin_state == BackendAdminState::Enabled as i32 { continue; } if backend.address != instance.ip || backend.port != instance.port as u32 { delete_backend(client, auth_token, &backend.id).await?; } else { client .update_backend(authorized_request( UpdateBackendRequest { id: backend.id.clone(), name: backend.name.clone(), weight: backend.weight, admin_state: BackendAdminState::Enabled as i32, }, auth_token, )) .await?; continue; } } client .create_backend(authorized_request( CreateBackendRequest { name: backend_name, pool_id: pool_id.to_string(), address: instance.ip.clone(), port: instance.port as u32, weight: 1, }, auth_token, )) .await?; } Ok(()) } async fn delete_backend( client: &mut BackendServiceClient, auth_token: &str, backend_id: &str, ) -> Result<()> { match client .delete_backend(authorized_request( DeleteBackendRequest { id: backend_id.to_string(), }, auth_token, )) .await { Ok(_) => Ok(()), Err(status) if status.code() == Code::NotFound => Ok(()), Err(status) => Err(status.into()), } } async fn ensure_zone( client: &mut ZoneServiceClient, auth_token: &str, zone_name: &str, org_id: &str, project_id: &str, ) -> Result { let response = client .list_zones(authorized_request( ListZonesRequest { org_id: org_id.to_string(), project_id: project_id.to_string(), name_filter: zone_name.to_string(), page_size: 256, page_token: String::new(), }, auth_token, )) .await?; if let Some(zone) = response .into_inner() .zones .into_iter() .find(|zone| normalize_zone_name(&zone.name) == zone_name) { return Ok(zone); } Ok(client .create_zone(authorized_request( CreateZoneRequest { name: zone_name.to_string(), org_id: org_id.to_string(), project_id: project_id.to_string(), primary_ns: "ns1.native.cluster".to_string(), admin_email: "admin@native.cluster".to_string(), }, auth_token, )) .await? .into_inner() .zone .context("FlashDNS returned empty CreateZone response")?) } async fn ensure_records( client: &mut RecordServiceClient, auth_token: &str, existing: Option<&PublishedDnsRecordState>, zone_id: &str, name: &str, ttl: u32, desired_values: &[String], ) -> Result> { let records = client .list_records(authorized_request( ListRecordsRequest { zone_id: zone_id.to_string(), name_filter: name.to_string(), type_filter: "A".to_string(), page_size: 256, page_token: String::new(), }, auth_token, )) .await? .into_inner() .records; let mut matching = records .iter() .filter(|record| { record.name == name || existing.map(|state| state.record_id.as_str()) == Some(record.id.as_str()) || existing .map(|state| state.record_ids.iter().any(|id| id == &record.id)) .unwrap_or(false) }) .cloned() .collect::>(); matching.sort_by(|lhs, rhs| { record_a_value(lhs) .cmp(&record_a_value(rhs)) .then_with(|| lhs.id.cmp(&rhs.id)) }); let mut records_by_value: BTreeMap> = BTreeMap::new(); for record in matching { let Some(value) = record_a_value(&record) else { continue; }; records_by_value.entry(value).or_default().push(record); } let mut ensured = Vec::new(); for desired_value in desired_values { if let Some(record) = records_by_value.get_mut(desired_value).and_then(|records| { if records.is_empty() { None } else { Some(records.remove(0)) } }) { if record.ttl != ttl || !record.enabled { let updated = client .update_record(authorized_request( UpdateRecordRequest { id: record.id.clone(), ttl: Some(ttl), data: Some(RecordData { data: Some(record_data::Data::A(ARecord { address: desired_value.to_string(), })), }), enabled: Some(true), }, auth_token, )) .await? .into_inner() .record .context("FlashDNS returned empty UpdateRecord response")?; ensured.push(updated); } else { ensured.push(record); } continue; } ensured.push( client .create_record(authorized_request( CreateRecordRequest { zone_id: zone_id.to_string(), name: name.to_string(), record_type: "A".to_string(), ttl, data: Some(RecordData { data: Some(record_data::Data::A(ARecord { address: desired_value.to_string(), })), }), }, auth_token, )) .await? .into_inner() .record .context("FlashDNS returned empty CreateRecord response")?, ); } for extra in records_by_value.into_values().flatten() { delete_record(client, auth_token, &extra.id).await?; } ensured.sort_by(|lhs, rhs| { record_a_value(lhs) .cmp(&record_a_value(rhs)) .then_with(|| lhs.id.cmp(&rhs.id)) }); Ok(ensured) } async fn delete_record( client: &mut RecordServiceClient, auth_token: &str, record_id: &str, ) -> Result<()> { match client .delete_record(authorized_request( DeleteRecordRequest { id: record_id.to_string(), }, auth_token, )) .await { Ok(_) => Ok(()), Err(status) if status.code() == Code::NotFound => Ok(()), Err(status) => Err(status.into()), } } fn resolve_target_port(service: &ServiceSpec, spec: &LoadBalancerPublicationSpec) -> Option { spec.listener_port .or_else(|| { service .schedule .as_ref() .and_then(|schedule| schedule.instance_port) }) .or_else(|| service.ports.as_ref().and_then(|ports| ports.http)) .or_else(|| service.ports.as_ref().and_then(|ports| ports.grpc)) } fn record_a_value(record: &RecordInfo) -> Option { record .data .as_ref() .and_then(|data| data.data.as_ref()) .and_then(|data| match data { record_data::Data::A(record) => Some(record.address.clone()), _ => None, }) } fn normalize_dns_values(values: impl IntoIterator) -> Vec { let mut values = values .into_iter() .map(|value| value.trim().to_string()) .filter(|value| !value.is_empty()) .collect::>(); values.sort(); values.dedup(); values } fn desired_dns_values( spec: &DnsPublicationSpec, healthy_instances: &[ServiceInstanceSpec], load_balancer: Option<&PublishedLoadBalancerState>, ) -> Vec { match spec.mode { DnsPublishMode::LoadBalancer => normalize_dns_values( load_balancer .and_then(|state| state.vip_address.clone()) .filter(|value| !value.is_empty() && value != "0.0.0.0") .or_else(|| { healthy_instances .first() .map(|instance| instance.ip.clone()) }) .into_iter(), ), DnsPublishMode::Direct => normalize_dns_values( healthy_instances .first() .map(|instance| instance.ip.clone()) .into_iter(), ), DnsPublishMode::DirectMulti => { normalize_dns_values(healthy_instances.iter().map(|instance| instance.ip.clone())) } } } fn instance_is_publishable(instance: &ServiceInstanceSpec, heartbeat_timeout_secs: u64) -> bool { if instance.state.as_deref() != Some("healthy") { return false; } if heartbeat_timeout_secs == 0 { return true; } let Some(last_heartbeat) = instance.last_heartbeat.or(instance.observed_at) else { return false; }; Utc::now() .signed_duration_since(last_heartbeat) .num_seconds() <= heartbeat_timeout_secs as i64 } fn record_name_for_service(spec: &DnsPublicationSpec, service: &ServiceSpec) -> String { let zone_name = normalize_zone_name(&spec.zone); let raw_name = spec.name.clone().unwrap_or_else(|| service.name.clone()); let trimmed = raw_name.trim_end_matches('.').to_string(); let suffix = format!(".{}", zone_name); trimmed .strip_suffix(&suffix) .unwrap_or(trimmed.as_str()) .to_string() } fn normalize_zone_name(zone: &str) -> String { zone.trim_end_matches('.').to_string() } fn sanitize_name(value: &str) -> String { value .chars() .map(|ch| { if ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_') { ch } else { '-' } }) .collect() } fn backend_name_for_instance(service: &ServiceSpec, instance: &ServiceInstanceSpec) -> String { sanitize_name(&format!("{}-{}", service.name, instance.instance_id)) } fn listener_protocol( spec: &LoadBalancerPublicationSpec, service: &ServiceSpec, ) -> ListenerProtocol { match spec .protocol .as_deref() .or(service.protocol.as_deref()) .unwrap_or("tcp") { "http" => ListenerProtocol::Http, "https" => ListenerProtocol::Https, "terminated_https" => ListenerProtocol::TerminatedHttps, "udp" => ListenerProtocol::Udp, _ => ListenerProtocol::Tcp, } } fn pool_protocol(spec: &LoadBalancerPublicationSpec, service: &ServiceSpec) -> PoolProtocol { match spec .pool_protocol .as_deref() .or(spec.protocol.as_deref()) .or(service.protocol.as_deref()) .unwrap_or("tcp") { "http" => PoolProtocol::Http, "https" => PoolProtocol::Https, "udp" => PoolProtocol::Udp, _ => PoolProtocol::Tcp, } } fn empty_to_none(value: String) -> Option { if value.trim().is_empty() { None } else { Some(value) } } fn publication_key(cluster_namespace: &str, cluster_id: &str, service: &str) -> Vec { format!( "{}/clusters/{}/publications/{}", cluster_namespace, cluster_id, service ) .into_bytes() } async fn load_publication_states( client: &mut Client, cluster_namespace: &str, cluster_id: &str, ) -> Result> { let prefix = format!( "{}/clusters/{}/publications/", cluster_namespace, cluster_id ); let kvs = client.get_prefix(prefix.as_bytes()).await?; let mut states = HashMap::with_capacity(kvs.len()); for (_key, value) in kvs { match serde_json::from_slice::(&value) { Ok(state) => { states.insert(state.service.clone(), state); } Err(error) => warn!(error = %error, "failed to decode service publication state"), } } Ok(states) } async fn load_service_instances( client: &mut Client, cluster_namespace: &str, cluster_id: &str, service: &str, ) -> Result> { let prefix = format!( "{}/clusters/{}/instances/{}/", cluster_namespace, cluster_id, service ); let kvs = client.get_prefix(prefix.as_bytes()).await?; let mut instances = Vec::with_capacity(kvs.len()); for (_key, value) in kvs { match serde_json::from_slice::(&value) { Ok(instance) => instances.push(instance), Err(error) => { warn!(service = %service, error = %error, "failed to decode service instance") } } } instances.sort_by(|lhs, rhs| lhs.instance_id.cmp(&rhs.instance_id)); Ok(instances) } #[cfg(test)] mod tests { use super::*; use deployer_types::{ServicePorts, ServicePublicationSpec, ServiceScheduleSpec}; fn service_with_dns(name: &str, zone: &str) -> ServiceSpec { ServiceSpec { name: name.to_string(), ports: Some(ServicePorts { http: Some(8080), grpc: None, }), protocol: Some("http".to_string()), mtls_required: None, mesh_mode: None, schedule: Some(ServiceScheduleSpec::default()), publish: Some(ServicePublicationSpec { org_id: Some("default-org".to_string()), project_id: Some("default-project".to_string()), dns: Some(DnsPublicationSpec { zone: zone.to_string(), name: None, ttl: 60, mode: DnsPublishMode::LoadBalancer, }), load_balancer: None, }), } } #[test] fn test_record_name_strips_zone_suffix() { let spec = DnsPublicationSpec { zone: "native.cluster.test".to_string(), name: Some("api.native.cluster.test".to_string()), ttl: 60, mode: DnsPublishMode::LoadBalancer, }; let service = service_with_dns("api", "native.cluster.test"); assert_eq!(record_name_for_service(&spec, &service), "api"); } #[test] fn test_dns_values_fall_back_to_healthy_instance_when_vip_missing() { let spec = DnsPublicationSpec { zone: "native.cluster.test".to_string(), name: Some("api".to_string()), ttl: 60, mode: DnsPublishMode::LoadBalancer, }; let instances = vec![ServiceInstanceSpec { instance_id: "api-node01".to_string(), service: "api".to_string(), node_id: "node01".to_string(), ip: "10.0.0.11".to_string(), port: 8080, mesh_port: None, version: None, health_check: None, process: None, container: None, managed_by: None, state: Some("healthy".to_string()), last_heartbeat: None, observed_at: None, }]; assert_eq!( desired_dns_values(&spec, &instances, None), vec!["10.0.0.11".to_string()] ); } #[test] fn test_direct_multi_dns_publishes_all_healthy_instance_ips() { let spec = DnsPublicationSpec { zone: "native.cluster.test".to_string(), name: Some("daemon".to_string()), ttl: 60, mode: DnsPublishMode::DirectMulti, }; let instances = vec![ ServiceInstanceSpec { instance_id: "daemon-node02".to_string(), service: "daemon".to_string(), node_id: "node02".to_string(), ip: "10.0.0.12".to_string(), port: 8080, mesh_port: None, version: None, health_check: None, process: None, container: None, managed_by: None, state: Some("healthy".to_string()), last_heartbeat: None, observed_at: None, }, ServiceInstanceSpec { instance_id: "daemon-node01".to_string(), service: "daemon".to_string(), node_id: "node01".to_string(), ip: "10.0.0.11".to_string(), port: 8080, mesh_port: None, version: None, health_check: None, process: None, container: None, managed_by: None, state: Some("healthy".to_string()), last_heartbeat: None, observed_at: None, }, ServiceInstanceSpec { instance_id: "daemon-node03".to_string(), service: "daemon".to_string(), node_id: "node03".to_string(), ip: "10.0.0.11".to_string(), port: 8080, mesh_port: None, version: None, health_check: None, process: None, container: None, managed_by: None, state: Some("healthy".to_string()), last_heartbeat: None, observed_at: None, }, ]; assert_eq!( desired_dns_values(&spec, &instances, None), vec!["10.0.0.11".to_string(), "10.0.0.12".to_string()] ); } #[test] fn test_publishable_instance_requires_fresh_heartbeat() { let now = Utc::now(); let mut fresh = ServiceInstanceSpec { instance_id: "api-node01".to_string(), service: "api".to_string(), node_id: "node01".to_string(), ip: "10.0.0.11".to_string(), port: 8080, mesh_port: None, version: None, health_check: None, process: None, container: None, managed_by: None, state: Some("healthy".to_string()), last_heartbeat: Some(now), observed_at: None, }; assert!(instance_is_publishable(&fresh, 60)); fresh.last_heartbeat = Some(now - chrono::Duration::seconds(120)); assert!(!instance_is_publishable(&fresh, 60)); fresh.last_heartbeat = Some(now); fresh.state = Some("unhealthy".to_string()); assert!(!instance_is_publishable(&fresh, 60)); } }