photoncloud-monorepo/baremetal/first-boot/health-check.sh
centra 5c6eb04a46 T036: Add VM cluster deployment configs for nixos-anywhere
- netboot-base.nix with SSH key auth
- Launch scripts for node01/02/03
- Node configuration.nix and disko.nix
- Nix modules for first-boot automation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-11 09:59:19 +09:00

72 lines
2.2 KiB
Bash
Executable file

#!/usr/bin/env bash
set -euo pipefail
# health-check.sh - Health check wrapper for services
# Usage: health-check.sh <service_name> <health_url> [timeout] [retry_interval]
#
# Polls <health_url> until it answers HTTP 200 or the timeout elapses.
# Progress is logged as JSON lines on stderr; on success a single JSON
# status object is written to stdout.
#
# Arguments:
#   service_name   - Name of the service (for logging)
#   health_url     - HTTP/HTTPS URL of the health endpoint
#   timeout        - Maximum time to wait in whole seconds (default: 300)
#   retry_interval - Time between retries, passed to sleep(1) (default: 5)
#
# Returns:
#   0 - Service is healthy
#   1 - Service is unhealthy (timeout reached), or the arguments are
#       missing/invalid
SERVICE_NAME="${1:-}"
HEALTH_URL="${2:-}"
TIMEOUT="${3:-300}"
RETRY_INTERVAL="${4:-5}"

# Validate arguments
if [[ -z "$SERVICE_NAME" || -z "$HEALTH_URL" ]]; then
  echo "ERROR: Missing required arguments" >&2
  echo "Usage: $0 <service_name> <health_url> [timeout] [retry_interval]" >&2
  exit 1
fi

# The timeout is compared arithmetically on every iteration. A non-numeric
# value would make that comparison error out (and evaluate as "false"), so
# the loop would never time out; reject it up front instead.
if [[ ! "$TIMEOUT" =~ ^[0-9]+$ ]]; then
  echo "ERROR: timeout must be a non-negative integer number of seconds (got '$TIMEOUT')" >&2
  echo "Usage: $0 <service_name> <health_url> [timeout] [retry_interval]" >&2
  exit 1
fi
# Force base 10 so values with leading zeros (e.g. "010", "08") are not
# interpreted as octal by later arithmetic.
TIMEOUT=$((10#$TIMEOUT))
# Escape a string so it can be embedded inside a JSON string literal.
# Handles backslash, double quote, newline, carriage return and tab; other
# control characters are passed through unchanged.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout (no trailing newline)
_json_escape() {
  local text="$1"
  text=${text//\\/\\\\}    # backslash first, so later escapes are not doubled
  text=${text//\"/\\\"}
  text=${text//$'\n'/\\n}
  text=${text//$'\r'/\\r}
  text=${text//$'\t'/\\t}
  printf '%s' "$text"
}

# Logging function with JSON output.
# Globals:   SERVICE_NAME (read)
# Arguments: $1 - log level (e.g. INFO, WARN, ERROR)
#            $2 - log message
# Outputs:   one JSON object per call on stderr with the keys
#            timestamp, level, service and message
log() {
  local level="$1"
  local message="$2"
  local timestamp
  timestamp=$(date -Iseconds)
  # Escape every interpolated value: messages embed caller-supplied text
  # (such as the health URL) that may contain quotes or backslashes.
  printf '{"timestamp":"%s","level":"%s","service":"%s","message":"%s"}\n' \
    "$timestamp" \
    "$(_json_escape "$level")" \
    "$(_json_escape "$SERVICE_NAME")" \
    "$(_json_escape "$message")" >&2
}
# Main health check loop.
# Polls HEALTH_URL until it returns HTTP 200 (exit 0) or TIMEOUT seconds have
# elapsed (exit 1), sleeping RETRY_INTERVAL between attempts.
log "INFO" "Starting health check for $SERVICE_NAME at $HEALTH_URL (timeout: ${TIMEOUT}s)"
START_TIME=$(date +%s)
ATTEMPT=0
while true; do
  CURRENT_TIME=$(date +%s)
  ELAPSED=$((CURRENT_TIME - START_TIME))
  if [[ $ELAPSED -ge $TIMEOUT ]]; then
    log "ERROR" "Health check timeout reached after ${ELAPSED}s"
    exit 1
  fi

  ATTEMPT=$((ATTEMPT + 1))
  log "INFO" "Health check attempt $ATTEMPT (elapsed: ${ELAPSED}s)"

  # Perform health check (allow insecure TLS for self-signed certs).
  # --max-time caps a single request at the time left in the overall budget,
  # so a hung connection cannot block past TIMEOUT (REMAINING is always >= 1
  # here because ELAPSED < TIMEOUT).
  # On failure curl itself already prints "000" via -w, so the fallback must
  # replace the captured value rather than append to it (appending produced
  # "000000" and the connection-error branch below was never taken).
  # stderr is discarded on purpose: failures are reported through log().
  REMAINING=$((TIMEOUT - ELAPSED))
  HTTP_CODE=$(curl -k -s -o /dev/null --max-time "$REMAINING" \
    -w "%{http_code}" "$HEALTH_URL" 2>/dev/null) || HTTP_CODE="000"

  if [[ "$HTTP_CODE" == "200" ]]; then
    log "INFO" "Health check passed (HTTP $HTTP_CODE)"
    # Re-measure so the reported elapsed time includes the successful request.
    ELAPSED=$(($(date +%s) - START_TIME))
    # Escape backslashes and quotes so the status object stays valid JSON.
    SERVICE_JSON=${SERVICE_NAME//\\/\\\\}
    SERVICE_JSON=${SERVICE_JSON//\"/\\\"}
    printf '{"timestamp":"%s","service":"%s","status":"healthy","attempts":%s,"elapsed":%s}\n' \
      "$(date -Iseconds)" "$SERVICE_JSON" "$ATTEMPT" "$ELAPSED"
    exit 0
  elif [[ "$HTTP_CODE" == "000" ]]; then
    log "WARN" "Health check failed: connection error (attempt $ATTEMPT)"
  else
    log "WARN" "Health check failed: HTTP $HTTP_CODE (attempt $ATTEMPT)"
  fi

  sleep "$RETRY_INTERVAL"
done