#!/usr/bin/env bash
set -euo pipefail

# health-check.sh - Health check wrapper for services
#
# Polls an HTTP(S) health endpoint until it answers 200 or the timeout elapses.
#
# Usage: health-check.sh <service_name> <health_url> [timeout] [retry_interval]
#
# Arguments:
#   service_name   - Name of the service (for logging)
#   health_url     - HTTP/HTTPS URL of the health endpoint
#   timeout        - Maximum time to wait in seconds (default: 300)
#   retry_interval - Time between retries in seconds (default: 5)
#
# Environment:
#   CURL_CONNECT_TIMEOUT - curl --connect-timeout value in seconds (default: 5)
#   CURL_MAX_TIME        - curl --max-time value in seconds (default: 10)
#   CURL_INSECURE        - "1" adds curl -k, i.e. skips TLS verification (default: 1)
#
# Output:
#   stderr - one JSON log object per line
#   stdout - a single JSON status object when the service is healthy
#
# Returns:
#   0 - Service is healthy
#   1 - Service is unhealthy (timeout reached), or the arguments are invalid

SERVICE_NAME="${1:-}"
HEALTH_URL="${2:-}"
TIMEOUT="${3:-300}"
RETRY_INTERVAL="${4:-5}"
CURL_CONNECT_TIMEOUT="${CURL_CONNECT_TIMEOUT:-5}"
CURL_MAX_TIME="${CURL_MAX_TIME:-10}"
# NOTE(review): TLS verification is disabled by default; callers that need
# certificate checking must export CURL_INSECURE=0.
CURL_INSECURE="${CURL_INSECURE:-1}"

# Validate arguments
if [[ -z "$SERVICE_NAME" || -z "$HEALTH_URL" ]]; then
  echo "ERROR: Missing required arguments" >&2
  echo "Usage: $0 <service_name> <health_url> [timeout] [retry_interval]" >&2
  exit 1
fi

# The timeout is compared arithmetically on every iteration. A non-integer
# value would make that comparison error out on each pass, so the loop would
# never time out; reject it up front instead.
if ! [[ "$TIMEOUT" =~ ^[0-9]+$ ]]; then
  echo "ERROR: timeout must be a non-negative integer number of seconds, got '$TIMEOUT'" >&2
  exit 1
fi
# Force base 10 so values with leading zeros (e.g. "030") are not read as octal.
TIMEOUT=$((10#$TIMEOUT))

# Without curl no attempt can ever succeed, so fail immediately rather than
# retrying until the timeout.
if ! command -v curl >/dev/null 2>&1; then
  echo "ERROR: curl is required but was not found in PATH" >&2
  exit 1
fi

#######################################
# Escape a string for embedding inside a double-quoted JSON string value.
# Handles backslash, double quote, newline, carriage return and tab.
# Arguments: $1 - raw string
# Outputs:   escaped string on stdout (no trailing newline)
#######################################
json_escape() {
  local s="$1"
  s=${s//\\/\\\\} # backslashes first, so later escapes are not doubled
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  printf '%s' "$s"
}

#######################################
# Write one JSON log line to stderr.
# Globals:   SERVICE_NAME (read)
# Arguments: $1 - level (e.g. INFO, WARN, ERROR)
#            $2 - message text
# Outputs:   {"timestamp":...,"level":...,"service":...,"message":...} on stderr
#######################################
log() {
  local level="$1"
  local message="$2"
  local timestamp
  timestamp=$(date -Iseconds)
  printf '{"timestamp":"%s","level":"%s","service":"%s","message":"%s"}\n' \
    "$timestamp" \
    "$(json_escape "$level")" \
    "$(json_escape "$SERVICE_NAME")" \
    "$(json_escape "$message")" >&2
}

# Main health check loop
log "INFO" "Starting health check for $SERVICE_NAME at $HEALTH_URL (timeout: ${TIMEOUT}s)"

START_TIME=$(date +%s)
ATTEMPT=0

# The curl flags do not change between attempts, so build them once.
# -s/-o /dev/null discard the body; -w prints only the HTTP status code.
CURL_FLAGS=(
  -s -o /dev/null -w "%{http_code}"
  --connect-timeout "$CURL_CONNECT_TIMEOUT"
  --max-time "$CURL_MAX_TIME"
)
# Allow insecure TLS if configured
if [[ "$CURL_INSECURE" == "1" ]]; then
  CURL_FLAGS+=(-k)
fi

while true; do
  CURRENT_TIME=$(date +%s)
  ELAPSED=$((CURRENT_TIME - START_TIME))

  if ((ELAPSED >= TIMEOUT)); then
    log "ERROR" "Health check timeout reached after ${ELAPSED}s"
    exit 1
  fi

  ATTEMPT=$((ATTEMPT + 1))
  log "INFO" "Health check attempt $ATTEMPT (elapsed: ${ELAPSED}s)"

  # On a transport failure curl itself already prints "000" via -w and exits
  # non-zero. The fallback must therefore be assigned outside the command
  # substitution: an `|| echo "000"` inside it would append a second "000"
  # and the value would never equal "000".
  HTTP_CODE=$(curl "${CURL_FLAGS[@]}" "$HEALTH_URL" 2>/dev/null) || HTTP_CODE="000"

  if [[ "$HTTP_CODE" == "200" ]]; then
    log "INFO" "Health check passed (HTTP $HTTP_CODE)"
    # Machine-readable success record on stdout (logs go to stderr).
    printf '{"timestamp":"%s","service":"%s","status":"healthy","attempts":%s,"elapsed":%s}\n' \
      "$(date -Iseconds)" \
      "$(json_escape "$SERVICE_NAME")" \
      "$ATTEMPT" \
      "$ELAPSED"
    exit 0
  elif [[ "$HTTP_CODE" == "000" ]]; then
    log "WARN" "Health check failed: connection error (attempt $ATTEMPT)"
  else
    log "WARN" "Health check failed: HTTP $HTTP_CODE (attempt $ATTEMPT)"
  fi

  sleep "$RETRY_INTERVAL"
done