photoncloud-monorepo/baremetal/first-boot/bootstrap-detector.sh

89 lines
3.1 KiB
Bash
Executable file

#!/usr/bin/env bash
set -euo pipefail
# bootstrap-detector.sh - Detects if node should bootstrap or join cluster
# Usage: bootstrap-detector.sh [config_file]
#
# Arguments:
#   config_file - Path to cluster-config.json
#                 (default: /etc/nixos/secrets/cluster-config.json)
#
# Config keys read:
#   bootstrap  - "true" selects bootstrap mode on first boot
#   node_id    - used for logging only (defaults to "unknown")
#   node_role  - used for logging only (defaults to "unknown")
#
# Output:
#   Diagnostics are written to stderr as one JSON object per log call.
#
# Side effects:
#   On first boot with bootstrap=true, the current timestamp is written to
#   the first-boot marker file (its parent directory is created if needed).
#
# Returns:
#   0 - Node should bootstrap (initialize new cluster)
#   1 - Node should join existing cluster (also returned whenever the
#       first-boot marker already exists, i.e. on reboot)
#   2 - Error (invalid config or missing file)

# Both paths are fixed for the lifetime of the script, so mark them readonly
# to catch accidental reassignment.
readonly CONFIG_FILE="${1:-/etc/nixos/secrets/cluster-config.json}"
readonly FIRST_BOOT_MARKER="/var/lib/first-boot-automation/.initialized"
# Logging function with JSON output.
#
# Writes a single-line JSON object to stderr with the keys "timestamp",
# "level", "component" (always "bootstrap-detector") and "message".
#
# Arguments:
#   $1 - log level (e.g. INFO, ERROR)
#   $2 - free-form message text
#
# The level and message are escaped before being embedded so that quotes,
# backslashes, or line breaks in them (for instance from a file path) cannot
# produce invalid or multi-line JSON.
log() {
  local level="$1"
  local message="$2"
  local timestamp
  timestamp=$(date -Iseconds)

  # Escape the characters JSON string syntax treats specially. Backslash must
  # be handled first so the escapes added afterwards are not doubled.
  local field value
  for field in level message; do
    value=${!field}
    value=${value//\\/\\\\}
    value=${value//\"/\\\"}
    value=${value//$'\n'/\\n}
    value=${value//$'\r'/\\r}
    value=${value//$'\t'/\\t}
    printf -v "$field" '%s' "$value"
  done

  printf '{"timestamp":"%s","level":"%s","component":"bootstrap-detector","message":"%s"}\n' \
    "$timestamp" "$level" "$message" >&2
}
# Guard: the configuration path must refer to an existing regular file;
# otherwise report it and stop with the documented error status.
[[ -f "${CONFIG_FILE}" ]] || {
  log "ERROR" "Configuration file not found: $CONFIG_FILE"
  exit 2
}

# Load the whole file into CONFIG_JSON so later steps can query it without
# touching the disk again. A read failure is also reported as an error.
log "INFO" "Reading configuration from $CONFIG_FILE"
CONFIG_JSON=$(cat "${CONFIG_FILE}") || {
  log "ERROR" "Failed to read configuration file"
  exit 2
}
# Extract the bootstrap flag and node identity from CONFIG_JSON.
# jq is used when available; a grep/sed fallback covers minimal environments.
# Missing keys default to bootstrap=false, node_id=unknown, node_role=unknown.
if command -v jq &> /dev/null; then
  # A jq failure (malformed JSON, or a top-level value that cannot be indexed
  # by key) would otherwise abort the script under `set -e` with jq's own exit
  # status and no structured log line. Report it and use the documented
  # error status instead.
  if ! BOOTSTRAP=$(jq -r '.bootstrap // false' <<< "$CONFIG_JSON") ||
     ! NODE_ID=$(jq -r '.node_id // "unknown"' <<< "$CONFIG_JSON") ||
     ! NODE_ROLE=$(jq -r '.node_role // "unknown"' <<< "$CONFIG_JSON"); then
    log "ERROR" "Failed to parse configuration file as JSON: $CONFIG_FILE"
    exit 2
  fi
else
  # Fallback to grep/sed for minimal environments.
  # The pipeline exits non-zero when grep finds no match; `|| true` keeps that
  # from aborting the script, and the ${VAR:-default} step then fills in the
  # default. Applying the default outside the substitution means it can never
  # be appended to a partially produced value.
  BOOTSTRAP=$(printf '%s\n' "$CONFIG_JSON" \
    | grep -Eo '"bootstrap"[[:space:]]*:[[:space:]]*(true|false)' \
    | head -n1 \
    | sed -E 's/.*:[[:space:]]*(true|false)/\1/') || true
  BOOTSTRAP=${BOOTSTRAP:-false}

  NODE_ID=$(printf '%s\n' "$CONFIG_JSON" \
    | grep -Eo '"node_id"[[:space:]]*:[[:space:]]*"[^"]+"' \
    | head -n1 \
    | sed -E 's/.*"node_id"[[:space:]]*:[[:space:]]*"([^"]+)".*/\1/') || true
  NODE_ID=${NODE_ID:-unknown}

  NODE_ROLE=$(printf '%s\n' "$CONFIG_JSON" \
    | grep -Eo '"node_role"[[:space:]]*:[[:space:]]*"[^"]+"' \
    | head -n1 \
    | sed -E 's/.*"node_role"[[:space:]]*:[[:space:]]*"([^"]+)".*/\1/') || true
  NODE_ROLE=${NODE_ROLE:-unknown}
fi
# Record what was parsed from the configuration.
log "INFO" "Node configuration: id=${NODE_ID}, role=${NODE_ROLE}, bootstrap=${BOOTSTRAP}"

# Reboot detection: an existing marker file means this node has already been
# through first boot, so the bootstrap flag is not consulted at all.
# NOTE(review): the log text says "skipping cluster join" while the exit
# status is 1, which the header documents as "join" - confirm how the caller
# interprets this case.
if [[ -f "${FIRST_BOOT_MARKER}" ]]; then
  log "INFO" "First-boot marker found, this is a reboot - skipping cluster join"

  # The marker's content is logged when the file is readable.
  [[ -r "${FIRST_BOOT_MARKER}" ]] && {
    MARKER_TIMESTAMP="$(cat "${FIRST_BOOT_MARKER}")"
    log "INFO" "Node initialized at: ${MARKER_TIMESTAMP}"
  }

  # Reboots always report "join"; the cluster is expected to exist already.
  exit 1
fi
# First boot logic: no marker file exists, so the bootstrap flag decides.
log "INFO" "First boot detected (no marker file)"

if [[ "$BOOTSTRAP" == "true" ]]; then
  log "INFO" "Bootstrap mode enabled - node will initialize new cluster"

  # Create marker directory and file to track initialization.
  # Failures are handled explicitly: under `set -e` a failing mkdir or
  # redirect would terminate the script with status 1, which the exit-code
  # contract defines as "join existing cluster" - the wrong answer for a
  # bootstrap node. Report the problem and return the error status instead.
  marker_dir=$(dirname "$FIRST_BOOT_MARKER")
  if ! mkdir -p "$marker_dir" || ! date -Iseconds > "$FIRST_BOOT_MARKER"; then
    # Best effort: do not leave an empty or partial marker behind, since its
    # mere existence makes the next run take the reboot path.
    rm -f -- "$FIRST_BOOT_MARKER" 2> /dev/null || true
    log "ERROR" "Failed to create first-boot marker: $FIRST_BOOT_MARKER"
    exit 2
  fi

  exit 0 # Bootstrap
fi

log "INFO" "Join mode enabled - node will join existing cluster"
# No marker is written here; per the original note it is created after a
# successful join (done by cluster-join.sh). Only the join status is returned.
exit 1 # Join existing