#!/usr/bin/env bash
#
# Hardware smoke driver: configuration header.
#
# Usage: <script> [preflight|run|capture]      (default: preflight)
#
# Every knob below is read from an ULTRACLOUD_* environment variable and falls
# back to the default shown. The Redfish settings additionally accept the
# ULTRACLOUD_HARDWARE_BMC_* spelling as a fallback for the
# ULTRACLOUD_HARDWARE_REDFISH_* one.
#
# Each statement must live on its own line: anything that shares the line with
# the shebang is treated as part of the interpreter line and never executed.
set -euo pipefail

export PATH="/run/current-system/sw/bin:/usr/bin:/bin:${PATH}"

# --- locations -------------------------------------------------------------
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="${ULTRACLOUD_REPO_ROOT:-$(cd "${SCRIPT_DIR}/../.." && pwd)}"
TASK_ID="3dba03d3-525b-4079-8c93-90af6a89d32b"
WORK_ROOT="${ULTRACLOUD_WORK_ROOT:-${REPO_ROOT}/work}"
RUN_ID="${ULTRACLOUD_HARDWARE_RUN_ID:-$(date +%Y%m%dT%H%M%S%z)}"
PROOF_BASE="${WORK_ROOT}/hardware-smoke"
PROOF_ROOT="${ULTRACLOUD_HARDWARE_PROOF_ROOT:-${PROOF_BASE}/${RUN_ID}}"
LATEST_LINK="${PROOF_BASE}/latest"

# --- mode and transport selection -------------------------------------------
REQUESTED_MODE="${1:-preflight}"
REQUESTED_TRANSPORT="${ULTRACLOUD_HARDWARE_TRANSPORT:-auto}"
# Starts empty; assigned later once the transport has been resolved.
RESOLVED_TRANSPORT=""

# --- bootstrap / kernel parameter inputs ------------------------------------
DEPLOYER_URL="${ULTRACLOUD_HARDWARE_DEPLOYER_URL:-}"
BOOTSTRAP_TOKEN="${ULTRACLOUD_HARDWARE_BOOTSTRAP_TOKEN:-}"
ALLOW_UNAUTHENTICATED="${ULTRACLOUD_HARDWARE_ALLOW_UNAUTHENTICATED:-0}"
CA_CERT_URL="${ULTRACLOUD_HARDWARE_CA_CERT_URL:-}"
BINARY_CACHE_URL="${ULTRACLOUD_HARDWARE_BINARY_CACHE_URL:-}"
NODE_ID_OVERRIDE="${ULTRACLOUD_HARDWARE_NODE_ID:-}"
HOSTNAME_OVERRIDE="${ULTRACLOUD_HARDWARE_HOSTNAME:-}"
NODE_ROLE="${ULTRACLOUD_HARDWARE_NODE_ROLE:-control-plane}"

# --- installer ISO -----------------------------------------------------------
ISO_ATTR=".#nixosConfigurations.ultracloud-iso.config.system.build.isoImage"
ISO_PATH_OVERRIDE="${ULTRACLOUD_HARDWARE_ISO_PATH:-}"
ISO_URL="${ULTRACLOUD_HARDWARE_ISO_URL:-}"

# --- USB transport -----------------------------------------------------------
USB_DEVICE="${ULTRACLOUD_HARDWARE_USB_DEVICE:-}"
ALLOW_DESTRUCTIVE="${ULTRACLOUD_HARDWARE_ALLOW_DESTRUCTIVE:-}"

# --- Redfish / BMC transport -------------------------------------------------
REDFISH_ENDPOINT="${ULTRACLOUD_HARDWARE_REDFISH_ENDPOINT:-${ULTRACLOUD_HARDWARE_BMC_ENDPOINT:-}}"
REDFISH_USERNAME="${ULTRACLOUD_HARDWARE_REDFISH_USERNAME:-${ULTRACLOUD_HARDWARE_BMC_USERNAME:-}}"
REDFISH_PASSWORD="${ULTRACLOUD_HARDWARE_REDFISH_PASSWORD:-${ULTRACLOUD_HARDWARE_BMC_PASSWORD:-}}"
REDFISH_SYSTEM_ID="${ULTRACLOUD_HARDWARE_REDFISH_SYSTEM_ID:-${ULTRACLOUD_HARDWARE_BMC_SYSTEM_ID:-System.Embedded.1}}"
REDFISH_MANAGER_ID="${ULTRACLOUD_HARDWARE_REDFISH_MANAGER_ID:-${ULTRACLOUD_HARDWARE_BMC_MANAGER_ID:-iDRAC.Embedded.1}}"
REDFISH_VIRTUAL_MEDIA_ID="${ULTRACLOUD_HARDWARE_REDFISH_VIRTUAL_MEDIA_ID:-${ULTRACLOUD_HARDWARE_BMC_VIRTUAL_MEDIA_ID:-CD}}"
REDFISH_RESET_TYPE="${ULTRACLOUD_HARDWARE_REDFISH_RESET_TYPE:-ForceRestart}"
REDFISH_INSECURE="${ULTRACLOUD_HARDWARE_REDFISH_INSECURE:-0}"

# --- evidence capture channels (SSH and/or a serial console log) -------------
SSH_HOST="${ULTRACLOUD_HARDWARE_SSH_HOST:-}"
SSH_USER="${ULTRACLOUD_HARDWARE_SSH_USER:-root}"
SSH_PORT="${ULTRACLOUD_HARDWARE_SSH_PORT:-22}"
SSH_IDENTITY_FILE="${ULTRACLOUD_HARDWARE_SSH_IDENTITY_FILE:-}"
SSH_PASSWORD="${ULTRACLOUD_HARDWARE_SSH_PASSWORD:-}"
SSH_TIMEOUT_SECS="${ULTRACLOUD_HARDWARE_SSH_TIMEOUT_SECS:-3600}"
SERIAL_LOG="${ULTRACLOUD_HARDWARE_SERIAL_LOG:-}"

# --- artifacts written under PROOF_ROOT ---------------------------------------
STATUS_FILE="${PROOF_ROOT}/status.env"
MISSING_FILE="${PROOF_ROOT}/missing-requirements.txt"
HANDOFF_FILE="${PROOF_ROOT}/operator-handoff.md"
KERNEL_PARAMS_FILE="${PROOF_ROOT}/kernel-params.txt"
EXPECTED_MARKERS_FILE="${PROOF_ROOT}/expected-markers.txt"
FAILURE_MARKERS_FILE="${PROOF_ROOT}/failure-markers.txt"
ENVIRONMENT_FILE="${PROOF_ROOT}/environment.txt"
ISO_REF_FILE="${PROOF_ROOT}/iso-reference.txt"
TRANSPORT_LOG="${PROOF_ROOT}/transport.log"
CAPTURE_DIR="${PROOF_ROOT}/capture"

# One human-readable line per unmet prerequisite; filled via append_missing.
MISSING_REQUIREMENTS=()

# Print a tagged progress message on stdout.
log() {
  printf '[hardware-smoke] %s\n' "$*"
}

# Print the effective mode: preflight, run or capture. Anything else maps to
# preflight.
mode_normalized() {
  case "${REQUESTED_MODE}" in
    preflight | run | capture)
      printf '%s\n' "${REQUESTED_MODE}"
      ;;
    *)
      printf 'preflight\n'
      ;;
  esac
}

# Print the current time in ISO-8601 format with second precision.
timestamp() {
  date -Is
}

# Create the proof and capture directories and repoint the "latest" symlink.
# Returns non-zero when the path configuration is empty or a directory or the
# link cannot be created.
prepare_paths() {
  if [[ -z "${PROOF_ROOT:-}" || -z "${PROOF_BASE:-}" || -z "${LATEST_LINK:-}" ]]; then
    log "error: PROOF_ROOT, PROOF_BASE and LATEST_LINK must be set before preparing paths" >&2
    return 1
  fi
  mkdir -p -- "${PROOF_ROOT}" "${CAPTURE_DIR}" "${PROOF_BASE}" || return 1

  # A relative link only resolves when the run directory sits directly inside
  # PROOF_BASE; an overridden proof root elsewhere needs its full path,
  # otherwise "latest" would dangle.
  local link_target="${PROOF_ROOT}"
  if [[ "${PROOF_ROOT%/*}" == "${PROOF_BASE}" ]]; then
    link_target="${PROOF_ROOT##*/}"
  elif [[ "${link_target}" != /* ]]; then
    link_target="${PWD}/${link_target}"
  fi
  ln -sfn -- "${link_target}" "${LATEST_LINK}" || return 1
}

# Record one unmet prerequisite.
#   $1 - description shown to the operator
append_missing() {
  MISSING_REQUIREMENTS+=("$1")
}

# Set RESOLVED_TRANSPORT to usb, redfish, none or invalid.
# With "auto", a configured USB device wins over a Redfish endpoint.
detect_transport() {
  case "${REQUESTED_TRANSPORT}" in
    auto)
      if [[ -n "${USB_DEVICE}" ]]; then
        RESOLVED_TRANSPORT="usb"
      elif [[ -n "${REDFISH_ENDPOINT}" ]]; then
        RESOLVED_TRANSPORT="redfish"
      else
        RESOLVED_TRANSPORT="none"
      fi
      ;;
    usb)
      RESOLVED_TRANSPORT="usb"
      ;;
    bmc | redfish)
      RESOLVED_TRANSPORT="redfish"
      ;;
    *)
      RESOLVED_TRANSPORT="invalid"
      append_missing "transport: set ULTRACLOUD_HARDWARE_TRANSPORT=usb|bmc|redfish, or leave auto and provide USB or Redfish inputs"
      ;;
  esac
}

# Succeed when the operator acknowledged that the USB device may be
# overwritten.
# NOTE(review): the accepted spellings (1/yes/true, case-insensitive) are an
# assumption; confirm against the documented acknowledgement value.
destructive_acknowledged() {
  case "${ALLOW_DESTRUCTIVE,,}" in
    1 | yes | true) return 0 ;;
    *) return 1 ;;
  esac
}

# Record the prerequisites every transport shares: deployer URL, bootstrap
# authentication (or its explicit waiver) and at least one capture channel.
# NOTE(review): rebuilt from the entries in the failure-marker list; confirm
# the checks and wording against the intended behavior.
validate_common_requirements() {
  if [[ -z "${DEPLOYER_URL}" ]]; then
    append_missing "deployer URL: set ULTRACLOUD_HARDWARE_DEPLOYER_URL (written as the ultracloud.deployer_url kernel parameter)"
  fi
  if [[ -z "${BOOTSTRAP_TOKEN}" && "${ALLOW_UNAUTHENTICATED}" != "1" ]]; then
    append_missing "bootstrap token: set ULTRACLOUD_HARDWARE_BOOTSTRAP_TOKEN, or acknowledge with ULTRACLOUD_HARDWARE_ALLOW_UNAUTHENTICATED=1"
  fi
  if [[ -z "${SSH_HOST}" && -z "${SERIAL_LOG}" ]]; then
    append_missing "capture channel: set ULTRACLOUD_HARDWARE_SSH_HOST or ULTRACLOUD_HARDWARE_SERIAL_LOG"
  fi
}

# Record the prerequisites specific to the resolved transport.
# NOTE(review): rebuilt from the entries in the failure-marker list; confirm
# the checks and wording against the intended behavior.
validate_transport_requirements() {
  case "${RESOLVED_TRANSPORT}" in
    usb)
      if [[ -z "${USB_DEVICE}" ]]; then
        append_missing "USB device: set ULTRACLOUD_HARDWARE_USB_DEVICE"
      fi
      if ! destructive_acknowledged; then
        append_missing "destructive acknowledgement: set ULTRACLOUD_HARDWARE_ALLOW_DESTRUCTIVE=1 to allow overwriting the USB device"
      fi
      ;;
    redfish)
      if [[ -z "${REDFISH_ENDPOINT}" ]]; then
        append_missing "Redfish/BMC endpoint: set ULTRACLOUD_HARDWARE_REDFISH_ENDPOINT"
      fi
      if [[ -z "${REDFISH_USERNAME}" || -z "${REDFISH_PASSWORD}" ]]; then
        append_missing "Redfish/BMC credentials: set ULTRACLOUD_HARDWARE_REDFISH_USERNAME and ULTRACLOUD_HARDWARE_REDFISH_PASSWORD"
      fi
      if [[ -z "${ISO_URL}" ]]; then
        append_missing "Redfish ISO URL: set ULTRACLOUD_HARDWARE_ISO_URL"
      fi
      ;;
    none)
      append_missing "transport inputs: set ULTRACLOUD_HARDWARE_USB_DEVICE or ULTRACLOUD_HARDWARE_REDFISH_ENDPOINT"
      ;;
    *)
      # "invalid" has already been recorded by detect_transport.
      ;;
  esac
}

# Write the flake attribute plus any ISO path/URL overrides to ISO_REF_FILE.
write_iso_reference() {
  {
    printf 'iso_attr=%s\n' "${ISO_ATTR}"
    if [[ -n "${ISO_PATH_OVERRIDE}" ]]; then
      printf 'iso_path_override=%s\n' "${ISO_PATH_OVERRIDE}"
    fi
    if [[ -n "${ISO_URL}" ]]; then
      printf 'iso_url=%s\n' "${ISO_URL}"
    fi
  } >"${ISO_REF_FILE}"
}

# Write the ultracloud.* kernel parameters to KERNEL_PARAMS_FILE.
write_kernel_params() {
  {
    printf 'ultracloud.deployer_url=%s\n' "${DEPLOYER_URL:-}"
    if [[ -n "${BOOTSTRAP_TOKEN}" ]]; then
      printf 'ultracloud.bootstrap_token=%s\n' "${BOOTSTRAP_TOKEN}"
    elif [[ "${ALLOW_UNAUTHENTICATED}" == "1" ]]; then
      printf '# ultracloud.bootstrap_token omitted because ULTRACLOUD_HARDWARE_ALLOW_UNAUTHENTICATED=1\n'
    else
      printf 'ultracloud.bootstrap_token=\n'
    fi
    if [[ -n "${CA_CERT_URL}" ]]; then
      printf 'ultracloud.ca_cert_url=%s\n' "${CA_CERT_URL}"
    fi
    if [[ -n "${BINARY_CACHE_URL}" ]]; then
      printf 'ultracloud.binary_cache_url=%s\n' "${BINARY_CACHE_URL}"
    fi
    if [[ -n "${NODE_ID_OVERRIDE}" ]]; then
      printf 'ultracloud.node_id=%s\n' "${NODE_ID_OVERRIDE}"
    fi
    if [[ -n "${HOSTNAME_OVERRIDE}" ]]; then
      printf 'ultracloud.hostname=%s\n' "${HOSTNAME_OVERRIDE}"
    fi
  } >"${KERNEL_PARAMS_FILE}"

  # The file now holds the bootstrap token in clear text; keep it private.
  if [[ -n "${BOOTSTRAP_TOKEN}" ]]; then
    chmod 600 -- "${KERNEL_PARAMS_FILE}" || true
  fi
}

# Write the ordered list of markers an install is expected to emit.
# NOTE(review): most markers end in "." and the post-install one contains
# "..", which suggests a per-node component belongs there; the strings are
# kept exactly as found - confirm against the marker emitter.
write_expected_markers() {
  {
    printf 'ULTRACLOUD_MARKER pre-install.boot.\n'
    printf 'ULTRACLOUD_MARKER pre-install.phone-home.complete.\n'
    printf 'ULTRACLOUD_MARKER install.bundle-downloaded.\n'
    printf 'ULTRACLOUD_MARKER install.disko.complete.\n'
    printf 'ULTRACLOUD_MARKER install.nixos-install.complete.\n'
    printf 'ULTRACLOUD_MARKER reboot.\n'
    printf 'ULTRACLOUD_MARKER post-install.boot..%s\n' "${NODE_ROLE}"
    printf 'ULTRACLOUD_MARKER desired-system-active.\n'
  } >"${EXPECTED_MARKERS_FILE}"
}

# Write the catalogue of known failure conditions, one per line.
write_failure_markers() {
  cat >"${FAILURE_MARKERS_FILE}" <<'EOF'
missing transport inputs
missing ultracloud.deployer_url kernel parameter
missing bootstrap token or unauthenticated bootstrap acknowledgement
missing USB device
missing Redfish/BMC endpoint
missing Redfish/BMC credentials
missing Redfish ISO URL
missing capture channel (SSH or serial log)
missing destructive acknowledgement for USB write
phone-home marker not observed
install.disko.complete marker not observed
reboot marker not observed
desired-system-active marker not observed
nix-agent.service inactive after install
chainfire.service inactive after install on control-plane node
EOF
}

# Snapshot the effective configuration to ENVIRONMENT_FILE. Secrets are
# reduced to yes/no flags rather than written out.
write_environment() {
  local deployer_url_set="no"
  local bootstrap_token_set="no"
  if [[ -n "${DEPLOYER_URL}" ]]; then
    deployer_url_set="yes"
  fi
  if [[ -n "${BOOTSTRAP_TOKEN}" ]]; then
    bootstrap_token_set="yes"
  fi
  {
    printf 'task_id=%s\n' "${TASK_ID}"
    printf 'mode=%s\n' "$(mode_normalized)"
    printf 'started_at=%s\n' "$(timestamp)"
    printf 'repo_root=%s\n' "${REPO_ROOT}"
    printf 'work_root=%s\n' "${WORK_ROOT}"
    printf 'proof_root=%s\n' "${PROOF_ROOT}"
    printf 'requested_transport=%s\n' "${REQUESTED_TRANSPORT}"
    printf 'resolved_transport=%s\n' "${RESOLVED_TRANSPORT}"
    printf 'node_role=%s\n' "${NODE_ROLE}"
    printf 'node_id_override=%s\n' "${NODE_ID_OVERRIDE:-}"
    printf 'hostname_override=%s\n' "${HOSTNAME_OVERRIDE:-}"
    printf 'deployer_url_set=%s\n' "${deployer_url_set}"
    printf 'bootstrap_token_set=%s\n' "${bootstrap_token_set}"
    printf 'allow_unauthenticated=%s\n' "${ALLOW_UNAUTHENTICATED}"
    printf 'binary_cache_url=%s\n' "${BINARY_CACHE_URL:-}"
    printf 'ca_cert_url=%s\n' "${CA_CERT_URL:-}"
    printf 'usb_device=%s\n' "${USB_DEVICE:-}"
    printf 'redfish_endpoint=%s\n' "${REDFISH_ENDPOINT:-}"
    printf 'redfish_system_id=%s\n' "${REDFISH_SYSTEM_ID}"
    printf 'redfish_manager_id=%s\n' "${REDFISH_MANAGER_ID}"
    printf 'redfish_virtual_media_id=%s\n' "${REDFISH_VIRTUAL_MEDIA_ID}"
    printf 'iso_url=%s\n' "${ISO_URL:-}"
    printf 'ssh_host=%s\n' "${SSH_HOST:-}"
    printf 'ssh_port=%s\n' "${SSH_PORT}"
    printf 'serial_log=%s\n' "${SERIAL_LOG:-}"
  } >"${ENVIRONMENT_FILE}"
}

# Write MISSING_REQUIREMENTS to MISSING_FILE, or the single word "none".
write_missing_requirements() {
  if ((${#MISSING_REQUIREMENTS[@]} == 0)); then
    printf 'none\n' >"${MISSING_FILE}"
    return 0
  fi
  printf '%s\n' "${MISSING_REQUIREMENTS[@]}" >"${MISSING_FILE}"
}

# Write a markdown summary for the operator to HANDOFF_FILE.
# NOTE(review): the handoff wording is a reconstruction; adjust it to the
# team's expected handoff template.
write_operator_handoff() {
  local item
  {
    printf '# Hardware smoke operator handoff\n\n'
    printf -- '- task: %s\n' "${TASK_ID}"
    printf -- '- mode: %s\n' "$(mode_normalized)"
    printf -- '- transport: %s (requested: %s)\n' "${RESOLVED_TRANSPORT}" "${REQUESTED_TRANSPORT}"
    printf -- '- node role: %s\n' "${NODE_ROLE}"
    printf -- '- proof root: %s\n' "${PROOF_ROOT}"
    printf '\n## Missing requirements\n\n'
    if ((${#MISSING_REQUIREMENTS[@]} == 0)); then
      printf 'none\n'
    else
      for item in "${MISSING_REQUIREMENTS[@]}"; do
        printf -- '- %s\n' "${item}"
      done
    fi
    printf '\n## Artifacts\n\n'
    printf -- '- status: %s\n' "${STATUS_FILE}"
    printf -- '- environment: %s\n' "${ENVIRONMENT_FILE}"
    printf -- '- kernel parameters: %s\n' "${KERNEL_PARAMS_FILE}"
    printf -- '- ISO reference: %s\n' "${ISO_REF_FILE}"
    printf -- '- expected markers: %s\n' "${EXPECTED_MARKERS_FILE}"
    printf -- '- failure markers: %s\n' "${FAILURE_MARKERS_FILE}"
    printf -- '- transport log: %s\n' "${TRANSPORT_LOG}"
    printf -- '- captured evidence: %s\n' "${CAPTURE_DIR}"
  } >"${HANDOFF_FILE}"
}

# Overwrite STATUS_FILE with the given status.
#   $1 - status word (ready, blocked, success or failed)
# The first line is always "status=<word>", which run_mode greps for.
write_status() {
  local status="$1"
  {
    printf 'status=%s\n' "${status}"
    printf 'mode=%s\n' "$(mode_normalized)"
    printf 'resolved_transport=%s\n' "${RESOLVED_TRANSPORT}"
    printf 'proof_root=%s\n' "${PROOF_ROOT}"
    printf 'updated_at=%s\n' "$(timestamp)"
  } >"${STATUS_FILE}"
}

# Print the path of an ISO image for a candidate file or directory.
#   $1 - an image file, a directory with an iso/ subdirectory, or a directory
#        that contains the image directly
# Prints nothing when no image is found and always returns 0, so callers test
# for empty output. Globbing avoids a pipeline to `head`, whose early exit can
# fail the whole pipeline when pipefail is in effect.
resolve_iso_image() {
  local candidate="$1"
  if [[ -z "${candidate}" ]]; then
    return 0
  fi
  if [[ -f "${candidate}" ]]; then
    printf '%s\n' "${candidate}"
    return 0
  fi

  local search_dir="${candidate}"
  if [[ -d "${candidate}/iso" ]]; then
    search_dir="${candidate}/iso"
  fi

  local entry
  for entry in "${search_dir}"/*.iso; do
    # An unmatched glob stays literal; -f filters that case out as well.
    if [[ -f "${entry}" ]]; then
      printf '%s\n' "${entry}"
      return 0
    fi
  done
  return 0
}

# Print the path of the ISO image to write to USB: the override if one is
# configured, otherwise the result of building the flake's ISO attribute.
# The override goes through resolve_iso_image too, so a directory (such as a
# build output) works the same way as a direct file path.
materialize_iso_for_usb() {
  if [[ -n "${ISO_PATH_OVERRIDE}" ]]; then
    resolve_iso_image "${ISO_PATH_OVERRIDE}"
    return 0
  fi
  local out
  out="$(nix build "${REPO_ROOT}#nixosConfigurations.ultracloud-iso.config.system.build.isoImage" --no-link --print-out-paths)" || return 1
  resolve_iso_image "${out}"
}

# Copy the serial console log into the capture directory when it exists.
copy_serial_log_if_present() {
  if [[ -n "${SERIAL_LOG}" && -f "${SERIAL_LOG}" ]]; then
    cp -- "${SERIAL_LOG}" "${CAPTURE_DIR}/serial.log"
  fi
}

# Run one command on the target over SSH.
#   $1 - remote command line
# Host key checking is switched off and nothing is stored in known_hosts.
# NOTE(review): presumably because a freshly installed machine presents a new
# host key - confirm this is acceptable for the target network.
run_ssh() {
  local cmd="$1"
  local ssh_opts=(
    -o StrictHostKeyChecking=no
    -o UserKnownHostsFile=/dev/null
    -o ConnectTimeout=5
    -p "${SSH_PORT}"
  )
  if [[ -n "${SSH_IDENTITY_FILE}" ]]; then
    ssh_opts+=(-i "${SSH_IDENTITY_FILE}")
  fi
  if [[ -n "${SSH_PASSWORD}" ]]; then
    # Hand the password over through the environment (sshpass -e) so it does
    # not show up in the process list the way `sshpass -p` would.
    SSHPASS="${SSH_PASSWORD}" sshpass -e ssh "${ssh_opts[@]}" "${SSH_USER}@${SSH_HOST}" "${cmd}"
  else
    ssh "${ssh_opts[@]}" "${SSH_USER}@${SSH_HOST}" "${cmd}"
  fi
}

# Poll every 10 seconds until SSH accepts a command or the timeout elapses.
#   $1 - timeout in seconds
# Returns 0 once reachable, 1 on timeout.
wait_for_ssh() {
  local timeout_secs="$1"
  local started
  started="$(date +%s)"
  while (($(date +%s) - started < timeout_secs)); do
    if run_ssh 'true' >/dev/null 2>&1; then
      return 0
    fi
    sleep 10
  done
  return 1
}

# Collect host evidence over SSH into CAPTURE_DIR.
# Returns 1 when no SSH host is configured or it never becomes reachable.
# Each individual capture is best-effort: a failing remote command must not
# prevent the remaining evidence from being collected.
capture_over_ssh() {
  [[ -n "${SSH_HOST}" ]] || return 1
  log "waiting for SSH on ${SSH_HOST}:${SSH_PORT}"
  wait_for_ssh "${SSH_TIMEOUT_SECS}" || return 1

  run_ssh 'hostnamectl --static 2>/dev/null || hostname' >"${CAPTURE_DIR}/hostname.txt" || true
  run_ssh 'readlink -f /run/current-system || true' >"${CAPTURE_DIR}/current-system.txt" || true
  run_ssh 'journalctl -b --no-pager || true' >"${CAPTURE_DIR}/journal-boot.log" || true
  run_ssh 'journalctl -b -u nix-agent.service --no-pager || true' >"${CAPTURE_DIR}/journal-nix-agent.log" || true
  run_ssh 'systemctl is-active nix-agent.service || true' >"${CAPTURE_DIR}/nix-agent-active.txt" || true
  if [[ "${NODE_ROLE}" == "control-plane" ]]; then
    run_ssh 'journalctl -b -u chainfire.service --no-pager || true' >"${CAPTURE_DIR}/journal-chainfire.log" || true
    run_ssh 'systemctl is-active chainfire.service || true' >"${CAPTURE_DIR}/chainfire-active.txt" || true
  fi

  grep 'ULTRACLOUD_MARKER' "${CAPTURE_DIR}/journal-boot.log" >"${CAPTURE_DIR}/marker-summary.log" || true
  grep 'ULTRACLOUD_MARKER desired-system-active\.' "${CAPTURE_DIR}/journal-boot.log" >"${CAPTURE_DIR}/desired-system-active.log" || true
  return 0
}

# Gather evidence and decide whether the desired system became active.
# Returns 0 when the desired-system-active marker was seen, either in the boot
# journal (and any captured service state reads "active") or, failing that, in
# the serial log. Returns 1 otherwise.
capture_success() {
  local marker_log="${CAPTURE_DIR}/desired-system-active.log"

  copy_serial_log_if_present
  # SSH is optional here; the serial log may still carry the marker.
  capture_over_ssh || true

  if [[ -s "${marker_log}" ]]; then
    if [[ -f "${CAPTURE_DIR}/nix-agent-active.txt" ]]; then
      grep -Eq '^active$' "${CAPTURE_DIR}/nix-agent-active.txt" || return 1
    fi
    if [[ "${NODE_ROLE}" == "control-plane" && -f "${CAPTURE_DIR}/chainfire-active.txt" ]]; then
      grep -Eq '^active$' "${CAPTURE_DIR}/chainfire-active.txt" || return 1
    fi
    return 0
  fi

  if [[ -f "${CAPTURE_DIR}/serial.log" ]]; then
    grep 'ULTRACLOUD_MARKER desired-system-active\.' "${CAPTURE_DIR}/serial.log" >"${marker_log}" || true
    [[ -s "${marker_log}" ]] || return 1
    return 0
  fi
  return 1
}

# Write the installer ISO onto USB_DEVICE with dd.
# Returns 1 (after recording the reason) when the device or the destructive
# acknowledgement is missing or no ISO image can be resolved, and 1 when the
# write itself fails. Every critical step is checked explicitly because
# errexit is not in effect when this runs on the left of `||`.
run_usb_transport() {
  if [[ -z "${USB_DEVICE}" ]]; then
    append_missing "USB transport: ULTRACLOUD_HARDWARE_USB_DEVICE is not set"
    return 1
  fi
  # Re-checked here so the destructive write can never run unacknowledged,
  # whichever code path leads to this function.
  if ! destructive_acknowledged; then
    append_missing "USB transport: destructive write to ${USB_DEVICE} not acknowledged"
    return 1
  fi

  local iso_path
  iso_path="$(materialize_iso_for_usb)" || iso_path=""
  if [[ -z "${iso_path}" ]]; then
    append_missing "USB transport: unable to resolve ultracloud-iso image from ${ISO_ATTR}"
    return 1
  fi

  {
    printf 'mode=usb\n'
    printf 'iso_path=%s\n' "${iso_path}"
    printf 'usb_device=%s\n' "${USB_DEVICE}"
  } >"${PROOF_ROOT}/transport.env" || return 1

  log "writing ${iso_path} to ${USB_DEVICE}"
  lsblk "${USB_DEVICE}" >"${PROOF_ROOT}/usb-device-before.txt" 2>&1 || true
  # With pipefail in effect a dd failure fails the pipeline, not just tee.
  dd if="${iso_path}" of="${USB_DEVICE}" bs=16M conv=fsync status=progress 2>&1 |
    tee "${TRANSPORT_LOG}" || return 1
  sync
  lsblk "${USB_DEVICE}" >"${PROOF_ROOT}/usb-device-after.txt" 2>&1 || true
}

# Print $1 with backslashes and double quotes backslash-escaped, which makes
# it safe inside a double-quoted JSON string or curl config value.
escape_quoted() {
  local text="$1"
  text="${text//\\/\\\\}"
  text="${text//\"/\\\"}"
  printf '%s' "${text}"
}

# Issue one Redfish request and store the response under PROOF_ROOT.
#   $1 - HTTP method
#   $2 - URL
#   $3 - JSON body file, or "" for no body
#   $4 - prefix for the .headers / .body / .status artifacts
# Returns non-zero when curl fails or the BMC answers with a non-2xx status.
redfish_curl() {
  local method="$1"
  local url="$2"
  local body_file="$3"
  local out_prefix="$4"
  local curl_args=(
    -sS
    --config -
    -X "${method}"
    -D "${PROOF_ROOT}/${out_prefix}.headers"
    -o "${PROOF_ROOT}/${out_prefix}.body"
    -w '%{http_code}'
  )
  if [[ "${REDFISH_INSECURE}" == "1" ]]; then
    curl_args+=(-k)
  fi
  if [[ -n "${body_file}" ]]; then
    curl_args+=(-H 'Content-Type: application/json' --data @"${body_file}")
  fi

  # Credentials reach curl through a config read from stdin so they do not
  # appear in the process list; printf is a builtin and spawns no process.
  local http_code
  http_code="$(
    printf 'user = "%s"\n' "$(escape_quoted "${REDFISH_USERNAME}:${REDFISH_PASSWORD}")" |
      curl "${curl_args[@]}" "${url}"
  )" || return 1

  printf '%s\n' "${http_code}" >"${PROOF_ROOT}/${out_prefix}.status"
  if [[ ! "${http_code}" =~ ^2[0-9][0-9]$ ]]; then
    log "Redfish ${method} ${url} returned HTTP ${http_code}; see ${PROOF_ROOT}/${out_prefix}.body" >&2
    return 1
  fi
}

# Write the three Redfish request bodies.
#   $1 - InsertMedia body path
#   $2 - boot-override body path
#   $3 - reset body path
# NOTE(review): the payload fields are a reconstruction. InsertMedia carries
# only "Image"; the boot override requests a one-time boot from "Cd". Confirm
# both against the target BMC.
write_redfish_payloads() {
  local insert_body="$1"
  local boot_body="$2"
  local reset_body="$3"
  printf '{"Image": "%s"}\n' "$(escape_quoted "${ISO_URL}")" >"${insert_body}" || return 1
  printf '{"Boot": {"BootSourceOverrideEnabled": "Once", "BootSourceOverrideTarget": "Cd"}}\n' \
    >"${boot_body}" || return 1
  printf '{"ResetType": "%s"}\n' "$(escape_quoted "${REDFISH_RESET_TYPE}")" >"${reset_body}" || return 1
}

# Mount the ISO as virtual media, set the boot override and reset the system
# through Redfish. Returns 1 as soon as a required request fails.
run_redfish_transport() {
  local endpoint="${REDFISH_ENDPOINT%/}"
  local virtual_media_url="${endpoint}/redfish/v1/Managers/${REDFISH_MANAGER_ID}/VirtualMedia/${REDFISH_VIRTUAL_MEDIA_ID}"
  local system_url="${endpoint}/redfish/v1/Systems/${REDFISH_SYSTEM_ID}"
  local insert_body="${PROOF_ROOT}/insert-media.json"
  local boot_body="${PROOF_ROOT}/boot-override.json"
  local reset_body="${PROOF_ROOT}/reset.json"

  write_redfish_payloads "${insert_body}" "${boot_body}" "${reset_body}" || return 1

  {
    printf 'mode=redfish\n'
    printf 'redfish_endpoint=%s\n' "${endpoint}"
    printf 'redfish_system_id=%s\n' "${REDFISH_SYSTEM_ID}"
    printf 'redfish_manager_id=%s\n' "${REDFISH_MANAGER_ID}"
    printf 'redfish_virtual_media_id=%s\n' "${REDFISH_VIRTUAL_MEDIA_ID}"
    printf 'redfish_reset_type=%s\n' "${REDFISH_RESET_TYPE}"
    printf 'iso_url=%s\n' "${ISO_URL}"
  } >"${PROOF_ROOT}/transport.env" || return 1

  # Virtual media actions use the same Actions/<Resource>.<Action> form as the
  # ComputerSystem.Reset call below.
  # The eject is best-effort: it fails when nothing is mounted yet.
  redfish_curl POST "${virtual_media_url}/Actions/VirtualMedia.EjectMedia" "" "redfish-eject-media" || true
  redfish_curl POST "${virtual_media_url}/Actions/VirtualMedia.InsertMedia" "${insert_body}" "redfish-insert-media" || return 1
  redfish_curl PATCH "${system_url}" "${boot_body}" "redfish-boot-override" || return 1
  redfish_curl POST "${system_url}/Actions/ComputerSystem.Reset" "${reset_body}" "redfish-reset" || return 1
}

# Validate the inputs and write every planning artifact.
# Leaves STATUS_FILE at "ready" or "blocked"; touches no hardware.
preflight() {
  detect_transport
  validate_common_requirements
  validate_transport_requirements
  write_environment
  write_iso_reference
  write_kernel_params
  write_expected_markers
  write_failure_markers
  write_missing_requirements
  write_operator_handoff
  if ((${#MISSING_REQUIREMENTS[@]} == 0)); then
    write_status ready
    log "hardware-smoke preflight is ready; artifacts in ${PROOF_ROOT}"
  else
    write_status blocked
    log "hardware-smoke preflight is blocked; see ${MISSING_FILE}"
  fi
}

# Preflight, drive the transport, then capture evidence.
# Returns 0 on success, 2 when blocked by missing requirements, 3 when the
# transport fails or no desired-system evidence is captured.
run_mode() {
  preflight
  if grep -Eq '^status=blocked$' "${STATUS_FILE}"; then
    return 2
  fi

  local transport_rc=0
  case "${RESOLVED_TRANSPORT}" in
    usb)
      run_usb_transport || transport_rc=$?
      ;;
    redfish)
      run_redfish_transport || transport_rc=$?
      ;;
    *)
      return 2
      ;;
  esac
  if ((transport_rc != 0)); then
    # Without this the status file would keep saying "ready" after a failed
    # transport, and reasons recorded by the transport would never be written.
    write_missing_requirements
    write_status failed
    log "hardware smoke transport '${RESOLVED_TRANSPORT}' failed; see ${PROOF_ROOT}"
    return 3
  fi

  if capture_success; then
    write_status success
    log "hardware smoke captured desired-system evidence in ${CAPTURE_DIR}"
    return 0
  fi
  write_status failed
  log "hardware smoke did not capture desired-system evidence; see ${CAPTURE_DIR}"
  return 3
}

# Capture evidence from a machine that was booted by other means.
# Returns 0 on success, 2 when no capture channel is configured, 3 when the
# evidence does not show the desired system active.
capture_mode() {
  detect_transport
  write_environment
  write_iso_reference
  write_kernel_params
  write_expected_markers
  write_failure_markers
  write_operator_handoff

  if capture_success; then
    write_missing_requirements
    write_status success
    log "captured desired-system evidence in ${CAPTURE_DIR}"
    return 0
  fi

  if [[ -z "${SSH_HOST}" && -z "${SERIAL_LOG}" ]]; then
    append_missing "capture channel: set ULTRACLOUD_HARDWARE_SSH_HOST or ULTRACLOUD_HARDWARE_SERIAL_LOG"
    write_missing_requirements
    write_status blocked
    return 2
  fi

  write_missing_requirements
  write_status failed
  return 3
}

# Entry point: prepare the proof directory and dispatch on the mode.
main() {
  prepare_paths || return 1

  local mode
  mode="$(mode_normalized)"
  if [[ "${mode}" != "${REQUESTED_MODE}" ]]; then
    log "warning: unknown mode '${REQUESTED_MODE}'; falling back to preflight" >&2
  fi
  case "${mode}" in
    preflight) preflight ;;
    run) run_mode ;;
    capture) capture_mode ;;
  esac
}

main "$@"