diff --git a/scripts/install-k8s.ps1 b/scripts/install-k8s.ps1
index 94ca784..e1d41dd 100644
--- a/scripts/install-k8s.ps1
+++ b/scripts/install-k8s.ps1
@@ -738,6 +738,22 @@ function Write-K3dProxyConfig {
     return $cfg
 }
 
+# Guarantee the cluster returns after a reboot: make the k3d node containers
+# restart when Docker starts (defensive -- k3d already sets unless-stopped -- and
+# covers externally-created clusters). Nodes are picked by the exact k3d.cluster
+# label, not a substring name filter. On Windows, Docker Desktop must also start
+# on login (the summary says how). Opt out with TRACEBLOC_NO_AUTOSTART=1.
+function Set-ClusterAutostart {
+    if ($env:TRACEBLOC_NO_AUTOSTART) { return }
+    try {
+        $nodes = docker ps -a --filter "label=k3d.cluster=$CLUSTER_NAME" --format "{{.Names}}" 2>$null
+        foreach ($n in $nodes) {
+            if ($n) { docker update --restart unless-stopped $n 2>&1 | Out-Null }
+        }
+        if ($nodes) { Log "Set restart=unless-stopped on k3d nodes (auto-restart after reboot)." }
+    } catch {}
+}
+
 function New-K3dCluster {
     Log "Creating k3d cluster: '$CLUSTER_NAME'"
 
@@ -865,6 +881,8 @@ function New-K3dCluster {
     }
 
     Log "kubeconfig updated -- kubectl now points to '$CLUSTER_NAME'."
+
+    Set-ClusterAutostart
 }
 
 # =============================================================================
@@ -1244,6 +1262,8 @@ function Print-Summary {
     Write-Host ""
     Hint "Models that vendors submit train on this machine -- your data never leaves it."
     Write-Host ""
+    Hint "After a reboot, start Docker Desktop to bring your client back (enable 'Start Docker Desktop when you sign in' in Settings -> General to automate)."
+    Write-Host ""
     Write-Host " What to do next" -ForegroundColor White
     Write-Host " 1. Ingest your training and test data"
     Write-Host " 2. Define your first AI use case and invite vendors"
diff --git a/scripts/lib/cluster.sh b/scripts/lib/cluster.sh
index f81bd6e..e8c3541 100755
--- a/scripts/lib/cluster.sh
+++ b/scripts/lib/cluster.sh
@@ -125,11 +125,42 @@ create_cluster() {
     _create_new_cluster
   fi
 
+  ensure_cluster_autostart
   _merge_kubeconfig
   _export_host_no_proxy
   _wait_for_api
 }
 
+# Guarantee the cluster returns after a host reboot. On Linux this already works
+# by default — k3d sets `--restart unless-stopped` on its node containers and the
+# Docker install enables docker.service on boot — but we harden both so it holds
+# even on a re-run where Docker was installed-but-disabled, or for an externally-
+# created cluster. Nodes are selected by the exact k3d.cluster label (a name
+# filter is a substring match). On macOS/Windows Docker Desktop must also start
+# on login (the summary tells the user). Opt out with TRACEBLOC_NO_AUTOSTART=1.
+ensure_cluster_autostart() {
+  if [[ -n "${TRACEBLOC_NO_AUTOSTART:-}" ]]; then return 0; fi
+
+  local nodes node
+  nodes=$(docker ps -a --filter "label=k3d.cluster=${CLUSTER_NAME}" --format '{{.Names}}' 2>/dev/null) || return 0
+  if [[ -n "$nodes" ]]; then
+    for node in $nodes; do
+      docker update --restart unless-stopped "$node" >/dev/null 2>&1 || true
+    done
+    log "Set restart=unless-stopped on k3d nodes so the cluster returns after a reboot."
+  fi
+
+  # On Linux, make sure Docker itself starts on boot. The fresh-install path only
+  # enables docker.service when Docker was absent; this also covers the
+  # installed-but-disabled re-run case. Idempotent.
+  if [[ "$OS" == "Linux" ]] && has systemctl; then
+    if sudo systemctl enable docker >/dev/null 2>&1; then
+      log "Ensured docker.service is enabled on boot."
+    fi
+  fi
+  return 0
+}
+
 _handle_existing_cluster() {
   CLUSTER_STATUS="0"
   if command -v jq &>/dev/null; then
diff --git a/scripts/lib/summary.sh b/scripts/lib/summary.sh
index 90e784b..8e72ba5 100755
--- a/scripts/lib/summary.sh
+++ b/scripts/lib/summary.sh
@@ -70,6 +70,17 @@ _diagnose_not_ready() {
 # Reports the outcome based on CLIENT_STATE (set by wait_for_client_ready).
 # The "secure compute environment / your data never leaves" claim is printed
 # ONLY when the client is verifiably connected — never on a partial/failed run.
+# One-line note in the success summary so the user knows the client survives a
+# reboot — automatic on Linux; needs Docker Desktop start-on-login on macOS/Win.
+_reboot_note() {
+  if [[ "$OS" == "Linux" ]]; then
+    echo -e " ${GREEN}✔${RESET} ${DIM}Survives reboot — Docker and your client restart automatically.${RESET}"
+  else
+    echo -e " ${DIM}After a reboot, start Docker Desktop to bring your client back —${RESET}"
+    echo -e " ${DIM}enable Settings → General → \"Start Docker Desktop when you sign in\" to automate.${RESET}"
+  fi
+}
+
 print_summary() {
   local mode="CPU"
   [[ "$GPU_VENDOR" == "nvidia" ]] && mode="NVIDIA GPU"
@@ -93,6 +104,8 @@ print_summary() {
   echo -e " ${DIM}Models that vendors submit train on this machine —${RESET}"
   echo -e " ${DIM}your data never leaves it.${RESET}"
   echo ""
+  _reboot_note
+  echo ""
   echo -e " ${BOLD}What to do next${RESET}"
   echo -e " ${WHITE}1.${RESET} Ingest your training and test data"
   echo -e " ${WHITE}2.${RESET} Define your first AI use case and invite vendors"
diff --git a/scripts/tests/cluster.bats b/scripts/tests/cluster.bats
index 0b7705c..a21bb62 100644
--- a/scripts/tests/cluster.bats
+++ b/scripts/tests/cluster.bats
@@ -160,3 +160,44 @@ setup() {
   run _check_existing_cluster_proxy
   [[ "$output" == *"missing: HTTP_PROXY"* ]]
 }
+
+# ── ensure_cluster_autostart (reboot persistence) ───────────────────────────
+@test "ensure_cluster_autostart: unless-stopped per node + enables docker (Linux)" {
+  OS=Linux
+  docker() { if [[ "$1 $2" == "ps -a" ]]; then printf '%s\n' "k3d-tracebloc-server-0" "k3d-tracebloc-serverlb"; else record "docker $*"; fi; }
+  sudo() { record "sudo $*"; }
+  has() { return 0; }
+  ensure_cluster_autostart
+  run mock_calls
+  [[ "$output" == *"docker update --restart unless-stopped k3d-tracebloc-server-0"* ]]
+  [[ "$output" == *"docker update --restart unless-stopped k3d-tracebloc-serverlb"* ]]
+  [[ "$output" == *"sudo systemctl enable docker"* ]]
+}
+
+@test "ensure_cluster_autostart: macOS does not enable docker.service" {
+  OS=Darwin
+  docker() { if [[ "$1 $2" == "ps -a" ]]; then echo "k3d-tracebloc-server-0"; else record "docker $*"; fi; }
+  sudo() { record "sudo $*"; }
+  has() { return 0; }
+  ensure_cluster_autostart
+  run mock_calls
+  [[ "$output" == *"docker update --restart unless-stopped"* ]]
+  [[ "$output" != *"systemctl enable docker"* ]]
+}
+
+@test "ensure_cluster_autostart: TRACEBLOC_NO_AUTOSTART -> no-op" {
+  OS=Linux
+  docker() { if [[ "$1 $2" == "ps -a" ]]; then echo "k3d-tracebloc-server-0"; else record "docker $*"; fi; }
+  sudo() { record "sudo $*"; }
+  TRACEBLOC_NO_AUTOSTART=1 ensure_cluster_autostart
+  run mock_calls
+  [ -z "$output" ]
+}
+
+@test "ensure_cluster_autostart: no nodes -> no docker update" {
+  OS=Darwin
+  docker() { if [[ "$1 $2" == "ps -a" ]]; then echo ""; else record "docker $*"; fi; }
+  ensure_cluster_autostart
+  run mock_calls
+  [[ "$output" != *"docker update"* ]]
+}
diff --git a/scripts/tests/install-k8s.Tests.ps1 b/scripts/tests/install-k8s.Tests.ps1
index 54341e5..e1cebc6 100644
--- a/scripts/tests/install-k8s.Tests.ps1
+++ b/scripts/tests/install-k8s.Tests.ps1
@@ -7,6 +7,7 @@ BeforeAll {
     . "$PSScriptRoot/../install-k8s.ps1"
     # Stubs so Pester can mock external commands that the functions invoke.
     function kubectl { }
+    function docker { }
 }
 
 Describe "Get-BackendUrl" {
@@ -359,3 +360,21 @@ Describe "Test-Preflight" {
         { Test-Preflight } | Should -Not -Throw
     }
 }
+
+# --- reboot persistence (Set-ClusterAutostart) -------------------------------
+Describe "Set-ClusterAutostart" {
+    AfterEach { $env:TRACEBLOC_NO_AUTOSTART = $null }
+    It "sets unless-stopped on each k3d node" {
+        Mock docker {
+            if (($args -join ' ') -match 'ps -a') { return @("k3d-tracebloc-server-0", "k3d-tracebloc-serverlb") }
+        }
+        Set-ClusterAutostart
+        Should -Invoke docker -ParameterFilter { ($args -join ' ') -match 'update --restart unless-stopped' } -Times 2
+    }
+    It "TRACEBLOC_NO_AUTOSTART -> no docker calls" {
+        $env:TRACEBLOC_NO_AUTOSTART = "1"
+        Mock docker { }
+        Set-ClusterAutostart
+        Should -Invoke docker -Times 0 -Exactly
+    }
+}
diff --git a/scripts/tests/summary.bats b/scripts/tests/summary.bats
index e2f4e47..924080a 100644
--- a/scripts/tests/summary.bats
+++ b/scripts/tests/summary.bats
@@ -91,3 +91,24 @@ setup() {
   [[ "$output" == *"crash loop"* ]]
   [[ "$output" != *"data never leaves"* ]]
 }
+
+# ── _reboot_note (reboot persistence) ───────────────────────────────────────
+@test "_reboot_note: Linux -> survives-reboot line" {
+  OS=Linux
+  run _reboot_note
+  [[ "$output" == *"Survives reboot"* ]]
+  [[ "$output" != *"Docker Desktop"* ]]
+}
+
+@test "_reboot_note: macOS -> Docker Desktop start-on-login instruction" {
+  OS=Darwin
+  run _reboot_note
+  [[ "$output" == *"Docker Desktop"* ]]
+  [[ "$output" == *"sign in"* ]]
+}
+
+@test "print_summary connected: includes the reboot note" {
+  CLIENT_STATE=connected; OS=Linux
+  run print_summary
+  [[ "$output" == *"Survives reboot"* ]]
+}