Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions scripts/install-k8s.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -738,6 +738,22 @@ function Write-K3dProxyConfig {
return $cfg
}

# Reboot persistence for the k3d cluster.
#
# Re-applies the `unless-stopped` restart policy to every container whose name
# matches "k3d-<CLUSTER_NAME>-", so the node containers come back when Docker
# starts. k3d already sets this policy itself; doing it again is defensive and
# also covers clusters that were created outside this script. On Windows the
# other half -- Docker Desktop starting at login -- is left to the user (the
# summary tells them how to enable it).
#
# Setting TRACEBLOC_NO_AUTOSTART to any non-empty value skips everything.
# Best-effort: any error raised while talking to docker is swallowed.
function Set-ClusterAutostart {
    if (-not $env:TRACEBLOC_NO_AUTOSTART) {
        try {
            # -a: include stopped containers; stderr from the listing is discarded.
            $containerNames = docker ps -a --filter "name=k3d-$CLUSTER_NAME-" --format "{{.Names}}" 2>$null
            foreach ($name in $containerNames) {
                # Skip blank entries so docker is never invoked with an empty name.
                if (-not $name) { continue }
                docker update --restart unless-stopped $name 2>&1 | Out-Null
            }
            if ($containerNames) {
                Log "Set restart=unless-stopped on k3d nodes (auto-restart after reboot)."
            }
        } catch {}
    }
}

function New-K3dCluster {
Log "Creating k3d cluster: '$CLUSTER_NAME'"

Expand Down Expand Up @@ -865,6 +881,8 @@ function New-K3dCluster {
}

Log "kubeconfig updated -- kubectl now points to '$CLUSTER_NAME'."

Set-ClusterAutostart
}

# =============================================================================
Expand Down Expand Up @@ -1244,6 +1262,8 @@ function Print-Summary {
Write-Host ""
Hint "Models that vendors submit train on this machine -- your data never leaves it."
Write-Host ""
Hint "After a reboot, start Docker Desktop to bring your client back (enable 'Start Docker Desktop when you sign in' in Settings -> General to automate)."
Write-Host ""
Write-Host " What to do next" -ForegroundColor White
Write-Host " 1. Ingest your training and test data"
Write-Host " 2. Define your first AI use case and invite vendors"
Expand Down
31 changes: 31 additions & 0 deletions scripts/lib/cluster.sh
Original file line number Diff line number Diff line change
Expand Up @@ -125,11 +125,42 @@ create_cluster() {
_create_new_cluster
fi

ensure_cluster_autostart
_merge_kubeconfig
_export_host_no_proxy
_wait_for_api
}

# Guarantee the cluster returns after a host reboot. On Linux this already works
# by default — k3d sets `--restart unless-stopped` on its node containers and the
# Docker install enables docker.service on boot — but we harden both so it holds
# even on a re-run where Docker was installed-but-disabled, or for an externally-
# created cluster. On macOS/Windows the restart policy is set too, but Docker
# Desktop must be configured to start on login (the summary tells the user).
# Opt out with TRACEBLOC_NO_AUTOSTART=1 (any non-empty value skips everything).
#
# Globals:  CLUSTER_NAME (read), OS (read), TRACEBLOC_NO_AUTOSTART (read)
# Outputs:  progress lines via log; docker/systemctl output is discarded
# Returns:  always 0 — this is best-effort and must never fail the caller
ensure_cluster_autostart() {
  if [[ -n "${TRACEBLOC_NO_AUTOSTART:-}" ]]; then return 0; fi

  local nodes node failed=0
  # -a: include stopped containers. If docker itself is unusable, give up quietly.
  nodes=$(docker ps -a --filter "name=k3d-${CLUSTER_NAME}-" --format '{{.Names}}' 2>/dev/null) || return 0
  if [[ -n "$nodes" ]]; then
    # Read one name per line instead of word-splitting an unquoted expansion,
    # so names are never subject to globbing.
    while IFS= read -r node; do
      [[ -n "$node" ]] || continue
      # </dev/null keeps the command from consuming the loop's here-string.
      if ! docker update --restart unless-stopped "$node" </dev/null >/dev/null 2>&1; then
        failed=$((failed + 1))
      fi
    done <<<"$nodes"

    # Only claim success when every update went through; otherwise say so
    # instead of printing a confirmation that is not true.
    if (( failed == 0 )); then
      log "Set restart=unless-stopped on k3d nodes so the cluster returns after a reboot."
    else
      log "Could not set restart=unless-stopped on ${failed} k3d node(s); the cluster may need a manual start after a reboot."
    fi
  fi

  # On Linux, make sure Docker itself starts on boot. The fresh-install path only
  # enables docker.service when Docker was absent; this also covers the
  # installed-but-disabled re-run case. Idempotent.
  if [[ "${OS:-}" == "Linux" ]] && has systemctl; then
    if sudo systemctl enable docker >/dev/null 2>&1; then
      log "Ensured docker.service is enabled on boot."
    fi
  fi
  return 0
}

_handle_existing_cluster() {
CLUSTER_STATUS="0"
if command -v jq &>/dev/null; then
Expand Down
13 changes: 13 additions & 0 deletions scripts/lib/summary.sh
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,17 @@ _diagnose_not_ready() {
# Reports the outcome based on CLIENT_STATE (set by wait_for_client_ready).
# The "secure compute environment / your data never leaves" claim is printed
# ONLY when the client is verifiably connected — never on a partial/failed run.
# Short note in the success summary so the user knows whether the client
# survives a reboot — automatic on Linux; needs Docker Desktop start-on-login on
# macOS/Windows.
#
# Globals: OS (read); GREEN, DIM, RESET (read, used as color escapes — an
#          unset one is treated as empty)
# Outputs: one line on Linux, two lines otherwise, on stdout
_reboot_note() {
  if [[ "${OS:-}" == "Linux" ]]; then
    echo -e " ${GREEN:-}✔${RESET:-} ${DIM:-}Survives reboot — Docker and your client restart automatically.${RESET:-}"
  else
    echo -e " ${DIM:-}After a reboot, start Docker Desktop to bring your client back —${RESET:-}"
    echo -e " ${DIM:-}enable Settings → General → \"Start Docker Desktop when you sign in\" to automate.${RESET:-}"
  fi
}

print_summary() {
local mode="CPU"
[[ "$GPU_VENDOR" == "nvidia" ]] && mode="NVIDIA GPU"
Expand All @@ -93,6 +104,8 @@ print_summary() {
echo -e " ${DIM}Models that vendors submit train on this machine β€”${RESET}"
echo -e " ${DIM}your data never leaves it.${RESET}"
echo ""
_reboot_note
echo ""
echo -e " ${BOLD}What to do next${RESET}"
echo -e " ${WHITE}1.${RESET} Ingest your training and test data"
echo -e " ${WHITE}2.${RESET} Define your first AI use case and invite vendors"
Expand Down
41 changes: 41 additions & 0 deletions scripts/tests/cluster.bats
Original file line number Diff line number Diff line change
Expand Up @@ -160,3 +160,44 @@ setup() {
run _check_existing_cluster_proxy
[[ "$output" == *"missing: HTTP_PROXY"* ]]
}

# ── ensure_cluster_autostart (reboot persistence) ───────────────────────────
@test "ensure_cluster_autostart: unless-stopped per node + enables docker (Linux)" {
  # Fake docker: the listing call prints two node names; any other call is recorded.
  docker() {
    case "$1 $2" in
      "ps -a") printf '%s\n' "k3d-tracebloc-server-0" "k3d-tracebloc-serverlb" ;;
      *) record "docker $*" ;;
    esac
  }
  sudo() { record "sudo $*"; }
  # Pretend every probed tool (systemctl included) is available.
  has() { return 0; }
  OS=Linux

  ensure_cluster_autostart

  # Each node must get its own update, and docker.service must be enabled.
  run mock_calls
  [[ "$output" == *"docker update --restart unless-stopped k3d-tracebloc-server-0"* ]]
  [[ "$output" == *"docker update --restart unless-stopped k3d-tracebloc-serverlb"* ]]
  [[ "$output" == *"sudo systemctl enable docker"* ]]
}

@test "ensure_cluster_autostart: macOS does not enable docker.service" {
  # Fake docker: one node in the listing; any other call is recorded.
  docker() {
    case "$1 $2" in
      "ps -a") echo "k3d-tracebloc-server-0" ;;
      *) record "docker $*" ;;
    esac
  }
  sudo() { record "sudo $*"; }
  # Even with systemctl reported as present, a non-Linux OS must skip it.
  has() { return 0; }
  OS=Darwin

  ensure_cluster_autostart

  run mock_calls
  [[ "$output" == *"docker update --restart unless-stopped"* ]]
  [[ "$output" != *"systemctl enable docker"* ]]
}

@test "ensure_cluster_autostart: TRACEBLOC_NO_AUTOSTART -> no-op" {
  # Mocks are in place only to prove that none of them gets called.
  docker() {
    case "$1 $2" in
      "ps -a") echo "k3d-tracebloc-server-0" ;;
      *) record "docker $*" ;;
    esac
  }
  sudo() { record "sudo $*"; }
  OS=Linux

  # The opt-out variable is set for this single invocation only.
  TRACEBLOC_NO_AUTOSTART=1 ensure_cluster_autostart

  run mock_calls
  [[ -z "$output" ]]
}

@test "ensure_cluster_autostart: no nodes -> no docker update" {
  # Fake docker: the listing yields only an empty line, i.e. no k3d nodes.
  docker() {
    case "$1 $2" in
      "ps -a") echo "" ;;
      *) record "docker $*" ;;
    esac
  }
  OS=Darwin

  ensure_cluster_autostart

  run mock_calls
  [[ "$output" != *"docker update"* ]]
}
19 changes: 19 additions & 0 deletions scripts/tests/install-k8s.Tests.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ BeforeAll {
. "$PSScriptRoot/../install-k8s.ps1"
# Stubs so Pester can mock external commands that the functions invoke.
function kubectl { }
function docker { }
}

Describe "Get-BackendUrl" {
Expand Down Expand Up @@ -359,3 +360,21 @@ Describe "Test-Preflight" {
{ Test-Preflight } | Should -Not -Throw
}
}

# --- reboot persistence (Set-ClusterAutostart) -------------------------------
# Tests for Set-ClusterAutostart. `docker` is mocked in every case, so no real
# container runtime is touched.
Describe "Set-ClusterAutostart" {
# Clear the opt-out variable after each test so it cannot leak into the next one.
AfterEach { $env:TRACEBLOC_NO_AUTOSTART = $null }
It "sets unless-stopped on each k3d node" {
# The listing call returns two node names; every other docker call returns nothing.
Mock docker {
if (($args -join ' ') -match 'ps -a') { return @("k3d-tracebloc-server-0", "k3d-tracebloc-serverlb") }
}
Set-ClusterAutostart
# One `docker update` per node is expected. NOTE(review): -Times without
# -Exactly is an "at least" check in Pester, so extra calls would also pass.
Should -Invoke docker -ParameterFilter { ($args -join ' ') -match 'update --restart unless-stopped' } -Times 2
}
It "TRACEBLOC_NO_AUTOSTART -> no docker calls" {
# With the opt-out set, the function must return before touching docker at all.
$env:TRACEBLOC_NO_AUTOSTART = "1"
Mock docker { }
Set-ClusterAutostart
Should -Invoke docker -Times 0 -Exactly
}
}
21 changes: 21 additions & 0 deletions scripts/tests/summary.bats
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,24 @@ setup() {
[[ "$output" == *"crash loop"* ]]
[[ "$output" != *"data never leaves"* ]]
}

# ── _reboot_note (reboot persistence) ───────────────────────────────────────
# On Linux the note must announce automatic recovery and must not mention
# Docker Desktop.
@test "_reboot_note: Linux -> survives-reboot line" {
OS=Linux
# `run` captures what the function prints into $output for the checks below.
run _reboot_note
[[ "$output" == *"Survives reboot"* ]]
[[ "$output" != *"Docker Desktop"* ]]
}

# On a non-Linux OS the note must point the user at Docker Desktop's
# start-on-login setting.
@test "_reboot_note: macOS -> Docker Desktop start-on-login instruction" {
OS=Darwin
run _reboot_note
[[ "$output" == *"Docker Desktop"* ]]
# "sign in" comes from the quoted name of the Docker Desktop setting.
[[ "$output" == *"sign in"* ]]
}

# The success summary for a connected client must contain the reboot note.
@test "print_summary connected: includes the reboot note" {
# NOTE(review): assumes setup() provides everything else print_summary reads
# (color variables, GPU_VENDOR, ...) — confirm against the test file's setup.
CLIENT_STATE=connected; OS=Linux
run print_summary
[[ "$output" == *"Survives reboot"* ]]
}
Loading