diff --git a/scripts/install-k8s.ps1 b/scripts/install-k8s.ps1 index bbfd7cc..74984ff 100644 --- a/scripts/install-k8s.ps1 +++ b/scripts/install-k8s.ps1 @@ -769,6 +769,9 @@ function Set-ClusterAutostart { function New-K3dCluster { Log "Creating k3d cluster: '$CLUSTER_NAME'" + # Docker is up now (unlike at preflight); re-check the runtime's real memory budget. + Test-PreflightRuntimeMem + $clusterExists = $false $clusterObj = $null try { @@ -1406,12 +1409,33 @@ function Get-PfFreeGb { } catch { return $null } } +# Memory/CPU as the container runtime sees it (the Docker Desktop / WSL2 VM budget, +# which is what the pods actually get — smaller than the host). $null if the daemon +# is down or the value is junk, so callers fall back to the host (CIM) reader. +function Get-PfRuntimeMemGb { + try { + $v = ((docker info --format '{{.MemTotal}}' 2>$null) | Out-String).Trim() + if ($v -match '^\d+$' -and [int64]$v -gt 0) { return [math]::Floor([int64]$v / 1GB) } + } catch {} + return $null +} +function Get-PfRuntimeCpu { + try { + $v = ((docker info --format '{{.NCPU}}' 2>$null) | Out-String).Trim() + if ($v -match '^\d+$' -and [int]$v -gt 0) { return [int]$v } + } catch {} + return $null +} + +# Prefer the runtime view, fall back to the host (CIM). 
function Get-PfMemGb { + $r = Get-PfRuntimeMemGb; if ($null -ne $r) { return $r } try { return [math]::Floor((Get-CimInstance Win32_ComputerSystem -ErrorAction Stop).TotalPhysicalMemory / 1GB) } catch { return $null } } function Get-PfCpu { + $r = Get-PfRuntimeCpu; if ($null -ne $r) { return $r } try { return [int](Get-CimInstance Win32_ComputerSystem -ErrorAction Stop).NumberOfLogicalProcessors } catch { if ($env:NUMBER_OF_PROCESSORS) { return [int]$env:NUMBER_OF_PROCESSORS } else { return $null } } } @@ -1419,10 +1443,13 @@ function Get-PfCpu { function Test-Preflight { if ($env:TRACEBLOC_SKIP_PREFLIGHT) { Info "Preflight checks skipped (TRACEBLOC_SKIP_PREFLIGHT set)."; return } - $minDiskGb = if ($env:PF_MIN_DISK_GB) { [int]$env:PF_MIN_DISK_GB } else { 5 } + $minDiskGb = if ($env:PF_MIN_DISK_GB) { [int]$env:PF_MIN_DISK_GB } else { 10 } $warnDiskGb = if ($env:PF_WARN_DISK_GB) { [int]$env:PF_WARN_DISK_GB } else { 20 } - $warnMemGb = if ($env:PF_WARN_MEM_GB) { [int]$env:PF_WARN_MEM_GB } else { 4 } + $minMemGb = if ($env:PF_MIN_MEM_GB) { [int]$env:PF_MIN_MEM_GB } else { 5 } + $warnMemGb = if ($env:PF_WARN_MEM_GB) { [int]$env:PF_WARN_MEM_GB } else { 8 } + $recMemGb = if ($env:PF_REC_MEM_GB) { [int]$env:PF_REC_MEM_GB } else { 16 } $minCpu = if ($env:PF_MIN_CPU) { [int]$env:PF_MIN_CPU } else { 2 } + $recCpu = if ($env:PF_REC_CPU) { [int]$env:PF_REC_CPU } else { 4 } $hardFail = 0 # Architecture — the tracebloc client images (e.g. mysql-client) are amd64-only. @@ -1437,12 +1464,22 @@ function Test-Preflight { $cpu = Get-PfCpu if ($null -eq $cpu) { Warn "CPU: couldn't determine core count (skipping)." } - elseif ($cpu -lt $minCpu) { Warn "CPU: $cpu core(s) - recommended >= $minCpu." } + elseif ($cpu -lt $minCpu) { Warn "CPU: $cpu core(s) - below the $minCpu-core minimum; mysql may hit lock-wait timeouts. $recCpu+ recommended to train." } + elseif ($cpu -lt $recCpu) { Warn "CPU: $cpu cores - fine to run; $recCpu+ recommended to train locally." 
} else { Ok "CPU: $cpu cores" } + # Memory is warn-only on Windows: at preflight the Docker Desktop / WSL2 daemon may + # be down (so this is host RAM); the post-Docker re-check sees the real VM budget. Hints name both knobs: the Settings slider (Hyper-V backend) and .wslconfig (WSL 2 backend). $mem = Get-PfMemGb if ($null -eq $mem) { Warn "Memory: couldn't determine total RAM (skipping)." } - elseif ($mem -lt $warnMemGb) { Warn "Memory: $mem GB total - recommended >= $warnMemGb GB; k3s + training may run out of memory." } + elseif ($mem -lt $minMemGb) { + Warn "Memory: $mem GB - below the $minMemGb GB the client needs; it will OOM." + Hint "Docker Desktop -> Settings -> Resources -> Memory: raise to >= $warnMemGb GB ($recMemGb GB to train); on the WSL 2 backend set 'memory=${warnMemGb}GB' under [wsl2] in %UserProfile%\.wslconfig and run 'wsl --shutdown'. Then re-run." + } + elseif ($mem -lt $warnMemGb) { + Warn "Memory: $mem GB - enough to run, but training (~8 GB/job) may OOM; $recMemGb GB recommended to train locally." + Hint "Docker Desktop -> Settings -> Resources -> Memory >= $recMemGb GB to train (WSL 2 backend: 'memory=${recMemGb}GB' under [wsl2] in %UserProfile%\.wslconfig, then 'wsl --shutdown')." + } else { Ok "Memory: $mem GB" } $disk = Get-PfFreeGb @@ -1479,6 +1516,22 @@ function Test-Preflight { } } +# Re-evaluate memory once Docker is confirmed up. Test-Preflight runs before Docker +# Desktop starts, so its read may have been host RAM, not the (smaller) Docker VM +# budget. Called from New-K3dCluster. WARN-only — the user has already waited for +# Docker, so aborting here would be jarring. +function Test-PreflightRuntimeMem { + if ($env:TRACEBLOC_SKIP_PREFLIGHT) { return } + $mem = Get-PfRuntimeMemGb + if ($null -eq $mem) { return } + $warnMemGb = if ($env:PF_WARN_MEM_GB) { [int]$env:PF_WARN_MEM_GB } else { 8 } + $recMemGb = if ($env:PF_REC_MEM_GB) { [int]$env:PF_REC_MEM_GB } else { 16 } + if ($mem -lt $warnMemGb) { + Warn "Docker is running with $mem GB - recommended >= $warnMemGb GB ($recMemGb GB to train); the client may OOM under load." + Hint "Docker Desktop -> Settings -> Resources -> Memory >= $warnMemGb GB (WSL 2 backend: 'memory=${warnMemGb}GB' under [wsl2] in %UserProfile%\.wslconfig, then 'wsl --shutdown'), then re-install."
+ } +} + # ============================================================================= # DIAGNOSE — `-Diagnose` support bundle (mirrors scripts/lib/diagnose.sh) # ============================================================================= diff --git a/scripts/lib/cluster.sh b/scripts/lib/cluster.sh index ccb22df..100e7d3 100755 --- a/scripts/lib/cluster.sh +++ b/scripts/lib/cluster.sh @@ -119,6 +119,11 @@ create_cluster() { _ensure_tracebloc_dirs + # Docker is up now (unlike at preflight time), so re-check the runtime's real + # memory budget — a too-small Docker VM (Mac/Win) surfaces before we build out. + # Guarded: cluster.sh can be sourced without preflight.sh (any caller that loads only cluster.sh). + if declare -F _pf_recheck_runtime_mem >/dev/null 2>&1; then _pf_recheck_runtime_mem || true; fi + if _cluster_exists; then _handle_existing_cluster else diff --git a/scripts/lib/preflight.sh b/scripts/lib/preflight.sh index 2e1df9f..5a8021d 100644 --- a/scripts/lib/preflight.sh +++ b/scripts/lib/preflight.sh @@ -10,15 +10,23 @@ # Escape hatches: # TRACEBLOC_SKIP_PREFLIGHT=1 skip all checks # TRACEBLOC_ALLOW_ARM64=1 proceed on arm64 despite amd64-only images +# PF_MIN_MEM_GB / PF_MIN_DISK_GB lower the hard floors; PF_MIN_CPU lowers the warn-only CPU minimum (CI / odd sites) # # This file is side-effect-safe to source (defaults + function defs only). # ============================================================================= -# Thresholds (overridable via env — for unusual sites or tests) -PF_MIN_DISK_GB="${PF_MIN_DISK_GB:-5}" # hard-fail below this (Linux) +# Thresholds (overridable via env — for unusual sites or tests). +# RAM floors are derived from the real stack, not guessed: the always-on control +# plane requests ~2.1 GiB, + k3s/k3d ~0.8 + OS/Docker ~0.7 ≈ ~4.4 GiB just to stay +# Online on a single-node (k3d) install — so below 5 GiB it boots then OOMs. 8 GiB +# is comfortable to run; 16 GiB is needed to train locally (a job's limit is ~8 GiB+). NOTE(review): the three terms listed sum to ~3.6 GiB, not ~4.4 — confirm which figure is off.
+PF_MIN_DISK_GB="${PF_MIN_DISK_GB:-10}" # hard-fail below this (Linux) — base images alone need >5 PF_WARN_DISK_GB="${PF_WARN_DISK_GB:-20}" # warn below this -PF_WARN_MEM_GB="${PF_WARN_MEM_GB:-4}" # warn below this +PF_MIN_MEM_GB="${PF_MIN_MEM_GB:-5}" # hard-fail below this (Linux; warn on Mac/Win) +PF_WARN_MEM_GB="${PF_WARN_MEM_GB:-8}" # warn below this (comfortable to run) +PF_REC_MEM_GB="${PF_REC_MEM_GB:-16}" # recommended to train locally (copy only, not a gate) PF_MIN_CPU="${PF_MIN_CPU:-2}" # warn below this +PF_REC_CPU="${PF_REC_CPU:-4}" # recommended (warn) below this # Non-exiting failure line (common.sh's error() exits; preflight must finish all # checks first, so failures print here and are recorded in PF_HARD_FAIL). Writes @@ -50,8 +58,26 @@ _pf_probe_url() { # Free space in KB on the filesystem holding $1. _pf_free_kb() { df -Pk "$1" 2>/dev/null | awk 'NR==2 {print $4}'; } -# Total physical RAM in KB. -_pf_total_mem_kb() { +# Memory/CPU as the CONTAINER RUNTIME sees it (the budget the pods actually get). +# On Docker Desktop / Colima / WSL2 this is the VM's allocation — smaller than the +# host and the number that matters (a 36 GB Mac can cap its Docker VM at 4 GB). Echo +# a single integer, or nothing if the daemon is down / the value is junk — callers +# then fall back to the host reader. One `docker info` call per read: its exit status doubles as the daemon-up probe (cf. _pf_docker_root below). +_pf_runtime_mem_kb() { + local b; has docker || return 0 + b="$(docker info --format '{{.MemTotal}}' 2>/dev/null)" || return 0 + [[ "$b" =~ ^[0-9]+$ && "$b" -gt 0 ]] && echo $(( b / 1024 )) + return 0 +} +_pf_runtime_ncpu() { + local n; has docker || return 0 + n="$(docker info --format '{{.NCPU}}' 2>/dev/null)" || return 0 + [[ "$n" =~ ^[0-9]+$ && "$n" -gt 0 ]] && echo "$n" + return 0 +} + +# Total physical RAM of the HOST in KB.
+_pf_host_mem_kb() { if [[ "$OS" == "Darwin" ]]; then local b; b=$(sysctl -n hw.memsize 2>/dev/null) || b="" [[ -n "$b" ]] && echo $(( b / 1024 )) @@ -60,8 +86,8 @@ _pf_total_mem_kb() { fi } -# Logical CPU count. -_pf_ncpu() { +# Logical CPU count of the HOST. +_pf_host_ncpu() { if [[ "$OS" == "Darwin" ]]; then sysctl -n hw.ncpu 2>/dev/null else @@ -69,6 +95,14 @@ _pf_ncpu() { fi } +# Available RAM right now (MemAvailable from /proc/meminfo), KB — Linux only (for the busy-shared-VM warn). +_pf_avail_mem_kb() { awk '/^MemAvailable:/ {print $2}' /proc/meminfo 2>/dev/null; } + +# Selectors: prefer the runtime view, fall back to the host. The checks (and the +# bats numeric test) call these names; they emit one integer, or nothing when no reader can tell (callers then skip the check). +_pf_total_mem_kb() { local v; v="$(_pf_runtime_mem_kb)"; [[ -n "$v" ]] && { echo "$v"; return 0; }; _pf_host_mem_kb; } +_pf_ncpu() { local v; v="$(_pf_runtime_ncpu)"; [[ -n "$v" ]] && { echo "$v"; return 0; }; _pf_host_ncpu; } + # Docker data root if the daemon is up; else where it will live / a host proxy. _pf_docker_root() { if has docker && docker info >/dev/null 2>&1; then @@ -124,8 +158,12 @@ _pf_arch() { _pf_cpu() { local n; n="$(_pf_ncpu)" if [[ -z "$n" ]]; then warn "CPU: couldn't determine core count (skipping)."; return 0; fi + # CPU is warn-only: starvation throttles (and can trip mysql InnoDB lock-wait + # timeouts) but doesn't OOM-kill, and the chart deliberately omits limits.cpu. if [[ "$n" -lt "$PF_MIN_CPU" ]]; then - warn "CPU: ${n} core(s) — recommended ≥ ${PF_MIN_CPU}." + warn "CPU: ${n} core(s) — below the ${PF_MIN_CPU}-core minimum; mysql may hit lock-wait timeouts. ${PF_REC_CPU}+ recommended to train." + elif [[ "$n" -lt "$PF_REC_CPU" ]]; then + warn "CPU: ${n} cores — fine to run; ${PF_REC_CPU}+ recommended to train locally."
else success "CPU: ${n} cores" fi @@ -133,13 +171,62 @@ _pf_cpu() { } _pf_memory() { - local kb gb; kb="$(_pf_total_mem_kb)" + local kb gb mib floor_mib warn_mib src="Docker VM" + kb="$(_pf_runtime_mem_kb)"; [[ -n "$kb" ]] || { src="host"; kb="$(_pf_total_mem_kb)"; } if [[ -z "$kb" ]]; then warn "Memory: couldn't determine total RAM (skipping)."; return 0; fi gb=$(( kb / 1024 / 1024 )) - if [[ "$gb" -lt "$PF_WARN_MEM_GB" ]]; then - warn "Memory: ${gb} GB total — recommended ≥ ${PF_WARN_MEM_GB} GB; k3s + training may run out of memory." + mib=$(( kb / 1024 )) + # Compare in MiB with a 64 MiB grace so a VM that reports e.g. 4 GiB a hair under + # 4*1024^3 (Colima / Docker Desktop) doesn't floor to 3 GB and false-trip the gate. + floor_mib=$(( PF_MIN_MEM_GB * 1024 - 64 )) + warn_mib=$(( PF_WARN_MEM_GB * 1024 )) + # src comes from the read above — no second daemon probe just to pick the label. + + if [[ "$mib" -lt "$floor_mib" ]]; then + if [[ "$OS" == "Linux" ]]; then + _pf_fail_line "Memory: only ${gb} GB (${src}) — need ≥ ${PF_MIN_MEM_GB} GB to run the tracebloc client." + PF_HARD_FAIL=$(( ${PF_HARD_FAIL:-0} + 1 )) + hint "Resize the VM (or free memory) to ≥ ${PF_WARN_MEM_GB} GB; ${PF_REC_MEM_GB} GB to train locally. Then re-run." + else + # Mac/Win: at preflight Docker is usually still down, so this is host RAM — + # warn (don't block); the create_cluster re-check sees the real VM size. + warn "Memory: ${gb} GB (${src}) — below the ${PF_MIN_MEM_GB} GB the client needs; it will OOM." + hint "Docker Desktop → Settings → Resources → Memory: raise to ≥ ${PF_WARN_MEM_GB} GB (${PF_REC_MEM_GB} GB to train), then re-run. Colima: colima stop && colima start --memory ${PF_WARN_MEM_GB}." + fi + elif [[ "$mib" -lt "$warn_mib" ]]; then + warn "Memory: ${gb} GB (${src}) — enough to run, but training (≈8 GB/job) may OOM; ${PF_REC_MEM_GB} GB recommended to train locally." + [[ "$OS" != "Linux" ]] && hint "Docker Desktop → Settings → Resources → Memory ≥ ${PF_REC_MEM_GB} GB to train (Colima: colima stop && colima start --memory ${PF_REC_MEM_GB})."
else - success "Memory: ${gb} GB" + success "Memory: ${gb} GB (${src})" + fi + + # Linux: even when total is fine, a busy shared VM may have little free RAM now. + if [[ "$OS" == "Linux" ]]; then + local avail_kb avail_gb + avail_kb="$(_pf_avail_mem_kb)" + if [[ -n "$avail_kb" ]]; then + avail_gb=$(( avail_kb / 1024 / 1024 )) + if [[ "$avail_gb" -lt "$PF_MIN_MEM_GB" ]]; then + warn "Memory: only ${avail_gb} GB available right now (other workloads are using this machine) — the client needs ~${PF_MIN_MEM_GB} GB free to start." + fi + fi + fi + return 0 +} + +# Re-evaluate memory once Docker is confirmed up. Preflight runs before Docker +# starts (install-k8s.sh), so on macOS/Windows the first read was host RAM, not the +# Docker VM's smaller budget. Called from create_cluster (cluster.sh) — the first +# point `docker info` is reliably up on every OS. WARN-only: the user has already +# waited for Docker to come up, so aborting here would be jarring. +_pf_recheck_runtime_mem() { + [[ -n "${TRACEBLOC_SKIP_PREFLIGHT:-}" ]] && return 0 + local kb gb; kb="$(_pf_runtime_mem_kb)" + [[ -z "$kb" ]] && return 0 # daemon still not reporting — nothing to add + gb=$(( kb / 1024 / 1024 )) + if [[ $(( kb / 1024 )) -lt $(( PF_WARN_MEM_GB * 1024 )) ]]; then + warn "Docker is running with ${gb} GB — recommended ≥ ${PF_WARN_MEM_GB} GB (${PF_REC_MEM_GB} GB to train); the client may OOM under load." + [[ "$OS" != "Linux" ]] && hint "Docker Desktop → Settings → Resources → Memory ≥ ${PF_WARN_MEM_GB} GB (Colima: colima stop && colima start --memory ${PF_WARN_MEM_GB}), then re-install." fi return 0 } diff --git a/scripts/lib/setup-macos.sh b/scripts/lib/setup-macos.sh index 2e2d942..c7d44bf 100755 --- a/scripts/lib/setup-macos.sh +++ b/scripts/lib/setup-macos.sh @@ -69,7 +69,9 @@ _install_docker_colima() { return fi - spin_cmd "Starting Docker runtime…" colima start --cpu 2 --memory 4 --disk 60 + # Colima VM sizing must clear the preflight floor — the client needs ~5 GB just + # to run (control plane + k3s + OS), 16 GB to train locally.
Overridable per box. + spin_cmd "Starting Docker runtime…" colima start --cpu "${COLIMA_CPU:-4}" --memory "${COLIMA_MEMORY:-6}" --disk "${COLIMA_DISK:-60}" if ! docker info &>/dev/null 2>&1; then error "Docker did not start. Try running 'colima status' to investigate." diff --git a/scripts/tests/e2e-cluster.sh b/scripts/tests/e2e-cluster.sh index d5b80f2..f833d57 100644 --- a/scripts/tests/e2e-cluster.sh +++ b/scripts/tests/e2e-cluster.sh @@ -33,6 +33,8 @@ source "$LIB/common.sh" source "$LIB/setup-linux.sh" # shellcheck source=/dev/null source "$LIB/cluster.sh" +# shellcheck source=/dev/null +source "$LIB/preflight.sh" # provides _pf_recheck_runtime_mem (called by create_cluster) cleanup() { k3d cluster delete "$CLUSTER_NAME" >/dev/null 2>&1 || true; } trap cleanup EXIT diff --git a/scripts/tests/install-k8s.Tests.ps1 b/scripts/tests/install-k8s.Tests.ps1 index da64161..d6621a0 100644 --- a/scripts/tests/install-k8s.Tests.ps1 +++ b/scripts/tests/install-k8s.Tests.ps1 @@ -453,6 +453,45 @@ Describe "Test-Preflight" { Mock Test-PfUrl { "ok" } { Test-Preflight } | Should -Not -Throw } + It "memory below floor -> warn-only on Windows (does not throw)" { + Mock Test-PfUrl { "ok" }; Mock Get-PfMemGb { 3 } + { Test-Preflight } | Should -Not -Throw + } + It "PF_MIN_MEM_GB override relaxes the floor" { + Mock Test-PfUrl { "ok" }; Mock Get-PfMemGb { 3 }; $env:PF_MIN_MEM_GB = "2" + { Test-Preflight } | Should -Not -Throw + $env:PF_MIN_MEM_GB = $null + } +} + +Describe "Get-Pf* runtime (Docker VM) view preference" { + It "Get-PfMemGb prefers docker MemTotal over the host" { + Mock docker { '8589934592' } # 8 GiB, in bytes + Get-PfMemGb | Should -Be 8 + } + It "Get-PfCpu prefers docker NCPU over the host" { + Mock docker { '2' } + Get-PfCpu | Should -Be 2 + } + It "Get-PfRuntimeMemGb: junk value -> null (forces host fallback)" { + Mock docker { 'lots' } + Get-PfRuntimeMemGb | Should -BeNullOrEmpty + } + It "Get-PfRuntimeMemGb: docker errors -> null" { + Mock docker { throw 
"daemon down" } + Get-PfRuntimeMemGb | Should -BeNullOrEmpty + } +} + +Describe "Test-PreflightRuntimeMem (post-Docker, warn-only)" { + It "small Docker VM -> warns, does not throw" { + Mock Get-PfRuntimeMemGb { 4 } + { Test-PreflightRuntimeMem } | Should -Not -Throw + } + It "daemon not reporting (null) -> no-op, does not throw" { + Mock Get-PfRuntimeMemGb { $null } + { Test-PreflightRuntimeMem } | Should -Not -Throw + } } # --- reboot persistence (Set-ClusterAutostart) ------------------------------- diff --git a/scripts/tests/preflight.bats b/scripts/tests/preflight.bats index a852c11..6fd5503 100644 --- a/scripts/tests/preflight.bats +++ b/scripts/tests/preflight.bats @@ -14,6 +14,9 @@ setup() { _pf_free_kb() { echo $((50 * 1024 * 1024)); } # 50 GB _pf_total_mem_kb() { echo $((8 * 1024 * 1024)); } # 8 GB _pf_ncpu() { echo 4; } + _pf_runtime_mem_kb() { echo ""; } # daemon "down" in tests → selectors/src use host + _pf_runtime_ncpu() { echo ""; } + _pf_avail_mem_kb() { echo $((50 * 1024 * 1024)); } # 50 GB available (Linux warn off) _pf_amd64_emulation_available() { return 0; } docker() { return 1; } # keep _pf_docker_root off the real daemon has() { return 0; } # pretend tools present (conds empty) unless overridden @@ -118,14 +121,47 @@ setup() { PF_HARD_FAIL=0; _pf_disk >/dev/null; [ "$PF_HARD_FAIL" -eq 0 ] } -@test "_pf_memory: low RAM -> warn" { - _pf_total_mem_kb() { echo $((2 * 1024 * 1024)); } - run _pf_memory; [[ "$output" == *"recommended"* ]] +@test "_pf_memory: below floor on Linux -> hard fail + resize hint" { + OS=Linux; _pf_total_mem_kb() { echo $((3 * 1024 * 1024)); } # 3 GB + run _pf_memory; [[ "$output" == *"to run the tracebloc client"* ]] + PF_HARD_FAIL=0; _pf_memory >/dev/null 2>&1; [ "$PF_HARD_FAIL" -eq 1 ] +} + +@test "_pf_memory: between floor and warn -> warn, no hard fail" { + OS=Linux; _pf_total_mem_kb() { echo $((6 * 1024 * 1024)); } # 6 GB + run _pf_memory; [[ "$output" == *"recommended to train"* ]] + PF_HARD_FAIL=0; _pf_memory 
>/dev/null 2>&1; [ "$PF_HARD_FAIL" -eq 0 ] } @test "_pf_memory: ample RAM -> success" { - _pf_total_mem_kb() { echo $((8 * 1024 * 1024)); } - run _pf_memory; [[ "$output" == *"8 GB"* ]] + OS=Linux; _pf_total_mem_kb() { echo $((16 * 1024 * 1024)); } + run _pf_memory; [[ "$output" == *"16 GB"* ]] + PF_HARD_FAIL=0; _pf_memory >/dev/null 2>&1; [ "$PF_HARD_FAIL" -eq 0 ] +} + +@test "_pf_memory: macOS below floor -> WARN only, never hard fail" { + OS=Darwin; _pf_total_mem_kb() { echo $((3 * 1024 * 1024)); } + run _pf_memory; [[ "$output" == *"Settings"* ]] + PF_HARD_FAIL=0; _pf_memory >/dev/null 2>&1; [ "$PF_HARD_FAIL" -eq 0 ] +} + +@test "_pf_memory: 64 MiB grace -> a hair under the floor still passes" { + OS=Linux; _pf_total_mem_kb() { echo $(( 5 * 1024 * 1024 - 1000 )); } # ~5 GB minus a bit + PF_HARD_FAIL=0; _pf_memory >/dev/null 2>&1; [ "$PF_HARD_FAIL" -eq 0 ] +} + +@test "_pf_memory: PF_MIN_MEM_GB override relaxes the floor" { + OS=Linux; PF_MIN_MEM_GB=2; PF_WARN_MEM_GB=2 + _pf_total_mem_kb() { echo $((3 * 1024 * 1024)); } # 3 GB now passes + run _pf_memory; [[ "$output" == *"3 GB"* ]] + PF_HARD_FAIL=0; _pf_memory >/dev/null 2>&1; [ "$PF_HARD_FAIL" -eq 0 ] +} + +@test "_pf_memory: Linux MemAvailable tight -> extra warn (total fine)" { + OS=Linux; _pf_total_mem_kb() { echo $((16 * 1024 * 1024)); } # total fine + _pf_avail_mem_kb() { echo $((2 * 1024 * 1024)); } # only 2 GB free now + run _pf_memory; [[ "$output" == *"available right now"* ]] + PF_HARD_FAIL=0; _pf_memory >/dev/null 2>&1; [ "$PF_HARD_FAIL" -eq 0 ] } @test "_pf_cpu: too few cores -> warn" { @@ -138,6 +174,56 @@ setup() { run _pf_cpu; [[ "$output" == *"4 cores"* ]] } +@test "_pf_cpu: between min and recommended -> warn (train), no hard fail" { + _pf_ncpu() { echo 3; } + run _pf_cpu; [[ "$output" == *"recommended to train"* ]] + PF_HARD_FAIL=0; _pf_cpu >/dev/null; [ "$PF_HARD_FAIL" -eq 0 ] # CPU never hard-fails +} + +# ── selectors: container-runtime view preferred, host fallback ─────────────── +@test 
"_pf_total_mem_kb: prefers runtime view over host (the Mac trap)" { + source "${BATS_TEST_DIRNAME}/../lib/preflight.sh" # restore the real selectors + _pf_runtime_mem_kb() { echo $((4 * 1024 * 1024)); } # Docker VM = 4 GB + _pf_host_mem_kb() { echo $((36 * 1024 * 1024)); } # host = 36 GB + run _pf_total_mem_kb; [ "$output" -eq $((4 * 1024 * 1024)) ] +} + +@test "_pf_total_mem_kb: falls back to host when runtime empty" { + source "${BATS_TEST_DIRNAME}/../lib/preflight.sh" + _pf_runtime_mem_kb() { echo ""; } + _pf_host_mem_kb() { echo $((8 * 1024 * 1024)); } + run _pf_total_mem_kb; [ "$output" -eq $((8 * 1024 * 1024)) ] +} + +@test "_pf_ncpu: prefers runtime, falls back to host" { + source "${BATS_TEST_DIRNAME}/../lib/preflight.sh" + _pf_runtime_ncpu() { echo 2; }; _pf_host_ncpu() { echo 16; } + run _pf_ncpu; [ "$output" -eq 2 ] + _pf_runtime_ncpu() { echo ""; } + run _pf_ncpu; [ "$output" -eq 16 ] +} + +@test "_pf_runtime_mem_kb: junk/zero MemTotal -> empty (forces fallback)" { + source "${BATS_TEST_DIRNAME}/../lib/preflight.sh" + has() { return 0; } + docker() { case "$*" in *MemTotal*) echo 0 ;; *) return 0 ;; esac; } + run _pf_runtime_mem_kb; [ -z "$output" ] +} + +# ── _pf_recheck_runtime_mem (post-Docker, warn-only) ───────────────────────── +@test "_pf_recheck_runtime_mem: small Docker VM -> warn, never hard fail" { + source "${BATS_TEST_DIRNAME}/../lib/preflight.sh" + OS=Linux; _pf_runtime_mem_kb() { echo $((4 * 1024 * 1024)); } # 4 GB Docker VM + run _pf_recheck_runtime_mem; [[ "$output" == *"Docker is running with 4 GB"* ]] + PF_HARD_FAIL=0; _pf_recheck_runtime_mem >/dev/null 2>&1; [ "$PF_HARD_FAIL" -eq 0 ] +} + +@test "_pf_recheck_runtime_mem: daemon not reporting -> silent no-op" { + source "${BATS_TEST_DIRNAME}/../lib/preflight.sh" + _pf_runtime_mem_kb() { echo ""; } + run _pf_recheck_runtime_mem; [ -z "$output" ] +} + # ── run_preflight orchestration ────────────────────────────────────────────── @test "run_preflight: TRACEBLOC_SKIP_PREFLIGHT -> 
skipped, exit 0" { export TRACEBLOC_SKIP_PREFLIGHT=1