diff --git a/.github/workflows/standard-checks.yml b/.github/workflows/standard-checks.yml new file mode 100644 index 0000000..d5b449c --- /dev/null +++ b/.github/workflows/standard-checks.yml @@ -0,0 +1,59 @@ +name: Standard checks + +# Emits the org-standard required status checks **Lint** and **Unit tests** for +# this (shell / Helm) repo, so branch-protection rules that require those exact +# contexts are satisfied and PRs can merge. +# +# IMPORTANT — deliberately NOT path-filtered. A *required* status check must +# report on EVERY PR to the protected branch; if it were gated on `paths:`, a PR +# that didn't touch those paths would never produce the check and would sit +# forever at "Expected — waiting for status to be reported", blocking the merge. +# +# These jobs reuse the same proven steps as `installer-tests.yaml`'s deeper +# suite (which keeps the broader coverage: PSScriptAnalyzer, Pester on Windows, +# distro-prereqs, e2e). If the two ever feel redundant, fold the deep suite's +# names into these and drop the duplicates — but keep SOMETHING reporting +# `Lint` + `Unit tests` unconditionally. 
+ +on: + push: + branches: [main, develop, openshift] + pull_request: + branches: [main, develop, openshift] + workflow_dispatch: + +permissions: + contents: read + +jobs: + lint: + name: Lint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: bash -n (syntax) on every shell script + run: | + find scripts -type f -name '*.sh' -print0 \ + | while IFS= read -r -d '' f; do bash -n "$f" || exit 1; done + echo "all shell scripts parse" + + - name: ShellCheck (libs + entrypoints), error severity + run: | + sudo apt-get update -qq && sudo apt-get install -y -qq shellcheck + shellcheck --version | grep version + shellcheck --severity=error --shell=bash \ + scripts/install.sh scripts/install-k8s.sh scripts/lib/*.sh \ + scripts/tests/distro-prereqs.sh scripts/tests/e2e-cluster.sh scripts/tests/e2e-proxy.sh + + unit-tests: + name: Unit tests + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install bats + run: sudo apt-get update -qq && sudo apt-get install -y -qq bats + + - name: Run bats (bash unit suite, mocked) + run: bats scripts/tests/*.bats diff --git a/README.md b/README.md index 014ba0e..5258d12 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ This repo ships the **tracebloc** unified Helm chart (currently `v1.3.5`) — on ### Quick install -A single command provisions a Kubernetes cluster, auto-detects and installs GPU drivers (NVIDIA or AMD), and deploys the tracebloc client. Use this when you don't already have a cluster — the result is a full client install, not a demo. +A single command provisions a Kubernetes cluster, auto-detects and installs GPU drivers (NVIDIA or AMD), deploys the tracebloc client, and installs the [tracebloc CLI](https://github.com/tracebloc/cli) (`tracebloc dataset push`). Use this when you don't already have a cluster — the result is a full client install, not a demo. 
**macOS / Linux** diff --git a/docs/INSTALL.md b/docs/INSTALL.md index 1a3bdb7..78772aa 100644 --- a/docs/INSTALL.md +++ b/docs/INSTALL.md @@ -2,7 +2,7 @@ This guide covers installing the **tracebloc** unified Helm chart (AKS, EKS, bare-metal, OpenShift) in a production-ready way. -> **Don't have a Kubernetes cluster yet?** The standalone installer provisions a cluster, installs GPU drivers, and deploys a full tracebloc client in a single command: +> **Don't have a Kubernetes cluster yet?** The standalone installer provisions a cluster, installs GPU drivers, deploys a full tracebloc client, and installs the [tracebloc CLI](https://github.com/tracebloc/cli) — in a single command: > > - **macOS / Linux:** `bash <(curl -fsSL https://tracebloc.io/i.sh)` > - **Windows:** `irm https://tracebloc.io/i.ps1 | iex` *(PowerShell as Administrator)* diff --git a/scripts/install-k8s.ps1 b/scripts/install-k8s.ps1 index c890bea..0830fc7 100644 --- a/scripts/install-k8s.ps1 +++ b/scripts/install-k8s.ps1 @@ -242,6 +242,7 @@ function Print-Roadmap { Hint "2. Set up secure compute environment" Hint "3. Install tracebloc client" Hint "4. Connect to tracebloc network" + Hint "5. Install the tracebloc CLI" Write-Host "" } @@ -1008,7 +1009,7 @@ function Test-Credentials { function Install-ClientHelm { - # -- Step 3/4: Install tracebloc client -- - Step 3 4 "Installing tracebloc client" + # -- Step 3/5: Install tracebloc client -- + Step 3 5 "Installing tracebloc client" if (-not (Test-Path $HOST_DATA_DIR)) { New-Item -ItemType Directory -Path $HOST_DATA_DIR -Force | Out-Null @@ -1045,7 +1046,7 @@ function Install-ClientHelm { $script:TB_NAMESPACE = $TB_NAMESPACE # share with Wait-ForClientReady / Print-Summary - # -- Step 4/4: Connect to tracebloc network -- - Step 4 4 "Connect to tracebloc network" + # -- Step 4/5: Connect to tracebloc network -- + Step 4 5 "Connect to tracebloc network" PromptHeader "To connect this machine, you need a tracebloc client." 
Hint "A client links your secure environment to the tracebloc" @@ -1308,7 +1309,8 @@ function Print-Summary { Hint "After a reboot, start Docker Desktop to bring your client back (enable 'Start Docker Desktop when you sign in' in Settings -> General to automate)." Write-Host "" Write-Host " What to do next" -ForegroundColor White - Write-Host " 1. Ingest your training and test data" + Write-Host " 1. Ingest your training and test data with the tracebloc CLI:" + Write-Host " tracebloc dataset push ./data" -ForegroundColor Cyan Write-Host " 2. Define your first AI use case and invite vendors" Write-Host "" Hint "Dashboard: https://ai.tracebloc.io Logs: ~\.tracebloc\ Data: /tracebloc/$ns" @@ -1558,6 +1560,66 @@ function Invoke-DiagnoseBundle { } } +# ============================================================================= +# INSTALL TRACEBLOC CLI (Step 5) +# ============================================================================= +# Installs the `tracebloc` CLI via its own released installer (tracebloc/cli), +# which downloads the right build for this OS/arch and verifies it (SHA256 + +# cosign signature). Lets the user push datasets to the client they just set +# up: tracebloc dataset push ./data +# +# NON-FATAL: runs after the client is connected, so a CLI-install hiccup warns +# and moves on. The CLI's own installer sets $ErrorActionPreference='Stop' and +# exits on failure, so we run it in a CHILD powershell process — its exit can +# never abort THIS installer. +$TRACEBLOC_CLI_INSTALL_URL = "https://github.com/tracebloc/cli/releases/latest/download/install.ps1" + +function Install-TraceblocCli { + Step 5 5 "Install the tracebloc CLI" + + if (Has "tracebloc") { + Info "tracebloc CLI already present -- re-running its installer to pick up the latest." + } + Info "Installing the tracebloc CLI (dataset push / cluster info / dataset rm)..." 
+ + # [System.IO.Path]::GetTempPath() is cross-platform (%TEMP% on Windows, /tmp + # on Linux); $env:TEMP is null under Linux pwsh, which the ubuntu Pester run + # exercises. + $cliOut = Join-Path ([System.IO.Path]::GetTempPath()) "tracebloc-cli-install-$(Get-Random).log" + $cliErr = "$cliOut.err" + try { + $p = Start-Process -FilePath "powershell.exe" ` + -ArgumentList @("-NoProfile","-ExecutionPolicy","Bypass","-Command","irm '$TRACEBLOC_CLI_INSTALL_URL' | iex") ` + -NoNewWindow -PassThru ` + -RedirectStandardOutput $cliOut -RedirectStandardError $cliErr + # Caching .Handle before the process exits, then WaitForExit(), makes + # .ExitCode reliable. (The -Wait -PassThru form can leave .ExitCode $null + # with redirected output; -PassThru + Handle + WaitForExit does not.) + $null = $p.Handle + $p.WaitForExit() + foreach ($f in @($cliOut, $cliErr)) { + if (Test-Path $f) { Get-Content $f -ErrorAction SilentlyContinue | ForEach-Object { Log $_ } } + } + # Installer exit status is the SOLE source of truth, mirroring the bash step + # (`if sh installer; then …`). Do NOT also accept "tracebloc already on PATH" + # as success — a failed re-install on a machine that already had the CLI + # would then be misreported as a success. + if ($p.ExitCode -eq 0) { + RefreshPath + Ok "tracebloc CLI installed -- open a new terminal so it's on your PATH." + } else { + Warn "Couldn't install the tracebloc CLI automatically -- your client is set up fine." + Hint "Install it later: irm $TRACEBLOC_CLI_INSTALL_URL | iex" + } + } catch { + Warn "Couldn't install the tracebloc CLI automatically -- your client is set up fine." 
+ Hint "Install it later: irm $TRACEBLOC_CLI_INSTALL_URL | iex" + Log "CLI install failed: $_" + } finally { + Remove-Item $cliOut, $cliErr -Force -ErrorAction SilentlyContinue + } +} + # ============================================================================= # MAIN # ============================================================================= @@ -1573,8 +1635,8 @@ Start-InstallLog Print-Banner Print-Roadmap -# -- Step 1/4: Check system requirements -- -Step 1 4 "Checking system requirements" +# -- Step 1/5: Check system requirements -- +Step 1 5 "Checking system requirements" Test-Preflight Find-Gpu Enable-VirtualisationFeatures @@ -1584,17 +1646,21 @@ Install-NvidiaContainerToolkit Install-Kubectl Install-K3dAndHelm -# -- Step 2/4: Set up secure compute environment -- -Step 2 4 "Setting up secure compute environment" +# -- Step 2/5: Set up secure compute environment -- +Step 2 5 "Setting up secure compute environment" New-K3dCluster Install-GpuDevicePlugin Confirm-GpuNode -# -- Steps 3/4 + 4/4 handled inside Install-ClientHelm -- +# -- Steps 3/5 + 4/5 handled inside Install-ClientHelm -- Install-ClientHelm # Verify the client actually came up before reporting anything Wait-ForClientReady + +# -- Step 5/5: install the tracebloc CLI (non-fatal; client is already up) -- +Install-TraceblocCli + Print-Summary try { Stop-Transcript | Out-Null } catch {} diff --git a/scripts/install-k8s.sh b/scripts/install-k8s.sh index 804e535..e708956 100755 --- a/scripts/install-k8s.sh +++ b/scripts/install-k8s.sh @@ -51,6 +51,14 @@ source "${LIB_DIR}/setup-linux.sh" source "${LIB_DIR}/cluster.sh" source "${LIB_DIR}/gpu-plugins.sh" source "${LIB_DIR}/install-client-helm.sh" +# install-cli.sh may be absent if an older bootstrap copy (e.g. a not-yet- +# updated tracebloc.io/i.sh, whose FILES list is hand-maintained) didn't fetch +# it. Guard the source so a stale bootstrap degrades gracefully (Step 5 is +# skipped) instead of aborting the whole installer under `set -e`. 
Use an `if` +# block, NOT `[[ -f … ]] && source` — that list returns non-zero when absent. +if [[ -f "${LIB_DIR}/install-cli.sh" ]]; then + source "${LIB_DIR}/install-cli.sh" +fi source "${LIB_DIR}/summary.sh" source "${LIB_DIR}/diagnose.sh" @@ -70,7 +78,7 @@ main() { print_roadmap - # ── Step 1/4: Check system requirements ────────────────────────────────── - step 1 4 "Checking system requirements" + # ── Step 1/5: Check system requirements ────────────────────────────────── + step 1 5 "Checking system requirements" run_preflight detect_gpu @@ -84,16 +92,24 @@ main() { esac - # ── Step 2/4: Set up secure compute environment ────────────────────────── - step 2 4 "Setting up secure compute environment" + # ── Step 2/5: Set up secure compute environment ────────────────────────── + step 2 5 "Setting up secure compute environment" create_cluster deploy_gpu_device_plugin verify_gpu - # ── Step 3/4 + 4/4 are handled inside install_client_helm ──────────────── + # ── Step 3/5 + 4/5 are handled inside install_client_helm ──────────────── install_client_helm # ── Verify the client actually came up before reporting anything ───────── wait_for_client_ready + + # ── Step 5/5: install the tracebloc CLI. Non-fatal — the client is already + # connected, so a CLI hiccup warns but never fails the run. Guarded on the + # function being defined, in case a stale bootstrap didn't fetch the lib. ─ + if declare -F install_tracebloc_cli >/dev/null 2>&1; then + install_tracebloc_cli + fi + print_summary # Exit code reflects reality: connected/starting are OK; failures are non-zero diff --git a/scripts/install.sh b/scripts/install.sh index 59be5a3..44a6119 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -42,6 +42,7 @@ FILES=( "scripts/lib/cluster.sh" "scripts/lib/gpu-plugins.sh" "scripts/lib/install-client-helm.sh" + "scripts/lib/install-cli.sh" "scripts/lib/summary.sh" "scripts/lib/diagnose.sh" ) diff --git a/scripts/lib/common.sh b/scripts/lib/common.sh index 96d08c0..5031960 100755 --- a/scripts/lib/common.sh +++ b/scripts/lib/common.sh @@ -347,6 +347,7 @@ print_roadmap() { echo -e " ${DIM}2. 
Set up secure compute environment${RESET}" echo -e " ${DIM}3. Install tracebloc client${RESET}" echo -e " ${DIM}4. Connect to tracebloc network${RESET}" + echo -e " ${DIM}5. Install the tracebloc CLI${RESET}" echo "" } diff --git a/scripts/lib/install-cli.sh b/scripts/lib/install-cli.sh new file mode 100644 index 0000000..c9d220e --- /dev/null +++ b/scripts/lib/install-cli.sh @@ -0,0 +1,67 @@ +#!/usr/bin/env bash +# ============================================================================= +# install-cli.sh — Install the tracebloc CLI (Step 5) +# +# Installs the `tracebloc` command-line tool so the user can push datasets to +# the client they just set up: +# +# tracebloc dataset push ./data +# +# It does NOT reimplement any install logic — it runs the CLI's own released +# installer (github.com/tracebloc/cli), which downloads the right build for +# this OS/arch and verifies it (SHA256 + cosign signature) before installing. +# Keeping that logic in the cli repo means this stays correct as the CLI's +# platform matrix / signing evolves. +# +# NON-FATAL by design: this runs AFTER the client is already connected, so a +# CLI-install hiccup must warn and move on — it must never turn a successful +# "Connected to tracebloc" into a failed install. Every path returns 0, and +# detection does NOT rely on the caller's `set -o pipefail` (we download to a +# temp file and check each step explicitly rather than `curl | sh`). +# ============================================================================= + +TRACEBLOC_CLI_INSTALL_URL="https://github.com/tracebloc/cli/releases/latest/download/install.sh" + +install_tracebloc_cli() { + step 5 5 "Install the tracebloc CLI" + + if has tracebloc; then + # Version is cosmetic — never let a failing `tracebloc version` (or SIGPIPE + # from `head` closing the pipe, under `set -o pipefail`) abort this step. + # `local` masks the status and `|| true` keeps any captured value. 
+ local ver="$(tracebloc version 2>/dev/null | head -1 || true)" + info "tracebloc CLI already present${ver:+ ($ver)} — re-running its installer to pick up the latest." + fi + + info "Installing the tracebloc CLI (dataset push / cluster info / dataset rm)…" + + local installer + installer="$(mktemp)" || { warn "Couldn't install the tracebloc CLI (no temp dir) — your client is set up fine."; return 0; } + + # 1) Download the released installer. A failure here is a download problem, + # distinct from an install problem below. + if ! curl -fsSL "$CURL_SECURE" "$TRACEBLOC_CLI_INSTALL_URL" -o "$installer" 2>>"${LOG_FILE:-/dev/null}"; then + warn "Couldn't download the tracebloc CLI installer — your client is set up fine." + hint "Install it later: curl -fsSL ${TRACEBLOC_CLI_INSTALL_URL} | sh" + rm -f "$installer" + return 0 + fi + + # 2) Run it. Output → install log to keep this screen clean. The CLI installer + # verifies SHA256 + cosign and falls back to ~/.local/bin (printing PATH + # guidance) when /usr/local/bin isn't writable. + if sh "$installer" >> "${LOG_FILE:-/dev/null}" 2>&1; then + if has tracebloc; then + local ver="$(tracebloc version 2>/dev/null | head -1 || true)" + success "tracebloc CLI installed${ver:+ ($ver)}." + else + success "tracebloc CLI installed — open a new terminal so it's on your PATH." + fi + else + warn "Couldn't install the tracebloc CLI automatically — your client is set up fine." 
+ hint "Install it later: curl -fsSL ${TRACEBLOC_CLI_INSTALL_URL} | sh" + fi + + rm -f "$installer" + return 0 +} diff --git a/scripts/lib/install-client-helm.sh b/scripts/lib/install-client-helm.sh index 6cc95bc..41e0615 100644 --- a/scripts/lib/install-client-helm.sh +++ b/scripts/lib/install-client-helm.sh @@ -141,7 +141,7 @@ verify_credentials() { install_client_helm() { - # ── Step 3/4: Install tracebloc client ─────────────────────────────────── - step 3 4 "Installing tracebloc client" + # ── Step 3/5: Install tracebloc client ─────────────────────────────────── + step 3 5 "Installing tracebloc client" _ensure_tracebloc_dirs local values_file="${HOST_DATA_DIR}/values.yaml" @@ -186,7 +186,7 @@ install_client_helm() { TB_NAMESPACE=$(_sanitize_workspace_name "${TB_NAMESPACE:-tracebloc}") - # ── Step 4/4: Connect to tracebloc network ────────────────────────────── - step 4 4 "Connect to tracebloc network" + # ── Step 4/5: Connect to tracebloc network ────────────────────────────── + step 4 5 "Connect to tracebloc network" prompt_header "To connect this machine, you need a tracebloc client." hint "A client links your secure environment to the tracebloc" diff --git a/scripts/lib/summary.sh b/scripts/lib/summary.sh index ccf9f93..6282dcf 100755 --- a/scripts/lib/summary.sh +++ b/scripts/lib/summary.sh @@ -112,7 +112,8 @@ print_summary() { _reboot_note echo "" echo -e " ${BOLD}What to do next${RESET}" - echo -e " ${WHITE}1.${RESET} Ingest your training and test data" + echo -e " ${WHITE}1.${RESET} Ingest your training and test data with the tracebloc CLI:" + echo -e " ${CYAN}tracebloc dataset push ./data${RESET}" echo -e " ${WHITE}2.${RESET} Define your first AI use case and invite vendors" echo "" echo -e " ${DIM}Dashboard:${RESET} ${CYAN}https://ai.tracebloc.io${RESET} ${DIM}Logs:${RESET} ${DIM}~/.tracebloc/${RESET} ${DIM}Data:${RESET} ${DIM}/tracebloc/${ns}${RESET}" diff --git a/scripts/tests/install-cli.bats b/scripts/tests/install-cli.bats new file mode 100644 index 0000000..e392dd8 --- /dev/null +++ b/scripts/tests/install-cli.bats @@ -0,0 +1,48 @@ +#!/usr/bin/env bats +# Tests for scripts/lib/install-cli.sh — the 
tracebloc CLI install step (#201). +# +# The load-bearing property is that it is NON-FATAL: the client is already +# connected by the time install_tracebloc_cli runs, so a download or install +# failure must leave it returning 0 (the orchestrator runs under `set -e`; a +# non-zero return there would abort an otherwise-successful install). +load test_helper + +setup() { + load_lib install-cli.sh + # Stub the UI helpers (defined in common.sh in the real run) so we can assert + # on what the function reports. + step() { :; } + info() { :; } + success() { echo "SUCCESS: $*"; } + warn() { echo "WARN: $*"; } + hint() { :; } + has() { return 1; } # default: tracebloc not present + # CURL_SECURE is set readonly by common.sh (loaded via load_lib); don't + # reassign it. curl is mocked in every test below, so its value is moot. + LOG_FILE="$(mktemp)" +} + +@test "install_tracebloc_cli: download failure is non-fatal (returns 0, warns)" { + curl() { return 22; } # curl HTTP failure (exit 22) + run install_tracebloc_cli + [ "$status" -eq 0 ] + [[ "$output" == *"WARN: Couldn't download"* ]] +} + +@test "install_tracebloc_cli: installer-script failure is non-fatal (returns 0, warns)" { + curl() { : > "${@: -1}"; return 0; } # 'download' OK (creates the -o target) + sh() { return 1; } # the CLI installer itself fails + run install_tracebloc_cli + [ "$status" -eq 0 ] + [[ "$output" == *"WARN: Couldn't install"* ]] +} + +@test "install_tracebloc_cli: success path reports installed" { + curl() { : > "${@: -1}"; return 0; } + sh() { return 0; } + has() { return 0; } # tracebloc now resolvable + tracebloc() { echo "tracebloc 0.2.0"; } + run install_tracebloc_cli + [ "$status" -eq 0 ] + [[ "$output" == *"SUCCESS: tracebloc CLI installed"* ]] +} diff --git a/scripts/tests/install-k8s.Tests.ps1 b/scripts/tests/install-k8s.Tests.ps1 index fe39098..da64161 100644 --- a/scripts/tests/install-k8s.Tests.ps1 +++ b/scripts/tests/install-k8s.Tests.ps1 @@ -112,6 +112,54 @@ Describe 
"ConvertTo-WorkspaceName" { It "all-invalid -> default" { ConvertTo-WorkspaceName -Input_ "@@@" | Should -Be "default" } } +Describe "Install-TraceblocCli" { + # Step 5 of the installer: install the tracebloc CLI via its own released + # installer, run in a CHILD powershell process. The load-bearing property is + # NON-FATAL — a failure must Warn (not throw), since the client is already up. + BeforeEach { + Mock RefreshPath {} + Mock Has { $false } # tracebloc not already on PATH + } + # Fake the System.Diagnostics.Process that Start-Process -PassThru returns: + # the function caches .Handle, calls .WaitForExit(), then reads .ExitCode. + It "non-fatal: warns (does not throw) when the CLI installer exits non-zero" { + Mock Start-Process { + $o = [pscustomobject]@{ ExitCode = 1 } + $o | Add-Member ScriptProperty Handle { [IntPtr]::Zero } + $o | Add-Member ScriptMethod WaitForExit { } + $o + } + $out = Install-TraceblocCli 6>&1 | Out-String + $out | Should -Match "Couldn't install the tracebloc CLI" + } + It "non-fatal: warns (does not throw) when Start-Process itself throws" { + Mock Start-Process { throw "network down" } + $out = Install-TraceblocCli 6>&1 | Out-String + $out | Should -Match "Couldn't install the tracebloc CLI" + } + It "reports success only when the installer exits 0" { + Mock Start-Process { + $o = [pscustomobject]@{ ExitCode = 0 } + $o | Add-Member ScriptProperty Handle { [IntPtr]::Zero } + $o | Add-Member ScriptMethod WaitForExit { } + $o + } + $out = Install-TraceblocCli 6>&1 | Out-String + $out | Should -Match "tracebloc CLI installed" + } + It "warns on a failed re-install even when a CLI is already on PATH" { + Mock Start-Process { + $o = [pscustomobject]@{ ExitCode = 1 } + $o | Add-Member ScriptProperty Handle { [IntPtr]::Zero } + $o | Add-Member ScriptMethod WaitForExit { } + $o + } + Mock Has { $true } # a CLI is already present, but the installer failed… + $out = Install-TraceblocCli 6>&1 | Out-String + $out | Should -Match "Couldn't 
install the tracebloc CLI" # …so it must still warn + } +} + Describe "Get-WindowsArch" { AfterEach { $env:PROCESSOR_ARCHITECTURE = "AMD64" } It "AMD64 -> amd64" { $env:PROCESSOR_ARCHITECTURE = "AMD64"; Get-WindowsArch | Should -Be "amd64" }