nprodromou · claude-prodromou · May 8, 2026 · May 7, 2026 · May 7, 2026
diff --git a/Dockerfile b/Dockerfile
@@ -219,6 +219,17 @@ RUN groupadd -g 1000 ${AGENT} \
     && mkdir -p /home/${AGENT}/.config /home/${AGENT}/workspace \
     && chown -R ${AGENT}:${AGENT} /home/${AGENT}
 
+# Per-agent default config baseline. Copied into /etc/<agent>-defaults/
+# at build time; the entrypoint applies these BEFORE the ConfigMap
+# overlay at /etc/<agent>-config/, so a deployment without a ConfigMap
+# still gets sensible runtime config. The overlay replaces whole files,
+# so a ConfigMap file with the same name must be complete, not a delta.
+RUN mkdir -p "/etc/${AGENT}-defaults"
+COPY defaults/ /etc/defaults-staging/
+RUN if [ -f "/etc/defaults-staging/${AGENT}-config.toml" ]; then \
+      cp "/etc/defaults-staging/${AGENT}-config.toml" "/etc/${AGENT}-defaults/config.toml"; \
+    fi && rm -rf /etc/defaults-staging
+
 # Entrypoint + bash profile.
 COPY --chmod=0755 bin/entrypoint.sh /usr/local/bin/entrypoint.sh
 COPY --chown=${AGENT}:${AGENT} profile/.bashrc    /home/${AGENT}/.bashrc

diff --git a/bin/entrypoint.sh b/bin/entrypoint.sh
@@ -97,14 +97,56 @@ claude)
     ;;
 esac
 
-# Sync managed config from a ConfigMap mounted at /etc/<agent>-config/.
+# Layer 1 — image-baked defaults at /etc/<agent>-defaults/.
+# Gives a deployment without a ConfigMap a working runtime config.
+# The ConfigMap overlay below replaces whole files (cp cannot merge
+# keys), so a same-named file there fully supersedes the default.
+# Today: Codex sandbox/approval baseline (defaults/codex-config.toml; OPS-405).
+#
+# cp -afL: -a recurses + preserves attributes, -L dereferences symlinks.
+# Failures exit FATAL rather than being masked — same pattern as the
+# ConfigMap overlay below (OPS-406, codex-shell#10).
+AGENT_DEFAULTS_DIR="/etc/${AGENT}-defaults"
+if [ -d "${AGENT_DEFAULTS_DIR}" ]; then
+    if ! cp -afL "${AGENT_DEFAULTS_DIR}/." "${AGENT_CONFIG_DIR}/"; then
+        echo "FATAL: failed to sync image defaults from ${AGENT_DEFAULTS_DIR} to ${AGENT_CONFIG_DIR}" >&2
+        exit 1
+    fi
+    chmod -R u+w "${AGENT_CONFIG_DIR}" 2>/dev/null || true
+
+    # Smoke check: the first top-level entry of the defaults dir must
+    # exist in the destination. A bare "destination not empty" test
+    # proves nothing if the destination already held files.
+    defaults_probe="$(find "${AGENT_DEFAULTS_DIR}" -mindepth 1 -maxdepth 1 -print -quit 2>/dev/null || true)"
+    if [ -n "${defaults_probe}" ] && [ ! -e "${AGENT_CONFIG_DIR}/${defaults_probe##*/}" ]; then
+        echo "FATAL: defaults sync ran but ${defaults_probe##*/} is missing from ${AGENT_CONFIG_DIR}" >&2
+        exit 1
+    fi
+fi
+
+# Layer 2 — managed config from a ConfigMap mounted at /etc/<agent>-config/.
 # The ConfigMap (apk8s repo) is the source of truth for model/MCP config;
 # in-pod edits get blown away on restart. Stakater Reloader restarts the
-# pod when the ConfigMap changes.
+# pod when the ConfigMap changes. Per-deployment overrides go here; each
+# file replaces the same-named Layer 1 default whole (no key-level merge).
+# cp -afL: -a recurses + preserves attributes, -L dereferences the
+# symlink farm that ConfigMap mounts use. The previous `cp -fL` skipped
+# subdirectories entirely and silently dropped managed config (OPS-406).
 if [ -d "${AGENT_CONFIG_SOURCE}" ]; then
-    # cp -L follows symlinks (configmap mounts are symlink farms).
-    cp -fL "${AGENT_CONFIG_SOURCE}/." "${AGENT_CONFIG_DIR}/" 2>/dev/null || true
+    if ! cp -afL "${AGENT_CONFIG_SOURCE}/." "${AGENT_CONFIG_DIR}/"; then
+        echo "FATAL: failed to sync managed config from ${AGENT_CONFIG_SOURCE} to ${AGENT_CONFIG_DIR}" >&2
+        exit 1
+    fi
     chmod -R u+w "${AGENT_CONFIG_DIR}" 2>/dev/null || true
+
+    # Smoke check: the first top-level entry of the ConfigMap mount must
+    # exist in the destination. A bare "destination not empty" test is
+    # always true once the Layer 1 defaults have been copied in.
+    config_probe="$(find "${AGENT_CONFIG_SOURCE}" -mindepth 1 -maxdepth 1 -print -quit 2>/dev/null || true)"
+    if [ -n "${config_probe}" ] && [ ! -e "${AGENT_CONFIG_DIR}/${config_probe##*/}" ]; then
+        echo "FATAL: managed config sync ran but ${config_probe##*/} is missing from ${AGENT_CONFIG_DIR}" >&2
+        exit 1
+    fi
 fi
 
 # Pull nprodromou/agent-config for the canonical Nate-org instructions

diff --git a/defaults/codex-config.toml b/defaults/codex-config.toml
@@ -0,0 +1,37 @@
+# Default Codex CLI runtime config for the codex-shell pod.
+#
+# Layered with /etc/codex-config (apk8s ConfigMap) at entrypoint time —
+# image defaults are copied first, then the ConfigMap is copied over
+# them file by file (no key-level merge): if the ConfigMap ships its own
+# config.toml it replaces this baseline whole; per-deployment tweaks go in apk8s.
+#
+# Why these values:
+#
+# - `sandbox_mode = "danger-full-access"`: the pod itself is the
+#   security boundary (non-root user, restricted RBAC, PVC isolation).
+#   Codex's internal bubblewrap layer is redundant in this deployment
+#   and was failing on `bwrap: No permissions to create new namespace`
+#   in apk8s pods that don't allow unprivileged user namespaces (most
+#   hardened k8s clusters). Disabling the inner sandbox means commands
+#   no longer escalate-on-bwrap-failure for every read.
+#
+# - `approval_policy = "on-failure"`: with the inner sandbox off, no
+#   sandbox-failure escalations happen. The user only gets prompted
+#   when a command genuinely fails. Combined with full-access this is
+#   functionally "no per-command prompts" — appropriate for a trusted
+#   agent pod, not for an unrestricted user shell.
+#
+# - The `[projects."/home/codex/workspace"]` trust entry mirrors what
+#   the live config already had (per OPS-405 description) and makes
+#   the trust explicit at image-default level.
+#
+# To tighten later (e.g., re-enable inner sandbox once unprivileged
+# user-namespace-clone is enabled at the kubelet/sysctl level), set
+# `sandbox_mode = "workspace-write"` in a complete config.toml in the
+# apk8s ConfigMap (it replaces this file); this baseline needn't change.
+
+sandbox_mode   = "danger-full-access"
+approval_policy = "on-failure"
+
+[projects."/home/codex/workspace"]
+trust_level = "trusted"