diff --git a/scripts/tests/e2e-proxy.sh b/scripts/tests/e2e-proxy.sh
index aaf55b0..5ac9d5c 100644
--- a/scripts/tests/e2e-proxy.sh
+++ b/scripts/tests/e2e-proxy.sh
@@ -133,5 +133,132 @@ if ! echo "$plog" | grep -E 'CONNECT .*auth\.docker\.io' | grep -q "$PROXY_USER"
   error "No authenticated auth.docker.io CONNECT in the proxy log — the node's image pull did not traverse the proxy."
 fi
 
+# ── 4. APPLICATION-pod egress through a proxy (client-runtime#119) ────────────
+# §1-3 prove NODE egress (image pulls) through the AUTHENTICATED host squid. But
+# the ingestion Job and training pods are application pods that POST to the
+# backend via requests/urllib3 — they only traverse a proxy if their POD env
+# carries HTTP(S)_PROXY (build_job_spec / jobs_manager._add_environment_variables).
+# That layer is what client-runtime#119 was about, and §3 never touches it.
+#
+# A pod cannot reach the host squid via host.k3d.internal (that alias is for k3d
+# NODES, not pod DNS), so we stand up an in-cluster squid the pods reach by
+# Service DNS — a closer model of a real corporate proxy reachable by name. Auth
+# survival is already covered by §1-3; this section is about proxy-env ROUTING.
+# One pod carries the ingestion-style proxy env and makes two calls to the SAME
+# backend (one pod / two calls = deterministic; no multi-pod scheduling or
+# log-flush race to flake on):
+#   * WITH the proxy env it reaches the backend THROUGH the squid (the fixed
+#     ingestion Job);
+#   * with that env unset the same call bypasses the squid / dials direct (the
+#     pre-fix Job — in a real proxy-only network like Charité that direct dial is
+#     refused with [Errno 111]; here the node has direct egress, so we assert the
+#     *absence* of a proxied CONNECT).
+# NOTE(review): both images below use the floating `:latest` tag, so a registry
+# push can change this test's behaviour between runs — consider pinning them.
+echo "── deploying an in-cluster squid the test pods can reach by Service DNS ──"
+kubectl apply -f - <<'YAML'
+apiVersion: v1
+kind: ConfigMap
+metadata: { name: tb-egress-squid }
+data:
+  squid.conf: |
+    acl SSL_ports port 443
+    acl CONNECT method CONNECT
+    http_access deny CONNECT !SSL_ports
+    http_access allow all
+    http_port 3128
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata: { name: tb-egress-squid, labels: { app: tb-egress-squid } }
+spec:
+  replicas: 1
+  selector: { matchLabels: { app: tb-egress-squid } }
+  template:
+    metadata: { labels: { app: tb-egress-squid } }
+    spec:
+      containers:
+        - name: squid
+          image: ubuntu/squid:latest
+          ports: [{ containerPort: 3128 }]
+          # Gate rollout on squid actually LISTENING, so the probe pods below
+          # don't race a not-yet-bound port (the "connect refused after 1ms").
+          readinessProbe:
+            tcpSocket: { port: 3128 }
+            initialDelaySeconds: 2
+            periodSeconds: 2
+          volumeMounts:
+            - { name: conf, mountPath: /etc/squid/squid.conf, subPath: squid.conf }
+      volumes:
+        - { name: conf, configMap: { name: tb-egress-squid } }
+---
+apiVersion: v1
+kind: Service
+metadata: { name: tb-egress-squid }
+spec:
+  selector: { app: tb-egress-squid }
+  ports: [{ port: 3128, targetPort: 3128 }]
+YAML
+kubectl rollout status deploy/tb-egress-squid --timeout=180s
+
+# Mirrors _EGRESS_NO_PROXY / the chart's cluster-safe NO_PROXY: in-cluster direct.
+APP_PROXY_URL="http://tb-egress-squid.default.svc.cluster.local:3128"
+APP_NO_PROXY="localhost,127.0.0.1,mysql-client,requests-proxy-service,.svc,.svc.cluster.local,.cluster.local,10.0.0.0/8,172.16.0.0/12,192.168.0.0/16"
+
+# ONE pod carrying the ingestion-style proxy env (BOTH cases — curl honours the
+# lower-case `https_proxy` for HTTPS; the real ingestion env emits both, so the
+# probe must too or it silently dials direct and the test is a lie). It makes two
+# calls to the SAME backend: (A) with the proxy env it must traverse the squid via
+# a CONNECT tunnel; (B) with the proxy env unset it must dial direct. A single pod
+# keeps this deterministic — no multi-pod scheduling / log-flush race to flake on.
+echo "── one app pod: WITH the ingestion proxy env it must tunnel via the squid; with it unset it must dial direct ──"
+kubectl run egress-app --image=curlimages/curl:latest --restart=Never \
+  --env="HTTP_PROXY=${APP_PROXY_URL}" --env="HTTPS_PROXY=${APP_PROXY_URL}" \
+  --env="http_proxy=${APP_PROXY_URL}" --env="https_proxy=${APP_PROXY_URL}" \
+  --env="NO_PROXY=${APP_NO_PROXY}" --env="no_proxy=${APP_NO_PROXY}" \
+  --command -- sh -c '
+    echo ">>>>> SECTION_A_WITH_PROXY_ENV";
+    curl -v -sS -m 20 -o /dev/null https://api.tracebloc.io/ 2>&1;
+    echo ">>>>> SECTION_B_PROXY_ENV_UNSET";
+    env -u HTTP_PROXY -u HTTPS_PROXY -u http_proxy -u https_proxy -u NO_PROXY -u no_proxy curl -v -sS -m 20 -o /dev/null https://api.tracebloc.io/ 2>&1;
+    echo ">>>>> SECTION_END"'
+
+# Wait (up to ~180s) for the pod to reach a terminal phase, then read its log once.
+phase=""
+for _ in {1..90}; do
+  phase="$(kubectl get pod egress-app -o jsonpath='{.status.phase}' 2>/dev/null || true)"
+  [[ "$phase" == "Succeeded" || "$phase" == "Failed" ]] && break
+  sleep 2
+done
+# A pod still Pending/Running here (e.g. an image-pull failure) has produced no
+# usable log; fail on THAT rather than misreporting it below as the #119 bug.
+if [[ "$phase" != "Succeeded" && "$phase" != "Failed" ]]; then
+  error "egress-app pod did not finish within 180s (last phase: '${phase:-unknown}') — cannot evaluate app-pod egress."
+fi
+applog="$(kubectl logs egress-app 2>/dev/null || true)"
+# SECTION_END is the pod's last output; without it a probe never ran and the
+# "no proxy used" check in (B) would pass vacuously on a truncated log.
+if ! grep -qF 'SECTION_END' <<<"$applog"; then
+  error "egress-app log is incomplete (no SECTION_END marker) — cannot evaluate app-pod egress."
+fi
+a_section="$(printf '%s\n' "$applog" | awk '/SECTION_A_WITH_PROXY_ENV/{f=1;next} /SECTION_B_PROXY_ENV_UNSET/{f=0} f')"
+b_section="$(printf '%s\n' "$applog" | awk '/SECTION_B_PROXY_ENV_UNSET/{f=1;next} /SECTION_END/{f=0} f')"
+
+# Proof is CLIENT-side from `curl -v` — deterministic, unlike squid's access log
+# which the log daemon buffers and may not have flushed when we read it.
+# The two excerpts below are diagnostics only: `|| true` stops a no-match grep
+# from ending the script (under errexit + pipefail) before the checks can explain why.
+printf '%s\n' "$a_section" | grep -iE 'Establish HTTP proxy tunnel|CONNECT tunnel established|< HTTP/[0-9.]+ 200' | sed 's/^/ A WITH proxy env: /' || true
+printf '%s\n' "$b_section" | grep -iE 'Trying|Connected to|< HTTP/[0-9.]+ 200' | sed 's/^/ B env unset: /' || true
+# (A) WITH the ingestion proxy env, the backend call MUST traverse the squid.
+if ! grep -qiE 'Establish HTTP proxy tunnel to api\.tracebloc\.io|CONNECT tunnel established' <<<"$a_section"; then
+  error "App pod WITH the ingestion proxy env did NOT tunnel through the squid — ingestion-style backend egress is not proxied (the #119 bug)."
+fi
+# (B) With the env unset, the SAME call MUST NOT use a proxy (it dials direct).
+if grep -qiE 'proxy tunnel|CONNECT tunnel established' <<<"$b_section"; then
+  error "App pod with the proxy env unset still used a proxy — unexpected; that path should dial direct."
+fi
+success "App-pod egress verified: WITH the ingestion proxy env the backend call tunnelled through the in-cluster squid; with it unset the same call dialled direct."
+
 echo ""
-echo "E2E PASS: cluster came up via an AUTHENTICATED proxy and pulled a workload through it."
+echo "E2E PASS: cluster came up via an AUTHENTICATED proxy, pulled a workload through it, and an ingestion-style app pod egressed to the backend through a proxy (a no-proxy pod bypassed it)."