Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 113 additions & 1 deletion scripts/tests/e2e-proxy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -133,5 +133,117 @@ if ! echo "$plog" | grep -E 'CONNECT .*auth\.docker\.io' | grep -q "$PROXY_USER"
error "No authenticated auth.docker.io CONNECT in the proxy log — the node's image pull did not traverse the proxy."
fi

# ── 4. APPLICATION-pod egress through a proxy (client-runtime#119) ────────────
# §1-3 prove NODE egress (image pulls) through the AUTHENTICATED host squid. But
# the ingestion Job and training pods are application pods that POST to the
# backend via requests/urllib3 — they only traverse a proxy if their POD env
# carries HTTP(S)_PROXY (build_job_spec / jobs_manager._add_environment_variables).
# That layer is what client-runtime#119 was about, and §3 never touches it.
#
# A pod cannot reach the host squid via host.k3d.internal (that alias is for k3d
# NODES, not pod DNS), so we stand up an in-cluster squid the pods reach by
# Service DNS — a closer model of a real corporate proxy reachable by name. Auth
# survival is already covered by §1-3; this section is about proxy-env ROUTING.
# One pod carries the ingestion-style proxy env and makes two calls to the SAME
# backend (one pod / two calls = deterministic; no multi-pod scheduling or
# log-flush race to flake on):
# * WITH the proxy env it reaches the backend THROUGH the squid (the fixed
# ingestion Job);
# * with that env unset the same call bypasses the squid / dials direct (the
# pre-fix Job — in a real proxy-only network like Charité that direct dial is
# refused with [Errno 111]; here the node has direct egress, so we assert the
# *absence* of a proxied CONNECT).
echo "── deploying an in-cluster squid the test pods can reach by Service DNS ──"
kubectl apply -f - <<'YAML'
apiVersion: v1
kind: ConfigMap
metadata: { name: tb-egress-squid }
data:
squid.conf: |
acl SSL_ports port 443
acl CONNECT method CONNECT
http_access deny CONNECT !SSL_ports
http_access allow all
http_port 3128
---
apiVersion: apps/v1
kind: Deployment
metadata: { name: tb-egress-squid, labels: { app: tb-egress-squid } }
spec:
replicas: 1
selector: { matchLabels: { app: tb-egress-squid } }
template:
metadata: { labels: { app: tb-egress-squid } }
spec:
containers:
- name: squid
image: ubuntu/squid:latest
ports: [{ containerPort: 3128 }]
# Gate rollout on squid actually LISTENING, so the probe pods below
# don't race a not-yet-bound port (the "connect refused after 1ms").
readinessProbe:
tcpSocket: { port: 3128 }
initialDelaySeconds: 2
periodSeconds: 2
volumeMounts:
- { name: conf, mountPath: /etc/squid/squid.conf, subPath: squid.conf }
volumes:
- { name: conf, configMap: { name: tb-egress-squid } }
---
apiVersion: v1
kind: Service
metadata: { name: tb-egress-squid }
spec:
selector: { app: tb-egress-squid }
ports: [{ port: 3128, targetPort: 3128 }]
YAML
kubectl rollout status deploy/tb-egress-squid --timeout=180s

# Mirrors _EGRESS_NO_PROXY / the chart's cluster-safe NO_PROXY: in-cluster direct.
APP_PROXY_URL="http://tb-egress-squid.default.svc.cluster.local:3128"
APP_NO_PROXY="localhost,127.0.0.1,mysql-client,requests-proxy-service,.svc,.svc.cluster.local,.cluster.local,10.0.0.0/8,172.16.0.0/12,192.168.0.0/16"

# ONE pod carrying the ingestion-style proxy env (BOTH cases — curl honours the
# lower-case `https_proxy` for HTTPS; the real ingestion env emits both, so the
# probe must too or it silently dials direct and the test is a lie). It makes two
# calls to the SAME backend: (A) with the proxy env it must traverse the squid via
# a CONNECT tunnel; (B) with the proxy env unset it must dial direct. A single pod
# keeps this deterministic — no multi-pod scheduling / log-flush race to flake on.
echo "── one app pod: WITH the ingestion proxy env it must tunnel via the squid; with it unset it must dial direct ──"
kubectl run egress-app --image=curlimages/curl:latest --restart=Never \
--env="HTTP_PROXY=${APP_PROXY_URL}" --env="HTTPS_PROXY=${APP_PROXY_URL}" \
--env="http_proxy=${APP_PROXY_URL}" --env="https_proxy=${APP_PROXY_URL}" \
--env="NO_PROXY=${APP_NO_PROXY}" --env="no_proxy=${APP_NO_PROXY}" \
--command -- sh -c '
echo ">>>>> SECTION_A_WITH_PROXY_ENV";
curl -v -sS -m 20 -o /dev/null https://api.tracebloc.io/ 2>&1;
echo ">>>>> SECTION_B_PROXY_ENV_UNSET";
env -u HTTP_PROXY -u HTTPS_PROXY -u http_proxy -u https_proxy -u NO_PROXY -u no_proxy curl -v -sS -m 20 -o /dev/null https://api.tracebloc.io/ 2>&1;
echo ">>>>> SECTION_END"'

# Wait for the pod to finish, then read its single log once.
for _ in $(seq 1 90); do
phase="$(kubectl get pod egress-app -o jsonpath='{.status.phase}' 2>/dev/null || true)"
[[ "$phase" == "Succeeded" || "$phase" == "Failed" ]] && break
sleep 2
done
applog="$(kubectl logs egress-app 2>/dev/null || true)"
a_section="$(printf '%s\n' "$applog" | awk '/SECTION_A_WITH_PROXY_ENV/{f=1;next} /SECTION_B_PROXY_ENV_UNSET/{f=0} f')"
b_section="$(printf '%s\n' "$applog" | awk '/SECTION_B_PROXY_ENV_UNSET/{f=1;next} /SECTION_END/{f=0} f')"

# Proof is CLIENT-side from `curl -v` — deterministic, unlike squid's access log
# which the log daemon buffers and may not have flushed when we read it.
printf '%s\n' "$a_section" | grep -iE 'Establish HTTP proxy tunnel|CONNECT tunnel established|< HTTP/1.1 200' | sed 's/^/ A WITH proxy env: /'
printf '%s\n' "$b_section" | grep -iE 'Trying|Connected to|< HTTP/1.1 200' | sed 's/^/ B env unset: /'
# (A) WITH the ingestion proxy env, the backend call MUST traverse the squid.
if ! printf '%s' "$a_section" | grep -qiE 'Establish HTTP proxy tunnel to api\.tracebloc\.io|CONNECT tunnel established'; then
error "App pod WITH the ingestion proxy env did NOT tunnel through the squid — ingestion-style backend egress is not proxied (the #119 bug)."
fi
# (B) With the env unset, the SAME call MUST NOT use a proxy (it dials direct).
if printf '%s' "$b_section" | grep -qiE 'proxy tunnel|CONNECT tunnel established'; then
error "App pod with the proxy env unset still used a proxy — unexpected; that path should dial direct."
fi
success "App-pod egress verified: WITH the ingestion proxy env the backend call tunnelled through the in-cluster squid; with it unset the same call dialled direct."

echo ""
echo "E2E PASS: cluster came up via an AUTHENTICATED proxy and pulled a workload through it."
echo "E2E PASS: cluster came up via an AUTHENTICATED proxy, pulled a workload through it, and an ingestion-style app pod egressed to the backend through a proxy (a no-proxy pod bypassed it)."
Loading