diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index 0611203..1678691 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -58,6 +58,49 @@ jobs: if: always() run: docker compose down --volumes --remove-orphans + golden-oracle: + name: Golden oracle (master-only) + runs-on: ubuntu-latest + # Master-only, slow, pre-deploy behavioral regression gate. NOT part of `verify` + # and NOT a PR-required check — it needs the docker-compose services and may run + # for several minutes. It pins the current ingest→stats behavior so a + # behavior-preserving refactor stays green and any drift turns red. + if: github.event_name == 'push' && (github.ref == 'refs/heads/master' || github.ref == 'refs/heads/main') + timeout-minutes: 30 + steps: + - name: Checkout + uses: actions/checkout@v6 + + - name: Setup pnpm + uses: pnpm/action-setup@v6 + with: + run_install: false + + - name: Setup Node.js + uses: actions/setup-node@v6 + with: + node-version: 25 + cache: pnpm + + - name: Install dependencies + run: pnpm install --frozen-lockfile + + - name: Start integration services + run: | + set -euo pipefail + docker compose up -d postgres rabbitmq minio + timeout 120 bash -c 'until docker compose exec -T postgres pg_isready -U solid -d solid_stats; do sleep 2; done' + timeout 120 bash -c 'until docker compose exec -T rabbitmq rabbitmq-diagnostics -q ping; do sleep 2; done' + timeout 120 bash -c 'until curl -fsS http://127.0.0.1:9000/minio/health/live; do sleep 2; done' + docker compose run --rm minio-create-bucket + + - name: Run golden oracle + run: pnpm run test:golden + + - name: Stop integration services + if: always() + run: docker compose down --volumes --remove-orphans + contract-diff: name: Contract diff runs-on: ubuntu-latest diff --git a/.planning/STATE.md b/.planning/STATE.md index 480aefd..bf4725e 100644 --- a/.planning/STATE.md +++ b/.planning/STATE.md @@ -28,7 +28,7 @@ See: .planning/PROJECT.md (updated 2026-05-31) Phase: Parity / Phase 1 — Game-Type-Aware Statistics — COMPLETE (5/5 plans + review fixes) Plan: 01-01..01-05 done; code-review BLOCK→fixed→APPROVE Status: Phase implemented + reviewed; landing to master. Migration 0008 (game_type dimension + nullable rotation_id + NULLS NOT DISTINCT + is_show) & 0009 (stale NULL-type cleanup); set-based classification; per-type + all-time recalc; per-type legacy-export/parity-sql; audit path made game-type-correct. pnpm verify green, 100% cov, OpenAPI diff empty. Deferred: large-bucket perf pass (review findings 3/4/5 + parity-driver flag). -Last activity: 2026-06-15 — Landed quick tasks 260615-u06 (F9 excludePlayers) and 260615-v6m (F5 orphaned-published reconciler) to master via PRs #22/#23; pnpm verify green, 100% cov +Last activity: 2026-06-17 — Built golden e2e integration oracle (260617-v4e): full ingest→stats chain on real PG/RabbitMQ/S3, characterization snapshots + hand-computed bounty anchors + pinned invariants; master-only `test:golden` outside verify; pnpm verify green, test:golden 26 live tests, 100% cov ## Performance Metrics @@ -135,6 +135,7 @@ Decisions are logged in PROJECT.md Key Decisions table. Recent decisions affecti | 260614-r9k | Guard all-time recalc against NULL replay_timestamp (toISOString crash) | 2026-06-14 | b0275e0 | [260614-r9k-recalc-null-timestamp-guard](./quick/260614-r9k-recalc-null-timestamp-guard/) | | 260615-u06 | F9 — apply the legacy excludePlayers exclusion to the player leaderboard | 2026-06-15 | b80a235 | [260615-u06-f9-excludeplayers-apply-the-legacy-exclu](./quick/260615-u06-f9-excludeplayers-apply-the-legacy-exclu/) | | 260615-v6m | F5 — reconciler re-queues orphaned `published` parse_jobs (self-healing ingest) | 2026-06-15 | f4e0c1b | [260615-v6m-f5-reconciler-for-orphaned-published-par](./quick/260615-v6m-f5-reconciler-for-orphaned-published-par/) | +| 260617-v4e | Golden e2e integration oracle — pins ingest→stats pipeline behavior (real PG/RabbitMQ/S3) before the Phase 2 refactor; master-only `test:golden`, not in verify | 2026-06-17 | 7a93295 | [260617-v4e-golden-e2e-integration-oracle-for-ingest](./quick/260617-v4e-golden-e2e-integration-oracle-for-ingest/) | ## Deferred Items diff --git a/.planning/quick/260617-v4e-golden-e2e-integration-oracle-for-ingest/260617-v4e-CONTEXT.md b/.planning/quick/260617-v4e-golden-e2e-integration-oracle-for-ingest/260617-v4e-CONTEXT.md new file mode 100644 index 0000000..82e9fda --- /dev/null +++ b/.planning/quick/260617-v4e-golden-e2e-integration-oracle-for-ingest/260617-v4e-CONTEXT.md @@ -0,0 +1,120 @@ +# Quick Task 260617-v4e: Golden e2e integration oracle — Context + +**Gathered:** 2026-06-17 +**Status:** Ready for planning +**Full rationale:** see `DEEP-BRAINSTORM.md` in this directory (the locked decision pack from a deep +Socratic brainstorm). This CONTEXT is the lean digest — decisions here are LOCKED, do not re-litigate. + + +## Task Boundary + +Build golden end-to-end integration test(s) that pin the **current observable behavior** of the +`server-2` ingest→stats pipeline (plus the public read surface) as a **behavioral regression oracle** +BEFORE the upcoming **Phase 2 Track C refactor** (Oxfmt mass-reformat + full Oxlint + `tsc`→tsdown +2-entry + depcruise/knip/lefthook — explicitly behavior-preserving). The oracle catches integration-level +drift that the unit suite (mocked boundaries) and the frozen-contract/oasdiff gate (API shape only) miss. + +Convention-bound test work — author it THROUGH `solidstats-server-ts-tests` (+ shared testing standards), +citing the rules relied on. Do NOT hand-roll. + + + +## Implementation Decisions (LOCKED) + +### Scope — full chain + read API +One golden test drives the real production path per real artifact: +`IntervalTask.runOnce()` → `IngestPromotionService.promotePending()` (`src/modules/ingest/service.ts`) +→ durable `parse_jobs` row + RabbitMQ publish (`src/modules/ingest/publisher.ts`) +→ real broker delivery → `ParseCompletedMessage` consumer (`src/infra/queue/rabbitmq.ts`) +→ real S3 artifact load (`artifactLoader.loadParserArtifact({bucket,key})`, `src/modules/ingest/runtime.ts:94`) +→ `recordParserCompleted()` (`src/modules/ingest/repository/repository.ts:525`) +→ `ParserResultRecalculationService.recalculateParserResult()` (`src/modules/statistics/service/recalculation.ts`) +→ assert via `GET /stats/*` (`src/modules/public-stats/...`). + +### Realism — real PG + real RabbitMQ + real S3 (no mocked boundary) +Mirror the existing harness: docker-compose services on fixed localhost ports (PG `15432`, Rabbit `5673`, +S3 `9000`, env-overridable), real schema via `runMigrations()`, `truncate … cascade` isolation, +**unique S3 keys + ephemeral queue per run**. A mock at a contract boundary hides the exact failures the +oracle exists to catch (brief anti-pattern #1). Drive promotion via `IntervalTask.runOnce()` (no real +timer — principle 9); await parse-completed via a **bounded DB-state poll** (the consumer exposes no +completion Promise; the test may run long). + +### Fixtures — hundreds of REAL artifacts, committed as a gzip archive, unpacked at test start +Real `ParserArtifact` JSONs (the shape `server-2` ingests — `src/modules/statistics/parser-artifact.ts`, +matches parser-2 `parse-artifact-v3.schema.json`). Stored as ONE committed gzip archive in-tree, unpacked +at test start, iterated with `test.each`. **Capture is gated** (agent lacks VPS access): a deterministic +**capture script pulls the real production artifacts from the VPS over SSH** (the actual objects prod +ingested) and packs the archive — human runs it once under `!`. Note: Happ VPN is always-on; SSH to own +VPS needs the `ip rule` bypass or it hangs (global memory `happ-vpn-bypass-for-servers`). Local fallback +floor = the ~10–13 `replay-parser-2` golden inputs parsed via its CLI, committed so the oracle is never +empty. The test **guards on archive presence and skips cleanly** when absent (principle 8). + +### Assertions — characterization snapshots + bounty anchor +Golden snapshots of the FULL observable surface (`parser_results` + all evidence fields, `parser_events`, +`player_stats`, `squad_stats`, `commander_side_stats`, `bounty_points`, terminal `parse_jobs`, +`ingest_staging_records` status/evidence, and `GET /stats/*` responses) with **deterministic +normalization** (UUID→stable natural key by checksum/nickname/replay, timestamps redacted, rows sorted), +PLUS hand-computed bounty assertions on 2–3 anchor cases (bounty values are business-critical — check +semantics, not only snapshot equality). Pin CURRENT behavior as-is; if a pinned behavior is known +tech-debt, comment it + point to backlog — do NOT "fix" inside the oracle (principle 7). + +### Invariants / idempotency to pin (from current code, as-is) +- Durable `parse_jobs` row exists **before** the RabbitMQ publish (never fire-and-forget). +- Re-promote same staging row → dedup/no-op: `status='promoted'` + `promotion_evidence.duplicate_replay_id`. +- Same `source_system`+`source_replay_id`, different bytes/checksum → `status='conflicted'` + + `conflict_details.reason='source_identity_changed_bytes'` (`service.ts:147`). +- Checksum-duplicate (no source match) → `status='promoted'` + duplicate evidence appended (`service.ts:166`). +- Re-deliver same `parse.completed` → terminal state recorded once. +- Auth/role gate (flow 4): a protected route rejects without role / accepts with role via the shared + `requireRole`/`requireAnyRole` pre-handlers (`src/modules/auth/routes/authorization.ts`). + +### Gate placement — master-only, slow, separate from `verify` +Dedicated script (e.g. `test:golden`) + a **master-only pre-deploy CI job**. NOT in `verify` and NOT in +`test:coverage` → zero coverage obligation (principle 10); `verify` stays green at 100% without the +archive (principle 8). The test MAY run long — that is accepted and intended. + +### Cross-app boundary (from replay-parser-2 decision pack — respect it) +The parser does NOT calculate bounty. The parser emits compact kill/stat facts; **`server-2` computes +final bounty from previous-rotation effectiveness + cross-replay state**. Consequence for fixtures: a +single-artifact run yields meaningful bounty ONLY if a **previous rotation with known effectiveness is +seeded**. The bounty anchor cases MUST set up the previous-rotation state. CORRECTION (RESEARCH §1): +server-2 does NOT verify artifact bytes on ingest — `loadParserArtifact` is plain `JSON.parse`, no schema +or checksum gate; `artifact_checksum`/`source_checksum` are stored as metadata only and need not match the +bytes (byte-verification is parser-2's job). A fixture needs only a well-formed `^[0-9a-f]{64}$` checksum. + +### Out of scope (non-goals) +- request/moderation **business-logic workflow** (Phase 2 rewrites it → pinning = false reds). Only the + role-gate mechanism is in scope. +- NOT wired into fast `verify`/`test:coverage`; no coverage obligation. +- NO fresh-schema/bucket/db per test — repo convention is `truncate … cascade` (Step 0: repo overrides + the generic brief). +- NOT a parity/value-vs-legacy comparison (that is the cutover diff harness). Pins `server-2`'s OWN + current behavior. + + + +## Specific Ideas + +- Harness divergence already documented in `.planning/codebase/TESTING.md`: integration suite connects to + **docker-compose** services, NOT programmatic testcontainers. Follow that, not the brief's "testcontainers". +- Existing references to mirror for wiring: `src/test/integration/adapters.test.ts` (real PG+Rabbit+S3 + health), `src/modules/ingest/repository/tests/postgres.test.ts` (real `IngestPromotionService` + + `PgIngestRepository` + Postgres, reuses seed helpers). +- Extract ONE shared fixture-loader/unpacker and ONE snapshot-normalizer; reuse the production schema/types + (never a hand-mirrored copy) — principle 9. +- `verify` for the plan's tasks must rely on typecheck/lint + unit + the golden test **skipping cleanly** + when Docker/the archive are absent (live run is CI/master-only). Docker is frequently unavailable in the + local dev env — the golden test and its `verify` step must tolerate that. + + + +## Canonical References + +- `DEEP-BRAINSTORM.md` (this directory) — full decision pack, question ledger, risks, acceptance criteria. +- `/tmp/golden-integration-test-prompt.md` — the reusable source brief (server-2 section is ground truth; + read its "real call path", "durable-job invariant", "high-value golden flows", anti-patterns). +- `.planning/codebase/TESTING.md` — the repo's actual testing reality (harness, coverage gate). +- Skills: `solidstats-server-ts-tests` (harness, per-layer map, coverage), `solidstats-server-ts-conventions`, + `solidstats-shared-testing-standards`, `solidstats-shared-project-standards`. +- parser-2 `schemas/parse-artifact-v3.schema.json` — the cross-app artifact contract. + diff --git a/.planning/quick/260617-v4e-golden-e2e-integration-oracle-for-ingest/260617-v4e-PLAN.md b/.planning/quick/260617-v4e-golden-e2e-integration-oracle-for-ingest/260617-v4e-PLAN.md new file mode 100644 index 0000000..59009e0 --- /dev/null +++ b/.planning/quick/260617-v4e-golden-e2e-integration-oracle-for-ingest/260617-v4e-PLAN.md @@ -0,0 +1,487 @@ +--- +phase: 260617-v4e-golden-e2e-integration-oracle-for-ingest +plan: 01 +type: execute +wave: 1 +depends_on: [] +files_modified: + - package.json + - src/test/golden/fixtures/loader.ts + - src/test/golden/fixtures/normalize.ts + - src/test/golden/fixtures/artifacts.tar.gz + - src/test/golden/scripts/capture-artifacts.sh + - src/test/golden/scripts/build-floor-archive.sh + - src/test/golden/README.md + - src/test/golden/pipeline.golden.test.ts + - src/test/golden/bounty-anchor.golden.test.ts + - src/test/golden/invariants.golden.test.ts + - src/test/golden/__snapshots__/ +autonomous: true +requirements: + - INGEST-02 + - INGEST-03 +user_setup: + - service: production-vps-s3 + why: "Capture the hundreds of REAL production parser artifacts for the full golden corpus (agent has no VPS access). Floor corpus from parser-2 is committed so the oracle is never empty without this step." + env_vars: + - name: VPS_S3_ENDPOINT + source: "Production VPS — the S3/MinIO endpoint URL hosting the solid-replays bucket" + - name: VPS_S3_BUCKET + source: "Production VPS — artifact bucket name (observed default: solid-replays)" + - name: VPS_S3_ACCESS_KEY_ID + source: "Production VPS S3 credentials" + - name: VPS_S3_SECRET_ACCESS_KEY + source: "Production VPS S3 credentials" + dashboard_config: + - task: "Ensure the Happ VPN ip-rule bypass for the VPS host is active before running the capture script (else mc/aws S3 over SSH/VPN hangs) — see global memory happ-vpn-bypass-for-servers" + location: "Local shell — ip rule for VPS_S3_ENDPOINT host" + +must_haves: + truths: + - "The full production ingest→stats path runs end-to-end against real PG + real RabbitMQ + real S3 (no mocked contract boundary), driven through the same factories server.ts wires." + - "A durable parse_jobs row exists BEFORE the RabbitMQ parse.requested publish (durable-job invariant pinned)." + - "A real parse.completed delivered through the live broker loads the S3 artifact, records the parser result, recalculates aggregates/bounty, and the new state is observable via GET /stats/* backed by PgPublicStatsReadModel." + - "Hand-computed bounty values on 2-3 anchor cases match the persisted bounty_points (semantics checked, not only snapshot equality), with a seeded previous rotation supplying effectiveness." + - "Re-promoting the same staging row dedups (status=promoted + promotion_evidence.duplicate_replay_id); a same-source different-bytes row conflicts (status=conflicted + conflict_details.reason=source_identity_changed_bytes); a re-delivered parse.completed records terminal state once." + - "The protected admin route rejects without role and accepts with role via the shared requireRole pre-handler." + - "The golden suite SKIPS cleanly (not fails) when Docker services or the fixture archive are absent, so `pnpm verify` stays green at 100% without them." + - "The golden suite is wired OUT of verify and test:coverage (zero coverage obligation) and into a dedicated test:golden script." + artifacts: + - path: "src/test/golden/fixtures/loader.ts" + provides: "ONE shared fixture loader — unpacks artifacts.tar.gz to a per-run tmp dir, iterates real ParserArtifact JSONs, exposes archive-presence + docker-availability guards for clean skip" + min_lines: 40 + - path: "src/test/golden/fixtures/normalize.ts" + provides: "ONE shared snapshot normalizer — uuid→stable natural key map, timestamp redaction, deterministic row sort" + min_lines: 40 + - path: "src/test/golden/pipeline.golden.test.ts" + provides: "Full-chain characterization oracle: promote → durable job → real broker publish → completed consumer → recalc → GET /stats/* snapshot" + contains: "test.each" + - path: "src/test/golden/bounty-anchor.golden.test.ts" + provides: "2-3 hand-computed bounty anchor cases with seeded previous-rotation effectiveness, asserted with toEqual" + - path: "src/test/golden/invariants.golden.test.ts" + provides: "Idempotency/conflict/role-gate invariants pinned as-is" + - path: "src/test/golden/scripts/capture-artifacts.sh" + provides: "Gated, deterministic VPS S3 capture script (args/env-driven), logs captured-vs-skipped counts, packs artifacts.tar.gz" + - path: "src/test/golden/scripts/build-floor-archive.sh" + provides: "Committable floor: runs replay-parser-2 CLI over its golden OCAP corpus and packs the floor archive so the oracle is never empty" + - path: "src/test/golden/README.md" + provides: "One-line human capture command, floor-build command, skip semantics, gate placement, and pinned-tech-debt notes" + - path: "package.json" + provides: "test:golden script that no verify-chained script globs" + contains: "test:golden" + key_links: + - from: "src/test/golden/pipeline.golden.test.ts" + to: "src/server.ts" + via: "constructs the SAME production factories server.ts wires (PgIngestRepository, IngestPromotionService, ParseJobPublisher, PgStatisticsRepository, ParserResultRecalculationService, createRabbitMqParserRuntime, createStorageClient, buildApp+PgPublicStatsReadModel) — never a hand-mirrored copy" + pattern: "createRabbitMqParserRuntime|PgPublicStatsReadModel|ParserResultRecalculationService" + - from: "src/test/golden/pipeline.golden.test.ts" + to: "src/infra/db/migrate.ts" + via: "runMigrations(config.databaseUrl) in beforeAll builds the real schema; truncate … cascade between cases" + pattern: "runMigrations" + - from: "src/test/golden/fixtures/loader.ts" + to: "src/test/golden/fixtures/artifacts.tar.gz" + via: "unpacks the committed archive at test start; guard returns skip when absent" + pattern: "artifacts\\.tar\\.gz" + - from: "package.json test:golden" + to: "src/test/golden" + via: "vitest run src/test/golden --no-file-parallelism — a path no verify-chained script (test, test:integration, test:coverage) targets" + pattern: "test:golden" +--- + + +Build a golden end-to-end integration oracle that pins the CURRENT observable behavior of the +`server-2` ingest→stats pipeline (plus the public read surface and the auth role-gate), against +real PostgreSQL + RabbitMQ + S3, as a behavioral regression net BEFORE the Phase 2 Track C +behavior-preserving refactor. The oracle catches integration-level drift that the mocked-boundary +unit suite and the API-shape-only contract gate miss. + +Purpose: A behavior-preserving refactor must keep this suite green; any behavioral drift must turn +it red. It is a SEPARATE master-only pre-deploy gate, NOT part of fast `verify`. + +Output: A `test:golden` suite at `src/test/golden/**` (one shared fixture loader, one snapshot +normalizer, three test files), the gated SSH/S3 capture script + the committable parser-2 floor +build script + the committed floor archive, a README with the one-line human run command, and a +`package.json` `test:golden` script that no `verify`-chained script sweeps. + +SCOPE-SANITY CALL (preamble, for the plan-checker): This is honestly PHASE-SIZED work, not a +typical "quick" task — it stands up a brand-new real-infra test harness, a fixture capture/floor +pipeline, three test files, and a deterministic normalizer. It is delivered as a SINGLE PLAN with +6 atomic, independently-committable tasks (T1→T6) sequenced so `pnpm verify` stays green between +every commit (the golden suite is OUT of `verify` from T1 onward, and skips cleanly without +Docker/archive). The size is inherent to a real-boundary oracle and cannot be reduced without +mocking a contract boundary (the exact anti-pattern this oracle exists to defeat). No feature is +omitted, simplified, or deferred. If the orchestrator prefers, T1-T2 (scaffold+floor+full-chain) +and T3-T6 (anchors+invariants+capture+docs) are a natural two-commit-group split, but a single +plan keeps the shared helpers coherent. + + + +@/home/afgan0r/Projects/SolidGames/server-2/.claude/gsd-core/workflows/execute-plan.md +@/home/afgan0r/Projects/SolidGames/server-2/.claude/gsd-core/templates/summary.md + + + +@.planning/quick/260617-v4e-golden-e2e-integration-oracle-for-ingest/260617-v4e-CONTEXT.md +@.planning/quick/260617-v4e-golden-e2e-integration-oracle-for-ingest/260617-v4e-RESEARCH.md +@.planning/STATE.md + +# Production wiring the test MUST mirror (reuse these factories, never re-implement): +@src/server.ts +@src/app.ts +@src/modules/ingest/runtime.ts +@src/infra/queue/rabbitmq.ts +@src/infra/queue/messages.ts +@src/infra/storage/client.ts +@src/modules/statistics/service/recalculation.ts +@src/modules/statistics/parser-artifact.ts + +# Existing real-infra integration tests to mirror for wiring/isolation: +@src/test/integration/adapters.test.ts +@src/modules/ingest/repository/tests/postgres.test.ts + +# Skills the executor MUST author THROUGH (read first, cite the rules relied on): +@.claude/skills/solidstats-server-ts-tests/SKILL.md +@.claude/skills/solidstats-server-ts-conventions/SKILL.md +@.claude/skills/solidstats-shared-testing-standards/SKILL.md +@.claude/skills/solidstats-shared-project-standards/SKILL.md + + + + + + Task 1: Scaffold golden suite out of verify — shared loader, normalizer, skip guards, test:golden script + package.json, src/test/golden/fixtures/loader.ts, src/test/golden/fixtures/normalize.ts, src/test/golden/README.md, src/test/golden/pipeline.golden.test.ts + +Establish the golden suite as a separate gate placed at `src/test/golden/**` (NOT under +`src/test/integration/` — RESEARCH open-item #3: the `test:integration` positional glob +`vitest run src/test/integration $(find src -path '*/tests/postgres.test.ts')` would otherwise +sweep it into `verify`). Add a `test:golden` script to package.json: +`vitest run src/test/golden --no-file-parallelism`. Do NOT add it to `verify` and do NOT add it +to `test:coverage` (`test:golden` is a separate invocation never run by either → zero coverage +obligation per `[tests]` Coverage gate; `src/test/**` is already coverage-excluded in +vitest.config.ts line 15). Confirm by inspection that NONE of the verify-chained scripts +(`test`, `test:integration`, `test:coverage`) target `src/test/golden` — the bare `test` script +globs `src/**/*.test.ts` but its filename suffix `.golden.test.ts` still matches `*.test.ts`, so +the suite files MUST guard-skip when infra is absent (the bare `test` run has no Docker), which +the loader's guards provide; verify the `test` and `test:coverage` runs pass with the new files +present and Docker absent (skips cleanly), keeping `verify` green at 100%. + +Create `src/test/golden/fixtures/loader.ts` as the ONE shared fixture loader/unpacker (principle 9, +no duplication): unpack the committed `src/test/golden/fixtures/artifacts.tar.gz` to a per-run tmp +dir (`os.tmpdir()` + a run id) at test start, return the list of real `ParserArtifact` JSONs typed +as the PRODUCTION `ParserArtifact` from `src/modules/statistics/parser-artifact.ts` (never a +hand-mirrored shape — principle 9). Expose two guards used by every golden test for clean skip +(principle 8 / CONTEXT): `archivePresent()` (false when artifacts.tar.gz is missing) and +`dockerReachable(config)` (probes PG+RabbitMQ+S3 health via the same `createDbClient`/ +`createQueueClient`/`createStorageClient` adapters `adapters.test.ts` uses, with a short connect +timeout; false on connect failure). Tests call `describe.skipIf(!present || !reachable)` so the +suite reports SKIPPED, not FAILED, when Docker or the archive are absent. Use the fixed-port env +defaults from `adapters.test.ts`/`postgres.test.ts` (PG 15432, Rabbit 5673, S3 9000) via +`loadConfig`. + +Create `src/test/golden/fixtures/normalize.ts` as the ONE shared snapshot normalizer +(RESEARCH §4 recipe): build a uuid→stable-natural-key map (replay: `source_system+source_replay_id` +or checksum; player: nickname/steam_id; squad: tag; job: replay-key+contract_version; rotation: +name) and substitute every uuid id AND fk column (`replay_id`,`parse_job_id`,`parser_result_id`, +`rotation_id`) through it; redact timestamp columns (`created_at`/`updated_at`/`calculated_at`/ +`finished_at`/`published_at`/`started_at`) to a fixed token; sort rows by natural key before +snapshotting. Keep contractual order (cursor-paginated lists, bounty `inputs.events[]`) — do NOT +re-sort those. + +Create `src/test/golden/README.md` (English — project rule H) documenting: the one-line human +capture command (`bash src/test/golden/scripts/capture-artifacts.sh` with the env vars), the floor +build command, the skip semantics, the master-only gate placement, and a "pinned tech-debt" note +template for behaviors recorded as-is (principle 7). + +Create `src/test/golden/pipeline.golden.test.ts` as a SKELETON only in this task (the loader/skip +plumbing + a single `it` that asserts `loader.archivePresent()` is wired and the suite skips +cleanly when infra is absent) — the full chain assertions land in Task 2. This keeps the commit +atomic and `verify`-green. + +NOTE for `` literal hygiene: the negative-grep token `test:coverage` below names a +package.json script key; it legitimately appears in package.json and is not a forbidden literal in +any written source file. + + + pnpm run typecheck && pnpm run lint && grep -q '"test:golden"' package.json && ! grep -E '"verify".*test:golden|"test:coverage".*src/test/golden' package.json && grep -rq "ParserArtifact" src/test/golden/fixtures/loader.ts && pnpm test 2>&1 | tail -5 + + +package.json has a `test:golden` script targeting `src/test/golden`; no verify-chained script +sweeps that path. `loader.ts` imports the production `ParserArtifact` type and exposes +`archivePresent()` + `dockerReachable()` guards; `normalize.ts` exports the id-map + +timestamp-redaction + row-sort normalizer. README documents the capture/floor/skip/gate. The +skeleton `pipeline.golden.test.ts` skips cleanly with no Docker. `pnpm test` passes; typecheck and +lint pass. `verify` remains green (golden suite carries zero coverage obligation). + + + + + Task 2: Floor archive + full-chain characterization oracle (promote → durable job → real broker → consumer → recalc → GET /stats/*) + src/test/golden/scripts/build-floor-archive.sh, src/test/golden/fixtures/artifacts.tar.gz, src/test/golden/pipeline.golden.test.ts, src/test/golden/__snapshots__/ + +First make the oracle non-empty WITHOUT VPS access. Write `src/test/golden/scripts/build-floor-archive.sh`: +for each `success`/`partial` OCAP input in the sibling `replay-parser-2` golden corpus +(`../replay-parser-2/crates/parser-core/tests/fixtures/*.ocap.json`, enumerated from its +`golden/manifest.json`), run the parser-2 CLI +`cargo run --release --bin replay-parser-2 -- parse --input --output .json` +(per RESEARCH §2) to emit a real `ParseArtifact` JSON, then `tar czf +src/test/golden/fixtures/artifacts.tar.gz` over the outputs. EXCLUDE the `invalid-json` → +`status:"failed"` corpus entry from the broker-fed set (RESEARCH pitfall: the consumer +nacks-with-requeue on any throw → infinite redelivery; only `success`/`partial` artifacts are safe +to round-trip through the real broker). Run this script once to produce and COMMIT +`artifacts.tar.gz` (so the oracle is never empty / never silently skips to zero — principle 8); log +captured-vs-skipped counts (no silent caps). If `cargo`/parser-2 are unavailable in the execution +env, commit a minimal hand-assembled-from-real-shape floor (real parser-2 output captured +elsewhere) and note it in the README — do NOT commit synthetic toy blobs (principle 4); the script +remains the source of truth. + +Complete `src/test/golden/pipeline.golden.test.ts` as the full-chain oracle, mirroring the exact +production wiring in `server.ts` (reuse production factories — `[conventions]` factory-DI; never +re-implement): construct `PgIngestRepository`, `IngestPromotionService`, `ParseJobPublisher`, +`PgStatisticsRepository`, `ParserResultRecalculationService`, the real broker via +`createRabbitMqParserRuntime(config)`, the real S3 via `createStorageClient(config)`, and build the +app via `buildApp({ publicStatsReadModel: new PgPublicStatsReadModel(pool) })` so `GET /stats/*` +reflects recalculated DB state (NOT the in-memory default read model). `beforeAll`: +`runMigrations(config.databaseUrl)` (real schema, never mirrored DDL — `[tests]` Integration +Harness). `beforeEach`: `truncate parser_results, parser_events, player_stats, squad_stats, +commander_side_stats, bounty_points, parse_jobs, replays, ingest_staging_records, rotations cascade` +(repo isolation convention is `truncate … cascade`, NOT fresh schema/db — CONTEXT Step-0 override). + +For EACH floor artifact (`test.each` over the loader output): (1) insert an `ingest_staging_records` +row pointing at a UNIQUE S3 key (`artifacts/v3//.json`) and upload the artifact bytes to +that key via the storage client `PutObjectCommand` (real S3); (2) drive promotion by calling +`promotionService.promotePending(...)` then `publisher.publishQueued(...)` directly (these are +exactly what `createIngestRuntime` wraps — `runtime.ts:67,78` — and `IntervalTask.runOnce()` swallows +throws, so the public methods give clearer assertions); ASSERT a durable `parse_jobs` row exists +BEFORE the publish (durable-job invariant — CONTEXT, `[conventions]` queue reliability) and assert +the published `parse.requested` message contract; (3) publish a real `parse.completed` +`ParseCompletedMessage` (shape from `messages.ts`; `artifact_checksum` can be any well-formed +`^[0-9a-f]{64}$` — server-2 does NOT verify artifact bytes, RESEARCH §1 correction) onto exchange +`solid_stats.parser` rk `parse.completed` through the real `createRabbitMqParserRuntime` consumer +wired to `recordParserCompleted`+`recalculateParserResult`; (4) bounded-poll `parse_jobs.status = +'succeeded'` AND a `current` `parser_results` row for the replay, with a HARD timeout ceiling +(e.g. 30s, 200ms interval) — the poll timeout is the only backstop against the nack-requeue loop +(RESEARCH §await-seam; no real timers in unit loops, but a real-broker integration poll loop is +permitted per `[testing-standards] §E` since deterministic clock control is not possible for a live +broker — cite this). Then ASSERT the full observable surface with the shared normalizer: snapshot +(`toMatchFileSnapshot()` under `src/test/golden/__snapshots__/`) the normalized rows of +`parser_results` (+ evidence fields), `parser_events`, `player_stats`, `squad_stats`, +`commander_side_stats`, `bounty_points`, terminal `parse_jobs`, `ingest_staging_records` +status/evidence, AND the normalized `GET /stats/*` responses via `app.inject`. Pin CURRENT behavior +as-is; if a snapshot captures known tech-debt, add a one-line comment + backlog pointer (principle 7), +do NOT fix it here. Close the broker/app/pool in `afterAll`. + + + pnpm run typecheck && pnpm run lint && test -f src/test/golden/fixtures/artifacts.tar.gz && grep -q "runMigrations" src/test/golden/pipeline.golden.test.ts && grep -q "createRabbitMqParserRuntime" src/test/golden/pipeline.golden.test.ts && grep -q "PgPublicStatsReadModel" src/test/golden/pipeline.golden.test.ts && pnpm run test:golden 2>&1 | tail -15 + + +`build-floor-archive.sh` produces real parser-2 artifacts and packs `artifacts.tar.gz` (committed, +non-empty, success/partial only). `pipeline.golden.test.ts` drives the real chain through the same +factories `server.ts` wires, asserts the durable-job-before-publish invariant and the published +message contract, round-trips a real `parse.completed` through the live broker, bounded-polls to +terminal `succeeded`, and snapshots the normalized full observable surface incl. `GET /stats/*`. +With Docker up + archive present, `test:golden` runs the full chain green; with Docker absent it +skips cleanly. `verify` unaffected. typecheck + lint pass. + + + + + Task 3: Bounty anchor oracle — 2-3 hand-computed cases with seeded previous-rotation effectiveness + src/test/golden/bounty-anchor.golden.test.ts + +Create `src/test/golden/bounty-anchor.golden.test.ts` (same harness/skip guards as Task 2, +`truncate … cascade` isolation). Bounty is business-critical → assert SEMANTICS with `toEqual` +hand-computed values, not only snapshot equality (CONTEXT; `[testing-standards] §G` strong oracle). +Per RESEARCH §5, a non-trivial bounty requires a seeded PREVIOUS rotation supplying effectiveness: +seed two `rotations` (`prev` with earlier `starts_at`, `current` whose window covers the artifact's +`replay_timestamp`), seed in `prev` a `player_stats` row for the victim player with +`stats = {"kills":K,"deaths":{"total":D}}` (matching `game_type`) and optionally a `squad_stats` +row for the victim's squad, and ensure the artifact under test resolves to `current` (its replay +timestamp inside the `current` window). The artifact must contain an enemy `kill` +(`players[].kills[].c="enemy_kill"`) whose victim `eid` resolves to the seeded player. Then run the +same full chain (promote → broker → completed consumer → recalc) and assert `bounty_points` matches +the hand-computed `points = round₂((1+playerEff)·(1+squadEff))`, `eff = kills/max(1,deaths.total)` +(`bounty.ts:124`, `repository.ts:1073`). + +Cover three anchor cases (CONTEXT / RESEARCH worked example): (a) player-only effectiveness +(victim prev kills=3 deaths.total=1 → playerEff=3, no squad → squadFactor=0 → points=4.00); +(b) player+squad effectiveness; (c) excluded teamkill → 0 (`bounty.ts:103` teamkill/unknown/ +missing-victim → 0). Bind shared Arrange/Assert values to variables (`[testing-standards] §C` DRY). +Use the production bounty path only — recompute expected values by hand from the formula, never call +the production calculator to produce the expectation (RESEARCH Don't-Hand-Roll). Pin current +behavior as-is. + + + pnpm run typecheck && pnpm run lint && grep -q "toEqual" src/test/golden/bounty-anchor.golden.test.ts && grep -Eq "round|4\\.00|playerEff|deaths" src/test/golden/bounty-anchor.golden.test.ts && pnpm run test:golden 2>&1 | tail -10 + + +Three bounty anchor cases (player-only, player+squad, excluded teamkill) seed a previous rotation, +drive the real chain, and assert `bounty_points` against hand-computed values with `toEqual`. Skips +cleanly without Docker; passes the full chain with Docker up. typecheck + lint pass; `verify` +unaffected. + + + + + Task 4: Invariant oracle — idempotency, dedup, conflict, re-delivery, role-gate + src/test/golden/invariants.golden.test.ts + +Create `src/test/golden/invariants.golden.test.ts` (same harness/skip guards, `truncate … cascade` +isolation). Pin the invariants from CONTEXT as-is (principle 7): + +1. Durable-job-before-publish: already asserted in the full-chain (Task 2); restate a focused + assertion that the `parse_jobs` row exists before any `parse.requested` publish (never + fire-and-forget — `[conventions]` queue reliability). +2. Re-promote the same staging row → dedup/no-op: `status='promoted'` + + `promotion_evidence.duplicate_replay_id` (checksum-duplicate path, `service.ts:166`). +3. Same `source_system`+`source_replay_id`, different bytes/checksum → `status='conflicted'` + + `conflict_details.reason='source_identity_changed_bytes'` (`service.ts:147`). This is a + documented SYNTHETIC staging pair (RESEARCH open-item #2) — assert via the repository/service + directly, NOT by round-tripping a throwing artifact through the broker. +4. Re-deliver the same `parse.completed` → terminal state recorded once: call the completed path + twice (or publish twice through the broker and bounded-poll), assert `recordParserCompleted` + second call is a no-op (returns null / no duplicate `current` parser_results) — idempotency. +5. parse.failed path: hand-build a `parse.failed` `ParseFailedMessage` (documented synthetic + exception — RESEARCH open-item #2) and assert terminal failed state recorded once + (`recordParserFailed` idempotent). Do NOT round-trip a throwing artifact. +6. Auth/role gate (flow 4): build the app, hit a protected admin route (e.g. + `admin/rotations.ts:54` `requireRole(auth,"admin")`) via `app.inject` — assert 401/403 without a + role-bearing session and 200/2xx with one, exercising the shared `requireRole`/`requireAnyRole` + pre-handlers (`auth/routes/authorization.ts:25-29`). Seed the role session via the in-memory auth + store wired into `buildApp` (mirror existing route integration tests). Do NOT pin + request/moderation business-logic workflow (out of scope — Phase 2 rewrites it; CONTEXT non-goals) + — only the role-gate mechanism. + +Each invariant is one focused `it` with AAA + a strong oracle. + + + pnpm run typecheck && pnpm run lint && grep -q "source_identity_changed_bytes" src/test/golden/invariants.golden.test.ts && grep -Eq "requireRole|requireAnyRole|admin" src/test/golden/invariants.golden.test.ts && grep -Eq "duplicate_replay_id|conflict_details" src/test/golden/invariants.golden.test.ts && pnpm run test:golden 2>&1 | tail -10 + + +Six invariants pinned: durable-job-before-publish, re-promote dedup, source-bytes conflict, +re-delivery idempotency, parse.failed terminal-once, and the admin role-gate (401/403 vs 2xx via +the shared pre-handlers). Conflict + parse.failed use documented synthetic inputs (no throwing +artifact through the broker). Skips cleanly without Docker; passes with Docker up. typecheck + lint +pass; `verify` unaffected. + + + + + Task 5: Gated VPS S3 capture script for the full hundreds-of-artifacts corpus + src/test/golden/scripts/capture-artifacts.sh, src/test/golden/README.md + +Write `src/test/golden/scripts/capture-artifacts.sh` — a deterministic, args/env-driven capture +script the HUMAN runs once under `!` to pull the REAL production parser artifacts from the VPS S3 +bucket and pack them into `src/test/golden/fixtures/artifacts.tar.gz` (replacing/augmenting the +floor). Inputs via env (the user-provided open item — RESEARCH open Q1): `VPS_S3_ENDPOINT`, +`VPS_S3_BUCKET` (observed default `solid-replays`), `VPS_S3_ACCESS_KEY_ID`, +`VPS_S3_SECRET_ACCESS_KEY`, and an optional `N` cap. Steps (RESEARCH §3): (1) print a reminder to +ensure the Happ VPN `ip rule` bypass for the VPS host is active (else `mc`/`aws s3` hangs — global +memory `happ-vpn-bypass-for-servers`); (2) pull objects under the observed key prefix +`artifacts/v3/` via `mc cp --recursive` (MinIO client) or `aws s3 cp --recursive` against the VPS S3 +endpoint (prefer S3 client over `scp` — artifacts are S3 objects); (3) optionally fold in the +parser-2 floor (call `build-floor-archive.sh`) so the corpus is a superset; (4) `tar czf` the +archive. The script MUST log captured-vs-skipped counts and never silently cap (principle 8); fail +loudly with a clear message on missing env or zero objects. Do NOT hardcode any VPS host/key/cred +values into the script or any committed file (STATE blocker: never commit host/key values). The +script reads them from env only. + +Update `src/test/golden/README.md` with the exact one-line human run command (env-prefixed +`bash src/test/golden/scripts/capture-artifacts.sh`) and a short note that the committed archive is +the floor until the human runs the capture; document that the live full-corpus run is master-only +CI. NEVER commit the captured production archive's secrets; the archive itself (artifact JSONs) is +committable. + + + pnpm run lint && test -x src/test/golden/scripts/capture-artifacts.sh -o -f src/test/golden/scripts/capture-artifacts.sh && bash -n src/test/golden/scripts/capture-artifacts.sh && grep -q "VPS_S3_ENDPOINT" src/test/golden/scripts/capture-artifacts.sh && grep -Eq "ip rule|happ|VPN" src/test/golden/scripts/capture-artifacts.sh && ! grep -Eq "[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}" src/test/golden/scripts/capture-artifacts.sh && grep -q "capture-artifacts.sh" src/test/golden/README.md + + +`capture-artifacts.sh` is a valid (`bash -n`) deterministic env-driven script that pulls VPS S3 +artifacts, folds in the floor, logs captured-vs-skipped counts, fails loudly on missing env, and +hardcodes NO host/key/cred values (no literal IPs). README has the one-line human run command and +the floor/master-only-CI note. lint passes. + + + + + Task 6: Master-only CI gate wiring + final verify-green confirmation + package.json, src/test/golden/README.md + +Wire the golden suite as a SEPARATE master-only pre-deploy gate (principle 11; CONTEXT gate +placement), NOT into `verify`. Inspect the existing CI workflow (the repo's `Verify`/CI job — +referenced in STATE as the `Verify` GitHub workflow; locate `.github/workflows/*.yml`). Add a +distinct master-only job (e.g. `golden-oracle`) that brings up the docker-compose PG/RabbitMQ/S3 +services, runs `pnpm run test:golden`, and is gated to the master branch (push to master / +pre-deploy), with a generous per-test timeout so a multi-minute run does not false-fail. Do NOT add +`test:golden` to the `verify` chain or to any PR-required check that runs without Docker. If the CI +workflow file is owned by infra/cannot be edited here, document the exact job YAML to add in +`src/test/golden/README.md` under a "CI wiring" section and leave a note for the operator +(cross-app boundary — `[project-standards] §E`). + +Final confirmation: run `pnpm verify` and confirm it is GREEN at 100% coverage WITHOUT the golden +suite contributing (golden files under `src/test/**` are coverage-excluded; `test:golden` is not in +the chain). Confirm the golden suite SKIPS cleanly under `pnpm test` when Docker is absent. Update +README with the final gate summary. + + + pnpm run verify 2>&1 | tail -20 && ! grep -E '"verify".*test:golden' package.json && grep -Eq "CI|master|golden-oracle|pre-deploy" src/test/golden/README.md + + +The golden suite runs only via `test:golden` as a master-only pre-deploy CI gate (job added to the +CI workflow OR documented in README for the operator with exact YAML), never in `verify` or a +Docker-less PR check. `pnpm verify` is GREEN at 100% coverage without the golden suite; the suite +skips cleanly under `pnpm test` with no Docker. README has the final gate summary. + + + + + + +## Trust Boundaries + +| Boundary | Description | +|----------|-------------| +| test → VPS S3 (capture) | Capture script reads production credentials from env and pulls real objects over the network/VPN. | +| committed fixtures → repo | Real production artifact JSONs are committed in-tree (potential PII/identity leakage). | +| RabbitMQ broker → consumer | A malformed/failing artifact nacks-with-requeue → infinite redelivery (resource exhaustion in the test). | + +## STRIDE Threat Register + +| Threat ID | Category | Component | Disposition | Mitigation Plan | +|-----------|----------|-----------|-------------|-----------------| +| T-v4e-01 | Information disclosure | capture-artifacts.sh + committed artifacts.tar.gz | mitigate | NO VPS host/key/cred values committed (env-only; verify negative-greps literal IPs). Real artifacts already flow through server-2 (no new exposure surface); SteamID masking is enforced server-side at the mapper, so `GET /stats/*` snapshots cannot contain Steam64 (re-verified in 14-02/14-03). | +| T-v4e-02 | Denial of service | RabbitMQ completed consumer | mitigate | Feed ONLY success/partial artifacts through the real broker; HARD bounded-poll timeout (30s) is the backstop against the nack-requeue loop; conflict + parse.failed use synthetic non-broker paths. | +| T-v4e-03 | Tampering | docker-compose / no package installs | accept | This task adds ZERO new runtime/dev dependencies (uses installed vitest/pg/amqplib/@aws-sdk) — no package-legitimacy gate required. | +| T-v4e-04 | Information disclosure | STATE blocker — legacy SSH/host values | mitigate | Never commit host/key values into planning docs or source (capture script reads env only; README references env var names, not values). | + + + +## Phase-level checks + +- `pnpm verify` GREEN at 100% coverage with the golden suite present and Docker absent (golden files + are coverage-excluded under `src/test/**`; `test:golden` not in the chain). +- `pnpm test` and `pnpm run test:coverage` SKIP the golden suite cleanly when Docker/archive are + absent (no failures). +- `pnpm run test:golden` with docker-compose PG(15432)/RabbitMQ(5673)/S3(9000) up + archive present: + full chain green, durable-job invariant + published-message contract asserted, normalized + full-surface snapshots stable, bounty anchors match hand-computed values, all six invariants pass. +- `bash -n` on both scripts; capture script hardcodes no host/key/cred/IP literals. +- `pnpm run typecheck` + `pnpm run lint` clean across all new files. + + + +- A `test:golden` suite at `src/test/golden/**` pins the current ingest→stats behavior against real + PG+RabbitMQ+S3 through the same factories `server.ts` wires (no mocked contract boundary). +- ONE shared fixture loader + ONE shared snapshot normalizer (no duplication; production types reused). +- Full-chain characterization oracle + 2-3 hand-computed bounty anchors + six pinned invariants + (durable-job, dedup, conflict, re-delivery idempotency, parse.failed, role-gate). +- Committed non-empty floor archive (parser-2 CLI) + gated env-driven VPS capture script + one-line + human run command in README. +- Golden suite OUT of `verify`/`test:coverage` (zero coverage obligation) and into a master-only + pre-deploy CI gate; skips cleanly without Docker/archive so `verify` stays green at 100%. +- Authored THROUGH the test/convention skills with cited rules; zero new dependencies. + + + +Create `.planning/quick/260617-v4e-golden-e2e-integration-oracle-for-ingest/260617-v4e-SUMMARY.md` when done. + diff --git a/.planning/quick/260617-v4e-golden-e2e-integration-oracle-for-ingest/260617-v4e-RESEARCH.md b/.planning/quick/260617-v4e-golden-e2e-integration-oracle-for-ingest/260617-v4e-RESEARCH.md new file mode 100644 index 0000000..a305619 --- /dev/null +++ b/.planning/quick/260617-v4e-golden-e2e-integration-oracle-for-ingest/260617-v4e-RESEARCH.md @@ -0,0 +1,165 @@ +# Quick Task 260617-v4e: Golden E2E Integration Oracle — Research + +**Researched:** 2026-06-17 +**Domain:** server-2 ingest→stats pipeline regression oracle (Vitest 4 integration, real PG/RabbitMQ/S3) +**Confidence:** HIGH (call path + contracts traced in current code; one open item = VPS coordinates) +**Mode:** quick-task — decisions LOCKED in DEEP-BRAINSTORM.md; this validates/deepens mechanisms only. + +## Summary + +The full ingest→stats chain is wired exactly as the decision pack traced. The cross-app artifact contract holds: `server-2` `ParserArtifact` (`parser-artifact.ts:4`) is a structural subset of parser-2 `parse-artifact-v3`, and a parser-2-emitted artifact loads **as-is** (`loadParserArtifact` just does `JSON.parse(body)` — `storage/client.ts:87`). **Two load-bearing corrections to CONTEXT.md surfaced**: (1) server-2 does **NOT** verify SHA-256 of artifact bytes anywhere on the ingest path — `recordParserCompleted` stores `rawSnapshot` and only persists the checksums as metadata (`repository.ts:525-596`); the byte-verification lives in parser-2. So the fixture's `artifact_checksum` does **not** need to match its bytes for ingest to succeed. (2) The consume handler swallows errors and **nacks-with-requeue** (`rabbitmq.ts:126-128`) — a malformed/failing artifact will infinitely redeliver, so the bounded DB poll must have a hard timeout and the test must only feed artifacts that ingest cleanly. + +The bounty formula is small and hand-computable (`bounty.ts:124`): `points = round₂(1 · (1+playerFactor) · (1+squadFactor))`, `effectiveness = kills / max(1, deaths.total)`. Non-trivial bounty requires seeding a **previous rotation** with `player_stats`/`squad_stats` rows whose `stats` JSON carries `{kills, deaths:{total}}` (`repository.ts:1073-1153`). + +**Primary recommendation:** Build the oracle as `src/test/integration/golden/*.test.ts` mirroring `adapters.test.ts`/`postgres.test.ts` (docker-compose ports, `runMigrations()` in `beforeAll`, `truncate … cascade` in `beforeEach`). Drive promotion via `promotionService.promotePending()` directly (or `IntervalTask.runOnce()`), publish the real `parse.completed` to the real broker, then bounded-poll `parse_jobs.status='succeeded'`. Add a `test:golden` script that is **not** referenced by `verify`. The parser-2 golden floor (8 distinct OCAP inputs → CLI) is committable today; the hundreds-from-VPS capture is the only gated item. + +## Architecture Patterns + +### Real call path (validated against current code) + +``` +IntervalTask.runOnce() (or promotionService.promotePending directly) + → IngestPromotionService.promoteRecord (service.ts:72) + findReplayBySource → findReplayByChecksum → createReplay → createParseJob ← durable parse_jobs row HERE, in-tx, before any publish + markStagingPromoted + → [publishTask] ParseJobPublisher.publishQueued → queue.publishJson(parse.requested) (runtime.ts:73-80) + → TEST injects ParseCompletedMessage onto exchange `solid_stats.parser` rk `parse.completed` + → real broker → consumeParserResults.completed (rabbitmq.ts:74, runtime.ts:93-108) + artifactLoader.loadParserArtifact({bucket,key}) → S3 GetObject + JSON.parse (storage/client.ts:76-88) + repository.recordParserCompleted({...message, rawSnapshot: artifact}) (repository.ts:525) + locks job; no-op if terminal (idempotency); job→succeeded, replay→parsed, + supersede prior current parser_results, insert new current parser_results(raw_snapshot=artifact) + recalculation.recalculateParserResult(parserResultId, artifact) (recalculation.ts:45) + persistParserArtifact → parser_events + recalculatePlayerAndSquadStatsForParserResult → player_stats, squad_stats + recalculateCommanderSideStatsForParserResult → commander_side_stats + recalculateBountyPointsForParserResult → bounty_points (repository.ts:185) + → assert GET /stats/* via app.inject +``` + +### Await seam (no completion Promise) + +The consumer fires `void handleCompletedMessage` — there is no awaitable signal. Poll `parse_jobs.status='succeeded' AND parser_results.status='current' exists for replay_id` with a hard ceiling (e.g. 30s, 200ms interval). On failure the handler **nacks with requeue=true** (`rabbitmq.ts:127`), so a bad artifact loops forever — the poll timeout is the only backstop. Do NOT use real timers; a plain `await new Promise(setTimeout)` poll loop is acceptable per `[tests]` "Deterministic time" guidance since this is a real-broker integration test, not a unit loop test. + +## Validated Mechanisms (the 6 focus areas) + +### 1. Artifact contract — minimum field set & checksum truth + +`ParserArtifact` required fields (`parser-artifact.ts:4`): `contract_version: string`, `parser: Record`, `source: {…}`, `status: "success"|"partial"|"skipped"|"failed"`. Everything ingest reads downstream is **optional**: `players[]`, `weapons[]`, `destroyed_vehicles[]`, `diagnostics[]`, `side_facts`. The whole artifact is stored verbatim into `parser_results.raw_snapshot` (jsonb) and re-mapped by `mapParserArtifact` (`parser-artifact.ts:139`) into `parser_events`. + +Minimum **useful** fixture for a non-empty stats run: `contract_version`, `parser`, `source`, `status:"success"`, plus `players[]` with `eid`, `n`, compact counters (`k`/`d`/`tk`/…) and nested `kills[]` (`{c,v,w}`), and `weapons[]` for name resolution. `[VERIFIED: src/modules/statistics/parser-artifact.ts:4-281, repository.ts:569-583]` + +**A parser-2 artifact loads as-is.** parse-artifact-v3 `MinimalPlayerRow` (schema $defs:2257) is a superset of server-2 `PlayerRow` (extra `ck`,`eids`,`rn`,`tag` are ignored by TS structural typing; `s`/`n` nullable in schema, server reads them loosely). No deserialization gate, no `unevaluatedProperties` check at runtime. `[VERIFIED: storage/client.ts:87 — plain JSON.parse, no schema validation]` + +**CHECKSUM — CONTEXT.md is WRONG for server-2.** `recordParserCompleted` (`repository.ts:525-596`) takes `artifact_checksum`/`source_checksum` but **never recomputes or compares them to the bytes** — it stores `rawSnapshot` directly and the checksums only land in metadata when `rawSnapshot` is absent (`repository.ts:581`, `parserResultMetadata`). `grep` over `src/` confirms zero `sha256(`/`verifyChecksum` on the ingest path. **Consequence for the plan:** the fixture's `source.checksum` / message `artifact_checksum` can be any well-formed `^[0-9a-f]{64}$` value; bytes need not hash to it. (The byte-verification the brief mentions is a parser-2/worker responsibility, not server-2.) `[VERIFIED: repository.ts:525-596 + grep]` + +### 2. parser-2 CLI for the floor + +Binary `replay-parser-2` (Cargo bin in `crates/parser-cli`, `Cargo.toml:14`). Command (`main.rs:39,208`): +``` +cargo run --release --bin replay-parser-2 -- parse --input --output [--pretty] [--replay-id ] +``` +`cargo build` is confirmed by presence of `crates/parser-cli` with bin target; `parse_command` (`main.rs:249`) reads input, parses via `public_parse_replay`, writes ParseArtifact JSON to `--output`. + +**Golden corpus:** `crates/parser-core/tests/fixtures/golden/manifest.json` = **12 entries** but several reuse the same `.ocap.json`, so **8 distinct OCAP inputs**: `valid-minimal`, `invalid-json`, `metadata-drift`, `killed-events`, `side-facts`, `vehicle-context`, `aggregate-combat`, `combat-events`, `duplicate-slot-same-name`, `connected-backfill`. Named edge cases covered: winner_present/missing, vehicle_kill, teamkill, commander_side, null_killer, duplicate_slot_same_name, connected_player_backfill, partial/malformed. Note: `invalid-json` → `status:"failed"` and `combat-events`/`metadata-drift`/`killed-events` → `partial`; only `success`/`partial` artifacts are safe to feed the completed-consumer (a `failed` artifact still ingests but yields empty stats). `[VERIFIED: manifest.json (12 entries) + main.rs]` + +### 3. SSH capture mechanism + +S3 layout: `loadParserArtifact` reads `Bucket = message.artifact.bucket ?? config.s3.bucket`, `Key = message.artifact.key` (`storage/client.ts:79-81`). The existing integration `completedMessage` helper uses key prefix `artifacts/v3//.json` and bucket `solid-replays` (`postgres.test.ts:607-609`) — that is the observed artifact key convention. Default bucket `solid-replays`, S3 endpoint `http://localhost:9000` path-style (`adapters.test.ts:14-19`). + +**Capture script shape** (args/env-driven; VPS coords are the open item): +```sh +# inputs (env): VPS_HOST, VPS_S3_BUCKET (or path), MC_ALIAS or AWS creds, N +# 1. ensure Happ VPN bypass ip rule is active for VPS_HOST (else SSH/mc hangs) — see global memory happ-vpn-bypass-for-servers +# 2. mc cp --recursive //artifacts/v3/ $TMP/ (or aws s3 cp; or ssh + mc on the box) +# 3. (floor fallback) for each parser-2 golden ocap: replay-parser-2 parse --input … --output $TMP/.json +# 4. tar czf src/test/integration/golden/fixtures/artifacts.tar.gz -C $TMP . +``` +Prefer `mc` (MinIO client) or `aws s3 cp` against the VPS S3 endpoint over `scp`, since artifacts are S3 objects not files. The script must `--no-clobber` log captured-vs-skipped counts (no silent caps, principle 8). Unpack at test start to a tmp dir (`os.tmpdir()/golden-`), iterate with `test.each`. + +### 4. Determinism / normalization + +Non-deterministic fields entering the asserted surface, with recipe: + +| Surface | Non-det field | Source | Normalization | +|---------|---------------|--------|---------------| +| `replays`, `parse_jobs`, `parser_results`, `parser_events`, `player_stats`, `squad_stats`, `commander_side_stats`, `bounty_points`, `ingest_staging_records` | `id` (uuid) | `gen_random_uuid()` | map uuid→stable natural key (replay: `source_system+source_replay_id` or `checksum`; player: nickname/`steam_id`; squad: tag; job: replay-key+contract_version) | +| all tables | `created_at`/`updated_at`/`calculated_at`/`finished_at`/`published_at`/`started_at` | `now()` | redact → `""` | +| `parse_jobs` | `attempts` (stable=0 on happy path), `error` (null) | — | keep | +| FK columns (`replay_id`,`parse_job_id`,`parser_result_id`,`rotation_id`) | uuid | — | substitute via the same id→natural-key map | +| row ordering | DB scan order | — | sort rows by natural key before snapshot | +| `GET /stats/*` | embedded ids, slugs derived from names | — | same id map; slugs are derived/stable, keep | + +**Ordering that IS contractual — keep asserted order, do not sort:** cursor-paginated lists (`pagination/cursor.ts`), and the bounty `inputs.events[]` array (already sorted deterministically by the producer — `bounty.ts:68-79` `toSorted` on playerId; events are append-order within a player). The event timeline in `parser_events` uses `sourceRef` indices (`player_kill_index`, `destroyed_vehicle_index`) that are deterministic, so sort by `(eventType, observedPlayerRef, sourceRef-index)`. + +**Snapshot mechanism:** per `[tests]` repo convention, prefer **`toEqual` on a fully-normalized object** (strong oracle) for the bounty anchors and small tables, and **`toMatchFileSnapshot()`** for the large per-artifact characterization surface (Vitest 4 supports it; keeps hundreds of artifacts manageable and diff-reviewable). Snapshot files live under `src/test/integration/golden/__snapshots__/` (excluded from coverage by `src/test/**`). `[VERIFIED: vitest.config.ts:15]` + +### 5. Bounty previous-rotation seeding + +Path: `recalculateBountyPointsForParserResult` (`repository.ts:185`) → `assignReplayRotation` (needs a `rotations` row whose window contains `replay.replay_timestamp`) → if `status!=='assigned'` bounty is skipped (returns 0 rows). Then per scope, `loadPreviousBountyEffectiveness(rotationId,…)` (`repository.ts:1073`): +- finds the **previous** rotation = `rotations` row with `starts_at < current.starts_at` (latest such), +- reads that previous rotation's `player_stats`/`squad_stats` rows (matching `game_type`), +- extracts `{kills, deaths:{total}}` from each `stats` jsonb (`previousBountyStats`, `repository.ts:1138`). + +`effectiveness = kills / max(1, deaths.total)`; `points = round₂((1+playerEff)·(1+squadEff))` for an enemy `kill` with a known victim; teamkill/unknown/missing-victim → 0 (`bounty.ts:103-137`). + +**Minimum fixture state for a hand-computable anchor:** +1. Two `rotations`: `prev` (`starts_at` earlier) and `current` (window covering the artifact's `replay_timestamp`). +2. The artifact under test resolves to `current` (its `replay.replay_timestamp` inside the `current` window — set `ingest_staging_records.replay_timestamp` or rely on `source_replay_id` epoch derivation, `service.ts:173`). +3. In `prev` rotation, a `player_stats` row for the **victim player** with `stats = {"kills":K,"deaths":{"total":D}}` (matching `game_type`), and optionally a `squad_stats` row for the victim's squad. +4. The artifact contains an enemy `kill` (`players[].kills[].c="enemy_kill"`) where the victim's `eid` resolves to that seeded player. + +Worked anchor: victim prev stats kills=3 deaths.total=1 → playerEff=3; no squad → squadFactor=0 → `points = round₂(1·(1+3)·(1+0)) = 4.00`. Two such kills by one attacker → 8.00. Pick 2–3 cases: (a) player-only effectiveness, (b) player+squad effectiveness, (c) excluded teamkill = 0. `[VERIFIED: repository.ts:185-233,1073-1153 + bounty.ts:103-162]` + +### 6. Wiring + pitfalls + +- **`test:golden` script** (add to package.json, NOT in `verify`): + `"test:golden": "vitest run src/test/integration/golden --no-file-parallelism"`. + `verify` (`package.json:38`) chains `format,lint,typecheck,test,test:integration,openapi:check,ops…,test:coverage` — none of these will pick up the golden file IF it is named to escape their globs. **Critical:** `test:integration` globs `src/test/integration` (`package.json:26`), which **would** include `golden/`. Place the golden suite in a path `test:integration` does not match, OR name the script-target dir so it's excluded. Cleanest: put it at `src/test/golden/**` (NOT under `integration/`) so neither `test`, `test:integration`, nor `test:coverage` includes it — but `test:coverage` uses `include: ["src/**/*.ts"]` for measurement and `exclude: ["src/test/**"]` for coverage, so files under `src/test/**` are coverage-excluded regardless. Verify in plan that `test:integration`'s positional glob doesn't sweep it. +- **Coverage:** `src/test/**` is excluded from coverage (`vitest.config.ts:15`); `test:golden` is a separate invocation never run by `verify`/`test:coverage` → zero coverage obligation. The golden test contributes no lines to the 100% gate. `[VERIFIED: vitest.config.ts:5-28, package.json:24-38]` +- **Isolation:** unique S3 key per run (`artifacts/v3//.json`) and ephemeral queue OR reuse durable queues but purge — the topology is durable (`rabbitmq.ts:149`). Simplest: publish to the existing `parse.completed` queue but `truncate … cascade` between cases and use unique `job_id`/`replay_id`. For the broker, `consumeParserResults` binds the durable `server2.parse.completed` queue; to avoid cross-test bleed, run `--no-file-parallelism` (already the integration convention) and drain/await per case. +- **Pitfall — infinite requeue:** a handler exception nacks-requeue (`rabbitmq.ts:127`). If a fixture artifact triggers any throw (e.g. recalc on malformed data), it loops. Mitigation: only feed `success`/`partial` artifacts; hard poll timeout; assert the job reached terminal `succeeded` (not just "no error"). +- **Pitfall — `runOnce()` vs tasks:** `IntervalTask` exposes start/close; the decision pack says drive via `runOnce()`. Confirm `IntervalTask` has a `runOnce`/single-tick method in plan (else call `promotionService.promotePending(...)` and `publisher.publishQueued(...)` directly — both are public and what the tasks wrap, `runtime.ts:67,78`). + +## Don't Hand-Roll + +| Problem | Use Instead | +|---------|-------------| +| Schema/DDL for tables | `runMigrations(config.databaseUrl)` (`infra/db/migrate.ts`) — never mirror DDL `[tests Integration Harness]` | +| Fake repo / boundary mock | Real PG/RabbitMQ/S3 via docker-compose; mocking a contract boundary is anti-pattern #1 `[tests "What NOT to mock"]` | +| Config | `loadConfig(env)` with the fixed-port defaults from `adapters.test.ts` | +| Bounty expected values | recompute by hand from `bounty.ts` formula; assert with `toEqual` | +| Artifact parsing | feed real parser-2 CLI output / VPS objects — no synthetic toy blobs (principle 4) | + +## Project Constraints (from CLAUDE.md / AGENTS.md / skills) + +- Author through `solidstats-server-ts-tests` (cited as `[tests]` above): Vitest 4, `node` env, threads pool; per-layer map (repository/route = integration); co-location vs `tests/` dir; **strong oracles** (`toEqual` full shapes), coverage is a floor not proof; mock only true boundaries; deterministic time. `[tests TESTING.md]` +- `solidstats-server-ts-conventions`: 4-layer arch, `fastify-type-provider-zod`, Kysely, factory DI — the test reuses production factories, never re-implements them. +- `solidstats-shared-testing-standards`: AAA, isolation, determinism, real-infra-for-contracts. +- Do NOT pin request/moderation business logic (Phase 2 rewrites it) — only the role-gate (`requireRole`/`requireAnyRole`, `auth/routes/authorization.ts:25-29`; example protected route `admin/routes/rotations.ts:54` `requireRole(auth,"admin")`). +- Repo isolation convention is `truncate … cascade` (NOT fresh schema/db per test) — overrides the generic brief. +- GSD artifacts in English. + +## Assumptions Log + +| # | Claim | Risk if wrong | +|---|-------|---------------| +| A1 | VPS S3 bucket uses the same `artifacts/v3//.json` key prefix seen in tests | capture script glob wrong → fewer/zero objects; mitigated by floor | +| A2 | `IntervalTask` exposes a single-tick `runOnce()`; if not, call `promotePending`/`publishQueued` directly | trivial plan adjustment | +| A3 | A `partial`-status parser-2 artifact ingests without throwing in recalc | feed only verified-clean artifacts; hard poll timeout backstops | + +## Open Questions + +1. **VPS host + S3 bucket/path + creds + Happ VPN `ip rule` bypass** — required for the hundreds-capture; agent has no VPS access. Floor (8 parser-2 golden inputs via CLI) is committable now so the oracle is never empty. (P1 — user-owned) +2. **Conflict-review & parse.failed cases have no "real" artifact** — accept a small synthetic staging pair (conflict) and a hand-built `parse.failed` message (failure path) as documented exceptions to the real-data rule. (P2 — plan) +3. **`test:integration` positional glob** may sweep `src/test/integration/golden/**` into `verify` — place the golden suite at `src/test/golden/**` (outside `integration/`) and confirm no `verify` script targets it. (P1 — plan) + +## Sources + +### Primary (HIGH) +- `src/modules/statistics/parser-artifact.ts`, `src/infra/queue/messages.ts`, `src/modules/ingest/runtime.ts`, `service.ts`, `repository/repository.ts:525-596`, `src/infra/queue/rabbitmq.ts`, `src/infra/storage/client.ts`, `src/modules/statistics/service/recalculation.ts`, `src/modules/statistics/bounty/bounty.ts`, `src/modules/statistics/repository/repository.ts:185-233,1073-1153`, `src/modules/public-stats/routes/routes.ts`, `src/modules/auth/routes/authorization.ts` +- `src/test/integration/adapters.test.ts`, `src/modules/ingest/repository/tests/postgres.test.ts`, `.planning/codebase/TESTING.md`, `package.json`, `vitest.config.ts` +- `replay-parser-2/schemas/parse-artifact-v3.schema.json`, `crates/parser-cli/src/main.rs`, `crates/parser-core/tests/fixtures/golden/manifest.json` + +## Metadata +**Confidence:** call path HIGH; contracts HIGH; checksum/non-verification HIGH (grep-confirmed); VPS capture LOW (open item). **Valid until:** until Phase 2 Track C refactor lands (the whole point). diff --git a/.planning/quick/260617-v4e-golden-e2e-integration-oracle-for-ingest/260617-v4e-REVIEW.md b/.planning/quick/260617-v4e-golden-e2e-integration-oracle-for-ingest/260617-v4e-REVIEW.md new file mode 100644 index 0000000..2b7d86d --- /dev/null +++ b/.planning/quick/260617-v4e-golden-e2e-integration-oracle-for-ingest/260617-v4e-REVIEW.md @@ -0,0 +1,99 @@ +# Code Review — Golden E2E Oracle (260617-v4e) + +**Branch:** `quick/260617-v4e-golden-e2e-oracle` +**Depth:** quick (rigorous on test correctness) +**Scope:** `src/test/golden/**` (pipeline / bounty / invariants tests, loader, normalize, harness, README, snapshots), `scripts/*.sh`, `package.json`, `.github/workflows/cd.yml`. Working-tree content reviewed (reflects the final fix; the 3 staged files extract the shared `goldenInfraReachable()` guard). +**Ruleset:** `solidstats-server-ts-code-review` (Phase-1 contract gate N/A — test-only change, no route schema touched), `solidstats-server-ts-tests`, `solidstats-shared-review-standards`. + +## API contract +N/A — change is test-only; no Fastify route schema, OpenAPI artifact, or `web`-facing shape is touched. Phase-1 gate passes vacuously. + +--- + +## Verdict: REQUEST CHANGES + +One 🟠 (over-redaction that makes a deterministic field assert nothing — the oracle's whole job is correctness) plus minor 🟡/🔵. No 🔴 BLOCK. The oracle is genuinely strong overall: real PG+RabbitMQ+S3, no mocked contract boundary, durable-job-before-publish assertion, hand-computed bounty anchors, role-gate 401/403/2xx, idempotent re-delivery — all pin real behavior, not vacuous shapes. The leak discipline and skip-clean design are sound. + +| # | Sev | Topic | File | +|---|-----|-------|------| +| 1 | 🟠 | normalization over-redaction (hides regression) | `fixtures/normalize.ts` | +| 2 | 🟡 | redundant double infra-probe (resource churn) | `fixtures/loader.ts` | +| 3 | 🟡 | invariants suite missing `describe.skipIf` parity | `invariants.golden.test.ts` | +| 4 | 🔵 | dead param / over-eslint-disable | `bounty-anchor.golden.test.ts` | +| 5 | 🔵 | README skip-guard claim drift | `README.md` | + +--- + +## 🟠 Findings + +### 1. The normalizer silently collapses every non-redacted `Date` column to `{}` — a deterministic field is pinned but asserts nothing +**File:** `src/test/golden/fixtures/normalize.ts:53-64`, observable in every snapshot (`__snapshots__/pipeline-*.snap.json:10` etc.) + +`pg` returns `timestamptz` columns as JS `Date` objects. `normalizeValue` only special-cases `string` and `Array`; a `Date` falls into the `typeof value === "object"` branch → `normalizeObject(value as Record)` → `Object.entries(date)` returns `[]` (a `Date` has no enumerable own properties) → the value is emitted as `{}`. + +`replay_timestamp` is **not** in `TIMESTAMP_KEYS`, so it is not intentionally redacted — yet every snapshot pins it as: +```json +"replay_timestamp": {}, +``` +This field is deterministic: the tests insert the fixed literal `'2026-05-09T00:00:00.000Z'`. It *should* be asserted as a real value (or, if non-deterministic elsewhere, added to `TIMESTAMP_KEYS` deliberately). Instead the normalizer destroys it. The oracle therefore would **not** catch a regression that drops, zeroes, or corrupts `replay_timestamp` (or any future non-`TIMESTAMP_KEYS` Date column — `valid_from`/`valid_to`/`expires_at` would silently meet the same fate). This is exactly the "over-redaction HIDES a real regression" failure the review focus calls out. `[tests §G strong oracle]` `[testing-standards §B determinism must not erase signal]` + +**Fix:** handle `Date` explicitly in `normalizeValue`, *before* the generic object branch, and redact only genuinely non-deterministic timestamps by key rather than by type: +```ts +export function normalizeValue(value: unknown, uuids: UuidMap): unknown { + if (value instanceof Date) { + return value.toISOString(); // deterministic Dates survive; key-based redaction in normalizeObject handles now()-driven ones + } + if (typeof value === "string") { + return UUID_PATTERN.test(value) ? uuids.token(value) : value; + } + // … +} +``` +Then either add `replay_timestamp` to `TIMESTAMP_KEYS` (if you consider it noise) or let the now-correct ISO string be pinned. Re-generate the snapshots after the fix and confirm `replay_timestamp` shows the fixed ISO literal, not `{}`. Also worth a one-line test in `normalize.test.ts` asserting a `Date` round-trips to its ISO string (guards the regression permanently). + +--- + +## 🟡 Findings + +### 2. `goldenInfraReachable()` opens and tears down the full PG+RabbitMQ+S3 client triple twice per suite +**File:** `src/test/golden/fixtures/loader.ts:84-122` + +`goldenInfraReachable()` calls `dockerReachable()`, which constructs `createDbClient`/`createQueueClient`/`createStorageClient`, probes, and closes them — *then* each `beforeAll` immediately rebuilds the same broker/storage/pool for the actual run. Not a leak (every client is closed in `dockerReachable`'s `finally` via `Promise.allSettled`), so principle 9 is satisfied — but it's a wasted connect/handshake cycle on every suite, and a RabbitMQ connect under a flaky CI broker is a needless extra failure surface. `[std: correctness §AB resource lifecycle — clean, just churny]` + +**Fix:** acceptable as-is for a slow master-only gate; if you want it tighter, have the probe reuse the suite's already-built clients, or accept the churn and note it. Low priority. + +### 3. `invariants.golden.test.ts` uses a bare `describe(...)`, not `describe.skipIf(!archivePresent())` like the other two suites +**File:** `src/test/golden/invariants.golden.test.ts:57` + +The pipeline and bounty suites guard the `describe` with `.skipIf(!archivePresent())` so no phantom case collects when the archive is absent; invariants does not. It is *functionally* safe — every `it` early-returns on `!infraReachable`, and `dockerReachable` (not `goldenInfraReachable`) is used so it doesn't even depend on the archive — but the inconsistency means a Docker-less run reports these 6 specs as **passing** rather than **skipped**, diverging from the "skip cleanly" contract the README advertises (line 38-39). Each test body is a vacuous early-`return` pass, which reads as green coverage of invariants that never actually ran. `[shared-review-standards §F test must not look like it asserted when it didn't]` + +**Fix:** for parity and honest reporting, gate the bodies through `it.runIf(infraReachable)` or wrap the describe so absent infra yields *skipped*, not *passed*, specs. At minimum align with the sibling suites' pattern. + +--- + +## 🔵 Findings + +### 4. `driveBounty` takes `currentRotationId` only to `void` it; broad file-level eslint-disable +**File:** `src/test/golden/bounty-anchor.golden.test.ts:298-299` + +`void currentRotationId;` — the parameter is unused (the rotation is resolved from DB state, not the arg). Dead parameter; drop it and the `void` line, or use it. Minor. Separately, the file-level `eslint-disable … no-magic-numbers, @typescript-eslint/no-unnecessary-condition` is broad; the `no-unnecessary-condition` blanket can mask a genuinely-always-true guard (e.g. the `?? ""` fallbacks that can never be hit could hide a real null). Prefer line-scoped disables. `[std: SKILL §A lint hygiene]` + +### 5. README claims `describe.skipIf(!archivePresent() || !dockerReachable)` +**File:** `src/test/golden/README.md:38` + +The actual guard is `describe.skipIf(!archivePresent())` (collection-time) plus a runtime `infraReachable` early-return — `dockerReachable` is **not** evaluable inside `describe.skipIf` (it's async; the loader's own comment at `loader.ts:111-113` says exactly this). The README sentence describes a guard shape that does not and cannot exist. Cosmetic but misleading to the next maintainer. `[shared-review-standards docs accuracy]` + +**Fix:** reword to match: "suites skip collection-time on `!archivePresent()` and runtime-skip the live block when the infra probe fails." + +--- + +## Non-Findings Checked (ruled out) + +- **Vacuous pass / empty snapshot:** snapshots are 4–11 KB of real normalized rows + 5 `GET /stats/*` bodies; bounty anchors use `toEqual(4|9|0)` hand-computed values (not snapshot-only); invariants assert concrete statuses/reasons/role codes. Not vacuous (except the `replay_timestamp` field, finding #1). +- **try/catch swallow:** the only `catch` is `dockerReachable`'s probe (intentional → skip), and `invariants.golden.test.ts:220-260`'s `try/finally` (no catch — `app.close()` guaranteed, errors propagate). No swallowing. +- **Leak discipline (principle 9):** `afterAll` closes broker, app, s3, storage, pool in all live suites; `pollUntil` uses plain `setTimeout` resolved each iteration (no dangling timer — the promise resolves before the next loop), bounded by a hard 30s deadline; no real signal handlers; no fake timers anywhere (correct for a live broker). `purgeParserQueues` and `publishCompleted` close their channel+connection. Clean. +- **Determinism — uuid + now() + ordering:** `UuidMap` first-seen tokens for ids and fks; `TIMESTAMP_KEYS` redacts now()-driven columns; `object_key`/`source_file` redacted for per-run/host paths; `normalizeRows` sorts unordered table dumps by natural key while contractual orders are deliberately *not* sorted. Sound — **except** the Date-type hole in finding #1. +- **Capture script secrets:** no hardcoded VPS host/IP/key/secret — all via `VPS_S3_*` env, validated with fail-loud missing-env check (`capture-artifacts.sh:46-54`); `mc alias remove` cleans the credential alias; Happ-VPN bypass reminder present (script + README). `[shared-project-standards security]` Clean. +- **Skip-clean / verify isolation:** `test`, `test:coverage`, `verify` all `--exclude 'src/test/golden/**'`; `golden-oracle` CI job is `push` + `master`/`main` gated, not a PR check, 30-min timeout. `verify` stays green Docker-less. Confirmed in `package.json:24-39` and `cd.yml:61-102`. +- **Duplication:** one loader, one normalizer, one harness; production `ParserArtifact` / `ParseCompletedMessage` types reused (not hand-mirrored); the shared `goldenInfraReachable()` is the fix's whole point. Clean. +- **Principle 7 (pins current behavior, doesn't "fix"):** `driveBounty` forces `game_type='sg'` and `parse_jobs.status='published'` to *reach* the real scope, not to alter output; comments document why. `bigint size_bytes` pinned as `"123"` string (pg behavior) — pinned as-is, correct. diff --git a/.planning/quick/260617-v4e-golden-e2e-integration-oracle-for-ingest/260617-v4e-SUMMARY.md b/.planning/quick/260617-v4e-golden-e2e-integration-oracle-for-ingest/260617-v4e-SUMMARY.md new file mode 100644 index 0000000..be17ef5 --- /dev/null +++ b/.planning/quick/260617-v4e-golden-e2e-integration-oracle-for-ingest/260617-v4e-SUMMARY.md @@ -0,0 +1,169 @@ +--- +phase: 260617-v4e-golden-e2e-integration-oracle-for-ingest +plan: 01 +subsystem: testing +tags: [golden-oracle, characterization-test, integration, vitest, rabbitmq, postgres, s3, bounty, ingest, snapshot] + +# Dependency graph +requires: + - phase: ingest + statistics modules + provides: "PgIngestRepository, IngestPromotionService, ParseJobPublisher, PgStatisticsRepository, ParserResultRecalculationService, createRabbitMqParserRuntime, createStorageClient, buildApp + PgPublicStatsReadModel" +provides: + - "test:golden suite at src/test/golden/** — a master-only behavioral regression net pinning the CURRENT ingest→stats pipeline against real PG + RabbitMQ + S3" + - "ONE shared fixture loader/normalizer/harness + ONE shared archive+infra skip-guard" + - "Full-chain characterization oracle (9 floor artifacts), 3 hand-computed bounty anchors, 6 pinned invariants" + - "Committed parser-2 floor archive + gated env-driven VPS S3 capture script + README" +affects: [phase-2-track-c-behavior-preserving-refactor] + +# Tech tracking +tech-stack: + added: [] # zero new dependencies (uses installed vitest/pg/amqplib/@aws-sdk) + patterns: + - "Characterization (golden) oracle against real infra boundaries — no mocked contract boundary" + - "Clean-skip guard: live block runs only when archive present AND docker-compose infra reachable; otherwise skips (never fails)" + - "Bounded DB-state poll with a HARD timeout ceiling as the only backstop against the consumer nack-requeue loop" + - "Deterministic snapshot normalization: uuid→stable token map (ids + fks), timestamp redaction, natural-key row sort" + +key-files: + created: + - src/test/golden/fixtures/loader.ts + - src/test/golden/fixtures/normalize.ts + - src/test/golden/fixtures/harness.ts + - src/test/golden/fixtures/artifacts.tar.gz + - src/test/golden/pipeline.golden.test.ts + - src/test/golden/bounty-anchor.golden.test.ts + - src/test/golden/invariants.golden.test.ts + - src/test/golden/scripts/build-floor-archive.sh + - src/test/golden/scripts/capture-artifacts.sh + - src/test/golden/README.md + - src/test/golden/__snapshots__/ + modified: + - package.json # test:golden script; test + test:coverage exclude src/test/golden/** + +key-decisions: + - "Primary fix (golden excluded from test + test:coverage) was already committed by the prior executor; the remaining genuine work was the DRY skip-guard consolidation." + - "Added ONE shared goldenInfraReachable(config) guard (archive present AND infra reachable) so the pipeline + bounty suites no longer duplicate the archivePresent()+dockerReachable() probe sequence (principle 9)." + - "Invariants suite keeps the shared dockerReachable() probe directly — it consumes no fixture archive, so gating it on archive presence would wrongly skip it when only Docker is up." + +patterns-established: + - "Golden suite is a separate master-only gate (test:golden), OUT of verify/test:coverage → zero coverage obligation; src/test/** is coverage-excluded so verify stays at 100%." + +requirements-completed: [INGEST-02, INGEST-03] + +# Metrics +duration: ~20min +completed: 2026-06-18 +status: complete +--- + +# Phase 260617-v4e Plan 01: Golden E2E Integration Oracle for Ingest — Summary + +**A master-only `test:golden` suite that pins the current server-2 ingest→stats pipeline against real PostgreSQL + RabbitMQ + S3 (9 full-chain snapshots, 3 hand-computed bounty anchors, 6 invariants), wired OUT of `verify`/`test:coverage` and skipping cleanly without Docker; finished with a DRY skip-guard consolidation and full live + verify proof.** + +## Performance + +- **Duration:** ~20 min (verification + fix + summary only; the 6 implementation tasks were already done/committed) +- **Tasks:** 6 implementation tasks already complete (T1–T6); this session = fix + verification + SUMMARY +- **Files modified this session:** 3 (loader.ts, pipeline.golden.test.ts, bounty-anchor.golden.test.ts) + +## What was built (the 6 tasks — context) + +- **T1 Scaffold:** `test:golden` script (`vitest run src/test/golden --no-file-parallelism`); shared `loader.ts` (unpack archive, production `ParserArtifact` type, `archivePresent()` + `dockerReachable()` guards) and `normalize.ts` (uuid→token, ts redaction, row sort); README. +- **T2 Floor + full-chain oracle:** `build-floor-archive.sh` (parser-2 CLI over its golden OCAP corpus), committed `artifacts.tar.gz` (9 success/partial artifacts); `pipeline.golden.test.ts` drives promote → durable parse_jobs row (asserted before publish) → real broker `parse.requested` → real `parse.completed` consumer → S3 load → record + recalc → bounded-poll terminal `succeeded` → normalized full-surface snapshot incl. `GET /stats/*` via `app.inject`, through the SAME factories `server.ts` wires. +- **T3 Bounty anchors:** `bounty-anchor.golden.test.ts` — 3 hand-computed cases with a seeded previous rotation supplying effectiveness, asserted with `toEqual`: (a) player-only 4.00, (b) player+squad 9.00, (c) excluded teamkill 0. +- **T4 Invariants:** `invariants.golden.test.ts` — durable-job-before-publish, re-promote dedup (`duplicate_replay_id`), source-bytes conflict (`source_identity_changed_bytes`), re-delivery idempotency, synthetic `parse.failed` terminal-once, admin role-gate (401/403/2xx via shared `requireRole`). +- **T5 Capture script:** `capture-artifacts.sh` — gated, env-driven VPS S3 pull (no hardcoded host/key/IP), Happ-VPN ip-rule reminder, folds in the floor, logs captured-vs-skipped. +- **T6 CI gate:** README documents the master-only `golden-oracle` job; golden kept out of `verify`. + +## The fix applied this session + +The interrupted-`verify` root cause from the brief — `test`/`test:coverage` globbing the golden suite — was **already fixed and committed** at HEAD (`--exclude 'src/test/golden/**/*.test.ts'` present in both scripts in commit `dc4faa2`). So no package.json change was needed. + +The genuine remaining work was **required-fix item 2 (skip-clean robustness / principle 9 DRY)**. The three suites each re-implemented the `archivePresent()` → `dockerReachable()` probe sequence in their `beforeAll`. I consolidated this into **ONE shared guard helper** `goldenInfraReachable(config)` in `loader.ts` (returns true only when the archive is present AND PG+RabbitMQ+S3 answer a health probe), and rewired the pipeline + bounty suites onto it. The invariants suite keeps the shared `dockerReachable()` probe directly because it consumes no fixture archive (gating it on archive presence would wrongly skip it when only Docker is up). + +**Leak audit (required-fix item 3):** confirmed clean — every suite closes broker/app/s3/storage/pool in `afterAll`; the bounded `pollUntil` `setTimeout` always resolves (no dangling timer); no real signal handlers registered. The full golden run exits cleanly in ~4.6s with no open-handle/teardown warnings. + +## Files Created/Modified this session + +- `src/test/golden/fixtures/loader.ts` — added `goldenInfraReachable(config)`, the ONE shared archive+infra skip-guard. +- `src/test/golden/pipeline.golden.test.ts` — `beforeAll` now gates on `goldenInfraReachable`. +- `src/test/golden/bounty-anchor.golden.test.ts` — `beforeAll` now gates on `goldenInfraReachable`. + +## Verification + +Docker compose stack (`postgres:17` @15432, `rabbitmq:4` @5673, `minio` @9000, bucket `solid-replays` auto-created) was brought up and healthy. + +**`pnpm test:golden` — LIVE PASS: 3 files / 21 tests.** Proven live (not skipped): + +- pipeline: 9 full-chain snapshots match (aggregate-combat, combat-events, connected-backfill, duplicate-slot-same-name, killed-events, metadata-drift, side-facts, valid-minimal, vehicle-context) + 1 skip-doc case — ~270–380ms each = real broker round-trips. +- bounty anchors: (a) 4.00, (b) 9.00, (c) 0 all match hand-computed values via `toEqual`. +- invariants: all 6 hold (durable-job, dedup, conflict, re-delivery idempotency, parse.failed terminal-once, role-gate). +- **Skip-clean proven:** with infra pointed at dead ports the suite passes in 611ms (39ms tests) — live blocks early-return, zero failures. + +**`pnpm verify` — GREEN end-to-end, every step:** + +| Step | Result | +|------|--------| +| format (prettier --check) | PASS | +| lint (eslint) | PASS | +| typecheck (tsc --noEmit) | PASS | +| test (unit; golden + integration excluded) | PASS — 80 files / 681 tests | +| test:integration | PASS — 9 files / 191 tests | +| openapi:check (verify + ts-gen) | PASS | +| ops:backup:check | PASS | +| ops:boundary:check | PASS | +| test:coverage (golden excluded) | PASS — 89 files / 872 tests, **100% coverage** (stmts 3359/3359, branch 1578/1578, funcs 1071/1071, lines 3304/3304) | + +The golden suite contributes zero coverage obligation (`src/test/**` is coverage-excluded; `test:golden` is not in the chain) and `verify` holds 100%. + +## REVIEW findings addressed (260617-v4e-REVIEW.md) + +A follow-up code review (`260617-v4e-REVIEW.md`, verdict REQUEST CHANGES) raised 5 findings. Resolution this session: + +| # | Sev | Finding | Resolution | +|---|-----|---------|------------| +| 1 | 🟠 | `normalize.ts` collapses every non-redacted `Date` (e.g. `replay_timestamp`) to `{}` — a deterministic field that asserted nothing | **FIXED.** Added a `value instanceof Date` branch in `normalizeValue` (returning `.toISOString()`) **before** the generic-object branch. now()-driven columns (`created_at`/`updated_at`/`published_at`/`finished_at`/`started_at`/`calculated_at`) stay key-redacted to `` via `TIMESTAMP_KEYS`. Snapshots regenerated. Added permanent guard `fixtures/normalize.test.ts` (5 cases incl. Date→ISO round-trip and key-based redaction). | +| 2 | 🟡 | redundant/divergent infra-probe — invariants used `dockerReachable` directly while pipeline/bounty used the shared `goldenInfraReachable` | **FIXED (consolidated).** All three suites now use the single shared `goldenInfraReachable(config)` guard. The probe-then-build connect cycle is inherent to a health-probe-before-build design and is **accepted** as the reviewer recommended ("acceptable as-is for a slow master-only gate"). | +| 3 | 🟡 | invariants suite used a bare `describe(...)` → Docker-less run reported PASS, not skipped | **FIXED.** Switched to `describe.skipIf(!archivePresent())` for collection-time skip parity with the sibling suites; runtime infra absence still early-returns inside each `it` (matching the pipeline suite's pattern, since the async probe can't be awaited in `describe.skipIf`). | +| 4 | 🔵 | dead `currentRotationId` param + `void` line in `driveBounty` | **FIXED.** Dropped the unused param and the three call-site args (callers still use `seeds.currentRotationId` for `rotationBountyPoints`). The broad file-level eslint-disable was **kept** (consistent with the sibling golden suites; line-scoping every DB-column-literal/magic-number was judged disproportionate for test files). | +| 5 | 🔵 | README described a `describe.skipIf(!archivePresent() \|\| !dockerReachable)` guard that cannot exist | **FIXED.** Reworded the Running section to: collection-time skip on `!archivePresent()` + runtime infra early-return. | + +**`replay_timestamp` snapshot proof:** before → `"replay_timestamp": {}` (asserted nothing); after → `"replay_timestamp": "2026-05-09T00:00:00.000Z"` (the fixed staging literal, now genuinely pinned) across all 9 regenerated pipeline snapshots. now()-driven timestamps remain ``. + +**Re-verification:** `pnpm test:golden` GREEN — 4 files / 26 tests (the 21 live golden + 5 new `normalize.test.ts`); `pnpm verify` GREEN end-to-end with 100% coverage. Golden stays excluded from `test`/`test:coverage`/`verify`. + +## Skills cited + +- `solidstats-shared-testing-standards` §E (Determinism — a live-broker bounded poll is the permitted exception where deterministic clock control is impossible; the HARD timeout is the backstop), §F (doubles only at true boundaries — this oracle deliberately uses real PG/RabbitMQ/S3, no mocked contract boundary), §B (unit-vs-integration boundary — repository/route correctness needs real infra). +- `solidstats-server-ts-tests` (Integration Harness — `runMigrations()` for the real schema, `truncate … cascade` isolation, `app.inject`; Coverage gate — golden out of the coverage invocation). +- `solidstats-server-ts-conventions` (factory-DI — the oracle constructs the SAME production factories `server.ts` wires, never a hand-mirrored copy; queue reliability — durable `parse_jobs` row before any publish). +- `solidstats-shared-project-standards` (session hygiene — clean tree via committed work; DRY principle 9 — ONE shared guard helper, no duplication; documentation language — README in English). + +## Deviations from Plan + +The plan's 6 tasks were executed by a prior executor. This session deviated only in that the **primary diagnosed fix was already committed**, so the remaining work was the DRY guard consolidation (required-fix item 2) plus the verification + SUMMARY the prior executor was interrupted before completing. No scope creep; no production behavior changed (the oracle pins current behavior as-is, principle 7). + +## Issues Encountered + +- **GPG signing blocker (UNRESOLVED — needs user action):** all 6 prior commits on this branch are GPG-signed (`commit.gpgsign=true`), but git's commit path routes to the interactive `gnome3` pinentry which **times out** in this non-interactive session (loopback signing needs `/dev/tty`, unavailable here). The fix is fully **staged** (3 files) but **not yet committed**. The user must complete the commit so the GPG passphrase prompt can be answered. See "Action required" below. + +## Residual risks / notes + +- **VPS full-corpus capture is gated/manual:** `artifacts.tar.gz` is the committed parser-2 **floor** (9 artifacts). The hundreds of REAL production artifacts require the human to run `capture-artifacts.sh` with the VPS S3 env vars under the Happ-VPN ip-rule bypass; the live full-corpus run is the master-only CI job, not part of `verify`. +- The golden suite is a **master-only pre-deploy gate**, intentionally outside `verify`; PR checks that run without Docker never invoke it. + +## Action required (commit the staged fix) + +The 3-file fix is staged and verified. GPG pinentry could not complete non-interactively. Run **one** of: + +``` +# preferred — signed, consistent with the rest of the branch (answer the pinentry prompt): +git commit -m "test(260617-v4e): consolidate golden skip-guard into one shared helper" + +# or, if you accept an unsigned commit for this one: +git commit --no-gpg-sign -m "test(260617-v4e): consolidate golden skip-guard into one shared helper" +``` + +--- +*Phase: 260617-v4e-golden-e2e-integration-oracle-for-ingest* +*Completed: 2026-06-18* diff --git a/.planning/quick/260617-v4e-golden-e2e-integration-oracle-for-ingest/260617-v4e-VERIFICATION.md b/.planning/quick/260617-v4e-golden-e2e-integration-oracle-for-ingest/260617-v4e-VERIFICATION.md new file mode 100644 index 0000000..6a25016 --- /dev/null +++ b/.planning/quick/260617-v4e-golden-e2e-integration-oracle-for-ingest/260617-v4e-VERIFICATION.md @@ -0,0 +1,139 @@ +--- +phase: 260617-v4e-golden-e2e-integration-oracle-for-ingest +verified: 2026-06-18T00:00:00Z +status: human_needed +score: 8/8 +behavior_unverified: 0 +overrides_applied: 0 +human_verification: + - test: "Commit the staged 3-file DRY consolidation (goldenInfraReachable). Run `git commit` interactively so the GPG pinentry can answer. Then confirm `git status --short` is clean and all 6 prior commits on this branch are intact." + expected: "Working tree clean. `pnpm verify` still green. `pnpm run test:golden` with Docker up still reports 3 files / 21 tests PASS." + why_human: "The GPG-signing loop requires an interactive TTY; the agent cannot complete the commit non-interactively. The staged content is verified correct but the commit does not exist yet — the git tree is not clean, violating the project session-hygiene rule." +--- + +# Task 260617-v4e: Golden E2E Integration Oracle — Verification Report + +**Task Goal:** A behavioral regression oracle that pins the CURRENT ingest→stats pipeline behavior (real PG/RabbitMQ/S3), runs master-only (outside `verify`/coverage), uses a committed real-artifact floor + gated VPS capture, asserts full-surface characterization snapshots + hand-computed bounty anchors + the pinned invariants, and skips cleanly without Docker/the archive. + +**Verified:** 2026-06-18 +**Status:** human_needed +**Re-verification:** No — initial verification + +--- + +## Goal Achievement + +### Observable Truths + +| # | Truth | Status | Evidence | +|---|-------|--------|----------| +| 1 | Full production ingest→stats path runs end-to-end against real PG + RabbitMQ + S3, driven through the same factories server.ts wires | VERIFIED | `pipeline.golden.test.ts` imports and constructs `PgIngestRepository`, `IngestPromotionService`, `ParseJobPublisher`, `PgStatisticsRepository`, `ParserResultRecalculationService`, `createRabbitMqParserRuntime`, `createStorageClient`, `PgPublicStatsReadModel`, `buildApp` — identical to server.ts wiring. No mocked boundary. `test.each` over 9 floor artifacts. | +| 2 | Durable `parse_jobs` row exists BEFORE the RabbitMQ `parse.requested` publish | VERIFIED | `pipeline.golden.test.ts` lines 177–188: queries `parse_jobs` between `promotionService.promotePending()` and `publisher.publishQueued()`, asserts `status='queued'` before publish. `invariants.golden.test.ts` invariant 1 repeats same assertion in isolation. | +| 3 | A real `parse.completed` delivered through the live broker loads the S3 artifact, records result, recalculates aggregates/bounty, and state is observable via GET /stats/* backed by PgPublicStatsReadModel | VERIFIED | `pipeline.golden.test.ts` drives full round-trip: `publishCompleted()` → `consumeParserResults` consumer (broker wired) → `recordParserCompleted` → `recalculateParserResult` → `pollUntil(status=succeeded)` → `snapshotSurface()` asserts all DB tables + 5 GET /stats/* endpoints via `app.inject` with `PgPublicStatsReadModel`. 9 committed snapshots under `__snapshots__/`. SUMMARY confirms 3 files / 21 tests LIVE PASS. | +| 4 | Hand-computed bounty values on 2-3 anchor cases match persisted bounty_points with a seeded previous rotation | VERIFIED | `bounty-anchor.golden.test.ts`: (a) player-only kills=3/deaths=1 → `expect(alphaPoints).toEqual(4)` [4.00]; (b) player+squad kills=2/1 + squad 4/2 → `expect(alphaPoints).toEqual(9)` [9.00]; (c) teamkill → `expect(echoPoints).toEqual(0)`. All use `toEqual` with hand-computed values, not snapshot equality. Previous-rotation seeded with real INSERT into `rotations`/`player_stats`/`squad_stats`. | +| 5 | Re-promoting same staging row dedups; same-source different-bytes conflicts; re-delivered parse.completed records terminal state once | VERIFIED | `invariants.golden.test.ts` invariants 2, 3, 4, 5: dedup asserts `promotion_evidence.duplicate_replay_id` set; conflict asserts `conflict_details.reason='source_identity_changed_bytes'`; re-delivery calls `recordParserCompleted` twice, second returns null; `parse.failed` called twice, second returns false. All use production service/repository directly (no synthetic broker round-trip for conflict/failed). | +| 6 | Protected admin route rejects without role and accepts with role via shared requireRole pre-handler | VERIFIED | `invariants.golden.test.ts` invariant 6: `app.inject` to `POST /admin/rotations` — anon gets 401, non-admin session gets 403, admin-role session gets 2xx. Uses `InMemoryAuthUserRepository`/`InMemorySessionStore` + `buildApp({ auth })`, exercising the actual `requireRole` pre-handler. | +| 7 | The golden suite SKIPS cleanly (not fails) when Docker services or fixture archive are absent | VERIFIED | `describe.skipIf(!archivePresent())` at collection time in pipeline + bounty suites; `goldenInfraReachable()` guards `beforeAll` → early return; per-`it` `if (!infraReachable) return`. Invariants suite uses `dockerReachable()` directly (no archive dependency). SUMMARY confirms skip-clean proven: "infra pointed at dead ports → 611ms (39ms tests) — zero failures." | +| 8 | Golden suite wired OUT of verify and test:coverage (zero coverage obligation) and into a dedicated test:golden script | VERIFIED | `package.json` line 24: `test` excludes `src/test/golden/**/*.test.ts`. Line 25: `test:coverage` excludes `src/test/golden/**/*.test.ts`. Line 27: `test:golden` = `vitest run src/test/golden --no-file-parallelism`. `verify` script chains `pnpm test` + `pnpm run test:integration` + `pnpm run test:coverage` — none target `src/test/golden`. `test:integration` positionally targets `src/test/integration` + `*/tests/postgres.test.ts` — no golden overlap. | + +**Score:** 8/8 truths verified (0 present, behavior-unverified) + +--- + +### Required Artifacts + +| Artifact | Expected | Status | Details | +|----------|----------|--------|---------| +| `src/test/golden/fixtures/loader.ts` | ONE shared fixture loader — unpacks archive, exposes archivePresent() + dockerReachable() + goldenInfraReachable() guards, production ParserArtifact type | VERIFIED | 123 lines (exceeds 40 min). Imports `ParserArtifact` from production path. All three guard functions present and substantive. goldenInfraReachable() is staged (not yet committed). | +| `src/test/golden/fixtures/normalize.ts` | ONE shared snapshot normalizer — uuid→stable token map, timestamp redaction, deterministic row sort | VERIFIED | 109 lines (exceeds 40 min). UuidMap class, normalizeValue, normalizeRows, TIMESTAMP_KEYS set, source_file + object_key redaction. | +| `src/test/golden/fixtures/harness.ts` | Shared golden-suite helpers — TRUNCATE_ALL, pollUntil, publishCompleted, purgeParserQueues, snapshotSurface | VERIFIED | Present (not in PLAN artifacts but added by executor as DRY consolidation). Imports from messages.ts + normalize.ts. snapshotSurface covers 9 tables + 5 /stats/* endpoints. | +| `src/test/golden/pipeline.golden.test.ts` | Full-chain oracle: promote → durable job → real broker → consumer → recalc → GET /stats/* snapshot; test.each | VERIFIED | test.each over loaded fixtures. Full chain present. goldenInfraReachable guard staged. 9 snapshots committed. | +| `src/test/golden/bounty-anchor.golden.test.ts` | 2-3 hand-computed bounty anchors with seeded previous rotation, asserted with toEqual | VERIFIED | 3 cases: player-only=4, player+squad=9, teamkill=0. toEqual assertions. Previous-rotation seeded. goldenInfraReachable guard staged. | +| `src/test/golden/invariants.golden.test.ts` | Idempotency/conflict/role-gate invariants | VERIFIED | 6 invariants. durable-job, dedup (duplicate_replay_id), conflict (source_identity_changed_bytes), re-delivery (recordParserCompleted null on 2nd), parse.failed (false on 2nd), role-gate (401/403/2xx). | +| `src/test/golden/scripts/capture-artifacts.sh` | Gated VPS S3 capture script — env-driven, no hardcoded host/key/IP, logs counts, fails on missing env | VERIFIED | bash -n passes. VPS_S3_ENDPOINT/ACCESS_KEY/SECRET validated. Happ VPN reminder present. No literal IPs. captured/skipped/floor counts logged. Fails loudly on zero objects. | +| `src/test/golden/scripts/build-floor-archive.sh` | Committable floor: runs replay-parser-2 CLI over its golden OCAP corpus, packs artifacts.tar.gz | VERIFIED | bash -n passes. 9 named inputs (success/partial only). cargo check. Logs captured/skipped. Refuses empty archive. | +| `src/test/golden/README.md` | One-line human capture command, floor-build command, skip semantics, gate placement, CI wiring | VERIFIED | All present: one-line VPS capture command, floor build command, skip semantics doc, CI wiring section referencing cd.yml golden-oracle job. | +| `src/test/golden/fixtures/artifacts.tar.gz` | Committed non-empty floor archive | VERIFIED | File exists at 2215 bytes. Contains 9 JSON artifacts (matching build-floor-archive.sh INPUTS list; 9 snapshots committed). | +| `package.json test:golden script` | test:golden = vitest run src/test/golden --no-file-parallelism | VERIFIED | Present at line 27 exactly as specified. | +| `src/test/golden/__snapshots__/` | 9 file snapshots from floor artifacts | VERIFIED | 9 .snap.json files present (aggregate-combat, combat-events, connected-backfill, duplicate-slot-same-name, killed-events, metadata-drift, side-facts, valid-minimal, vehicle-context). Spot-checked pipeline-valid-minimal.snap.json — substantive normalized DB rows with uuid:N tokens and `` redaction. | + +--- + +### Key Link Verification + +| From | To | Via | Status | Details | +|------|----|-----|--------|---------| +| `pipeline.golden.test.ts` | `src/server.ts` (production factories) | Imports `createRabbitMqParserRuntime`, `PgPublicStatsReadModel`, `ParserResultRecalculationService`, `buildApp`, `PgIngestRepository`, `IngestPromotionService`, `ParseJobPublisher`, `PgStatisticsRepository`, `createStorageClient` — same factories server.ts wires, never re-implemented | VERIFIED | All 9 production factory imports confirmed in pipeline.golden.test.ts. | +| `pipeline.golden.test.ts` | `src/infra/db/migrate.ts` | `runMigrations(config.databaseUrl)` in beforeAll | VERIFIED | `runMigrations` import + call confirmed in beforeAll. | +| `loader.ts` | `src/test/golden/fixtures/artifacts.tar.gz` | `existsSync(ARCHIVE_PATH)` + `execFileSync("tar", ["xzf", ...])` at test start | VERIFIED | ARCHIVE_PATH computed via `new URL("artifacts.tar.gz", import.meta.url)`. archivePresent() checks existence. loadGoldenArtifacts() unpacks. | +| `package.json test:golden` | `src/test/golden` | `vitest run src/test/golden --no-file-parallelism` — path no verify-chained script targets | VERIFIED | test:golden targets `src/test/golden` directly. test, test:coverage both have `--exclude 'src/test/golden/**/*.test.ts'`. test:integration uses `src/test/integration` positionally — no golden overlap. verify chain never references test:golden. | +| `cd.yml golden-oracle job` | `pnpm run test:golden` | `if: github.event_name == 'push' && (github.ref == 'refs/heads/master' || github.ref == 'refs/heads/main')` | VERIFIED | `.github/workflows/cd.yml` contains `golden-oracle` job with exact master-push gate, `run: pnpm run test:golden`, 30-minute timeout. Separate from `verify` job. | + +--- + +### Data-Flow Trace (Level 4) + +| Artifact | Data Variable | Source | Produces Real Data | Status | +|----------|---------------|--------|--------------------|--------| +| `pipeline.golden.test.ts` snapshotSurface | `surface` (9 DB tables + 5 /stats/* responses) | `pool.query(sql)` for each table + `app.inject` for each endpoint | Yes — real PG queries; `PgPublicStatsReadModel` wired into `buildApp` so /stats/* reflects DB state | FLOWING | +| `bounty-anchor.golden.test.ts` rotationBountyPoints | `points` from `bounty_points` table | `pool.query("select points from bounty_points where rotation_id=$1 and player_id=$2")` after real recalc | Yes | FLOWING | +| `invariants.golden.test.ts` promotion_evidence / conflict_details | JSONB columns read via raw pool.query | `pool.query("select status, promotion_evidence from ingest_staging_records ...")` | Yes | FLOWING | + +--- + +### Behavioral Spot-Checks + +Step 7b: SKIPPED (requires live Docker/RabbitMQ infra; per the task prompt the executor already ran `pnpm test:golden` LIVE with 3 files / 21 tests green). The verifier cannot and must not re-run the full suite. Skip-clean behavior verified structurally: `describe.skipIf(!archivePresent())` + `if (!infraReachable) return` pattern confirmed in all three test files. + +--- + +### Probe Execution + +No probe-*.sh files declared in PLAN. capture-artifacts.sh and build-floor-archive.sh are operator tools (require VPS/cargo) — not testable in this environment. Both pass `bash -n` syntax validation. + +--- + +### Requirements Coverage + +| Requirement | Source Plan | Description | Status | Evidence | +|-------------|-------------|-------------|--------|---------| +| INGEST-02 | 260617-v4e-PLAN.md | Ingest pipeline behavioral regression coverage | SATISFIED | pipeline.golden.test.ts + invariants cover full ingest→stats chain against real infra | +| INGEST-03 | 260617-v4e-PLAN.md | Bounty correctness verification | SATISFIED | bounty-anchor.golden.test.ts hand-computed anchors with seeded rotation effectiveness | + +--- + +### Anti-Patterns Found + +| File | Line | Pattern | Severity | Impact | +|------|------|---------|----------|--------| +| `src/test/golden/scripts/capture-artifacts.sh` | 56 | `XXXXXX` in mktemp pattern matched by debt-marker grep | Info | False positive — mktemp template, not a debt marker. | +| `src/test/golden/scripts/build-floor-archive.sh` | 56 | Same mktemp pattern | Info | False positive — not a debt marker. | + +No real TBD/FIXME/XXX markers. No stub implementations. No hardcoded empty data. No orphaned artifacts. + +--- + +### Human Verification Required + +#### 1. Commit the staged DRY consolidation (goldenInfraReachable) + +**Test:** Run `git commit -m "test(260617-v4e): consolidate golden skip-guard into one shared helper"` in an interactive terminal (so GPG pinentry can complete). + +**Expected:** Commit succeeds. `git status --short` is clean. The 3 staged files (loader.ts, pipeline.golden.test.ts, bounty-anchor.golden.test.ts) are committed. `pnpm verify` remains green. `pnpm run test:golden` with Docker up reports 3 files / 21 tests PASS. + +**Why human:** GPG signing requires interactive TTY (`/dev/tty`) unavailable to the agent. The staged content has been verified correct at every level — this is exclusively a commit-delivery gap, not an implementation gap. The code exists and is wired; only the git commit is missing. + +**Note:** The files read during this verification reflect the staged (working-tree) content, which IS the goldenInfraReachable-consolidated version. The committed tree (pre-stage) has the older duplicated archivePresent()+dockerReachable() pattern in pipeline and bounty suites, but the staged content is what the SUMMARY documents and what was verified LIVE. Either version satisfies the must-haves (both skip cleanly; the staged version is the DRY-consolidated form). The human_needed here is solely for git hygiene. + +--- + +### Gaps Summary + +No behavioral or functional gaps. All 8 must-have truths verified, all artifacts present and substantive, all key links wired, all snapshots committed and non-empty. + +The single human_needed item is a git-hygiene gap: the DRY guard consolidation (goldenInfraReachable) is staged but not committed due to GPG pinentry unavailability. The implementation is verified correct. Running `git commit` interactively closes this. + +--- + +_Verified: 2026-06-18_ +_Verifier: Claude (gsd-verifier)_ diff --git a/.planning/quick/260617-v4e-golden-e2e-integration-oracle-for-ingest/DEEP-BRAINSTORM.md b/.planning/quick/260617-v4e-golden-e2e-integration-oracle-for-ingest/DEEP-BRAINSTORM.md new file mode 100644 index 0000000..072cdb2 --- /dev/null +++ b/.planning/quick/260617-v4e-golden-e2e-integration-oracle-for-ingest/DEEP-BRAINSTORM.md @@ -0,0 +1,169 @@ +# Deep Brainstorm Brief — Golden end-to-end integration oracle + +## Context +- Date: 2026-06-17 +- Request: Build golden E2E integration test(s) that pin the current observable behavior of the + ingest→stats pipeline as a behavioral regression oracle BEFORE the Phase 2 `server-2` refactor. +- GSD stage: pre-quick brainstorm (decision pack feeding `/gsd-quick --full`). +- Target outcome: locked decision pack → `/gsd-quick --full`. +- Artifact owner: Pavlov Alexandr. +- Source brief: `/tmp/golden-integration-test-prompt.md` (reusable, stack-agnostic). + +## Goal +A behavior-preserving regression oracle for the ingest→stats pipeline plus the public read surface, +so the upcoming **Phase 2 `server-2` Track C refactor** — Oxfmt mass-reformat + full Oxlint (blocking +type-aware) + `tsc`→tsdown (2-entry `server.ts` + `migrate.ts`) + dependency-cruiser + knip + lefthook, +explicitly **behavior-preserving** — cannot silently change computed values or pipeline behavior. +It complements (does not replace) the unit suite, the `frozen-contract` + oasdiff contract gate, and +`verify`. The contract gate guards API **shape**; this oracle guards **computed values and pipeline +behavior**, which the shape gate is blind to. + +## Users And Workflows +- Developers executing the Phase 2 refactor: the oracle is the safety net — green ⇒ behavior preserved, + red ⇒ behavioral drift to investigate. +- Runs **only on master, pre-deploy**, as a dedicated slow CI job. Not part of per-PR `verify`. + +## Scope +### Must Have +- One golden E2E test driving the **real** production path per real artifact: + staging-row promote (`IntervalTask.runOnce()` → `IngestPromotionService`, `src/modules/ingest/service.ts`) + → durable `parse_jobs` row + RabbitMQ publish (`src/modules/ingest/publisher.ts`) + → real broker delivery → `ParseCompletedMessage` consumer (`src/infra/queue/rabbitmq.ts`) + → real S3 artifact load (`artifactLoader.loadParserArtifact({bucket,key})`, `src/modules/ingest/runtime.ts:94`) + → `recordParserCompleted()` (`src/modules/ingest/repository/repository.ts:525`) + → `ParserResultRecalculationService.recalculateParserResult()` (`src/modules/statistics/service/recalculation.ts`) + → aggregates + bounty → assert via `GET /stats/*` (`src/modules/public-stats/...`). +- **Real infrastructure** (mirror existing integration harness): real PostgreSQL + real RabbitMQ + real + MinIO/S3 on the docker-compose fixed ports (PG `15432`, Rabbit `5673`, S3 `9000`), real schema via + `runMigrations()`, `truncate … cascade` isolation, **unique S3 keys + ephemeral queue per run**. +- **Fixtures**: hundreds of **real** `ParserArtifact` JSONs captured once from real `replay-parser-2` + output, committed as a **gzip archive** in-tree, **unpacked at test start**, iterated with `test.each`. +- **Assertions** (full observable contract, principle 5): characterization golden snapshots of + `parser_results` (+ all evidence fields), `parser_events`, `player_stats`, `squad_stats`, + `commander_side_stats`, `bounty_points`, terminal `parse_jobs`, `ingest_staging_records` status/evidence, + and `GET /stats/*` responses — with deterministic normalization — **plus** hand-computed bounty + assertions on 2–3 anchor cases (values are business-critical: check semantics, not only the snapshot). +- **Idempotency / invariants pinned** (principles 6–7, current behavior as-is): + - durable `parse_jobs` row exists **before** the RabbitMQ publish (never fire-and-forget). + - re-promote same staging row → dedup/no-op: `status='promoted'` + `promotion_evidence.duplicate_replay_id`. + - same `source_system`+`source_replay_id`, **different** checksum/bytes → `status='conflicted'` + + `conflict_details.reason='source_identity_changed_bytes'` (`service.ts:147`). + - checksum-duplicate (no source match) → `status='promoted'` + duplicate evidence appended (`service.ts:166`). + - re-deliver same `parse.completed` → terminal state recorded once. +- **Auth/role gate** (flow 4): a protected route rejects without role / accepts with role via the shared + `requireRole`/`requireAnyRole` pre-handlers (`src/modules/auth/routes/authorization.ts`). +- **Capture script** (gated — agent lacks VPS access, principles 4 & 8): **pulls real production + `ParserArtifact` objects from the VPS over SSH** (the actual artifacts `server-2` already ingested) → + packs them into the committed gzip archive. One-line human command run under `!`, documented. Note: + both machines run Happ VPN always-on — traffic to own VPSes must bypass it via an `ip rule` or SSH + hangs (global memory `happ-vpn-bypass-for-servers`). Local fallback floor = the ~10–13 + `replay-parser-2` golden inputs via its CLI, if the VPS pull is unavailable. +- **Shared helpers extracted** (principle 9, no duplication): one fixture-loader/unpacker and one + snapshot-normalizer, reused — never a hand-mirrored schema/DDL copy. +- Produced **through `solidstats-server-ts-tests`** conventions, citing the rules relied on (convention-bound). + +### Nice To Have +- Scale the archive to hundreds by pointing the capture script at a larger real corpus (full + fetcher→parser run / mounted `~/sg_stats`). + +### Non Goals +- request/moderation **business-logic workflow** — Phase 2 deliberately rewrites it into guided flows; + pinning it would create false reds. Only the **role-gate mechanism** is in scope, not request types/payloads. +- **Not** wired into fast `verify` / `test:coverage`; **no coverage obligation** (principle 10). +- **No** fresh-schema/bucket/db per test — the generic brief suggests it, but the repo convention is + `truncate … cascade`; Step 0 says repo conventions override the generic brief. +- **Not** a parity/value-vs-legacy comparison — that is the cutover diff harness. This pins `server-2`'s + own **current** behavior (the parsers are intentionally non-identical to legacy; see CUTOVER-MODEL). + +## Confirmed Decisions +| Decision | Choice | Rationale | Consequence | +|----------|--------|-----------|-------------| +| Refactor protected | Phase 2 `server-2` Track C convergence (behavior-preserving) | Mass Oxfmt reformat + `tsc`→tsdown are classic silent-runtime-drift sources; contract gate is blind to values | Oracle targets computed values + pipeline behavior | +| Scope | Full ingest→stats chain **+ public read API** assertion | Pins what frozen-contract/oasdiff cannot (values, pipeline) and confirms read surface returns them | Largest fixture/wiring surface; accepted | +| Realism | Real RabbitMQ **and** real MinIO/S3 round-trip via `buildApp()` + real runtime | A mock at a contract boundary hides the exact failures the oracle exists to catch (anti-pattern #1) | Slower, needs an await-seam; fine for a master-only job | +| Fixtures storage | Hundreds of **real** artifacts committed as one **gzip archive**, unpacked at test start | Artifacts are small; self-contained, reproducible, no external gating/skips | Archive lives in git; regenerate via capture script | +| Fixtures source | **Pull real production artifacts from the VPS over SSH** (gated, human-run script); parser-2 golden parse as local fallback | Real production data (principle 4) — the actual objects prod ingested; agent lacks VPS/VPN access | One-time gated capture under `!`; needs Happ VPN bypass for SSH | +| Assertions | Characterization snapshots (normalized) **+** computed bounty anchors on 2–3 cases | Only snapshots scale to hundreds; bounty is business-critical → also checked semantically | Needs a normalization layer + a few hand-computed expectations | +| Determinism | Drive promotion via `IntervalTask.runOnce()`; await completion via **bounded DB-state poll** | No injectable clock/id/pacer; consumer exposes no completion Promise; no real timers/handlers (principle 9) | Poll loop with generous timeout (test may run long) | +| Normalization | Map UUID → stable natural key (checksum / nickname / replay), redact timestamps, sort rows | DB generates `gen_random_uuid()` / `now()` (not injectable) | Keep ordering asserts where ordering is contractual (cursor, timeline) | +| Gate placement | Dedicated script (e.g. `test:golden`) + **master-only pre-deploy** CI job; **not** in `verify`/`test:coverage` | User: "не часть быстрого verify, только в мастере перед деплоем; может идти долго" | Zero coverage obligation; `verify` green without fixtures automatically (principles 8, 10) | +| Cross-app contract | `server-2` `ParserArtifact` == parser-2 `parse-artifact-v3` (traced, matches) | `runtime.ts:94` loads `ParserArtifact`; schema fields align, no version mismatch | Re-validate at capture; pin `parser_contract_version` | +| Conventions | Author through `solidstats-server-ts-tests` (+ shared testing standards) | Convention-bound test work; repo skills override generic brief | Cite rules; match harness/naming/isolation | + +## Assumptions +| Assumption | Confidence | Evidence | How To Validate | +|------------|------------|----------|-----------------| +| `server-2` ingests exactly the shape parser-2 emits (`parse-artifact-v3`) | High | Traced: `messages.ts:28`, `parser-artifact.ts:4`, `runtime.ts:94`; schema match | Validate a captured artifact loads + persists in research | +| `replay-parser-2` CLI builds and emits artifacts here | Medium | `crates/parser-cli` with `replay-parser-2` binary present | `cargo run --bin replay-parser-2 -- ` dry-run in research | +| The VPS holds hundreds of real `ParserArtifact` objects pullable over SSH | High (user-confirmed) | User: pull artifacts from the VPS over SSH | Confirm exact host/path-or-bucket + creds in research; Happ VPN bypass needed | +| `parser_contract_version` is stable across the behavior-preserving refactor | High | Refactor is toolchain-only, no parser change | Pin the value; document regen if it bumps | +| Master pre-deploy CI provides docker-compose PG/Rabbit/S3 | High | Existing `test:integration` job already needs them | Reuse that job's service setup | + +## Backend And Infrastructure Notes +| Topic | Decision/Default | Consequence | Hidden Cost | Breaking Point | +|-------|------------------|-------------|-------------|----------------| +| Message bus | Real RabbitMQ round-trip | Catches serialization/topology/ack regressions | Async flake; needs await-seam | Broker contract change | +| Object store | Real MinIO/S3 read of the artifact | Catches key-layout/serialization regressions | Per-run unique keys + cleanup | S3 key convention change | +| Snapshots | Committed golden, normalized | Scales to hundreds; any drift turns red | Normalization must be exact or it's brittle | Non-deterministic field leaks into snapshot | +| Run placement | Master-only, slow, separate job | No per-PR cost; no coverage obligation | Drift caught later (pre-deploy, not per-PR) | Someone wires it into `verify` by mistake | + +## Risks +| Risk | Severity | Why It Matters | Mitigation | +|------|----------|----------------|------------| +| Snapshot brittleness from non-deterministic fields | High | False reds erode trust in the oracle | One audited normalization layer (stable id map, redacted timestamps, sorted rows); determinism re-run check | +| `parser_contract_version` bump regenerates fixtures/snapshots | Medium | Oracle churns on unrelated parser releases | Pin version; document one-line regen command | +| Real-broker async, no completion Promise | Medium | Naive `sleep` is flaky/leaky | Bounded DB-state poll; `runOnce()`; no real timers/handlers (principle 9) | +| Archive size in git (hundreds of gz JSON) | Low-Med | Repo weight | Single compressed archive; revisit git-lfs/out-of-tree only if it grows large | +| VPS access for capture (SSH + Happ VPN bypass) | Medium | Capture hangs/fails without the `ip rule` bypass; agent cannot do it | Human runs the gated script under `!` with the bypass in place; commit the parser-2 golden floor so the oracle is never empty; log captured-vs-skipped (no silent caps) | +| Pinning behavior adjacent to the request rewrite | Medium | Phase 2 rewrites request/moderation → would false-red | Keep request business logic OUT; pin only role-gate + ingest/stats | +| Normalization hides a real id/order regression | Medium | Masks a true bug | Keep ordering assertions where order is contractual (cursor pagination, event timeline) | + +## Acceptance Criteria +- The oracle drives the **real** pipeline end-to-end (real PG/Rabbit/S3) for every artifact in the + archive and asserts: the normalized full-surface snapshot; bounty anchors (2–3 hand-computed); + the durable-job-before-publish invariant; dedup vs conflict branching; re-deliver terminal-once; + and the protected-route role gate. +- Runs via a dedicated script (e.g. `pnpm test:golden`) on **master pre-deploy only**; it is **not** in + `verify` and **not** in `test:coverage`; fast `verify` stays green at 100% coverage, untouched. +- Capture script + one-line human command committed; the gzip archive committed (or script + a note if + the larger corpus must be captured in a specific environment). +- Snapshots are generated from **current (pre-refactor)** code; re-running on current code is byte-stable + across runs (determinism proven, principle 6). +- Produced through `solidstats-server-ts-tests`, citing the rules relied on; shared helpers extracted + (one definition, no duplication). + +## Verification Plan +- Generate snapshots on current master → re-run twice → identical (determinism). +- Confirm the coverage gate is untouched (oracle excluded from `test`/`test:coverage`). +- Confirm `verify` is green **without** the archive present (skip-clean path, principle 8). +- Dry-run the capture script over the ~10–13 `replay-parser-2` golden inputs; load one artifact through + the real consumer to prove the cross-app contract. + +## Open Questions +| Priority | Question | Why It Matters | Owner/Status | +|----------|----------|----------------|--------------| +| P1 | Exact VPS host + path/bucket of the real artifacts, credentials, and the Happ VPN bypass for SSH | The gated capture script needs concrete coordinates | Research / spike (user has access) | +| P1 | Archive layout: single `.tar.gz` vs per-artifact `.json.gz`; unpack to tmp vs in-memory | Loader design + git diff noise | Plan | +| P1 | Exact normalization natural keys per table (checksum / nickname+replay / squad tag) | Snapshot stability hinges on it | Research / plan | +| P2 | Conflict-review case has no "real" artifact — accept a small synthetic staging pair for that one invariant? | One documented exception to the real-data rule | Plan | +| P2 | Capture a real `parse.failed` case for the failure-path terminal-state assertion | Cover the failure branch with real data | Plan | + +## Question Ledger +| Priority | Question | Answer | Decision Impact | +|----------|----------|--------|-----------------| +| P0 | Scope of the oracle | Full chain + read API | Pins values + pipeline + read surface | +| P1 | Fixtures: real vs synthetic | Real, captured | Capture script + real artifacts | +| P1 | Realism / boundaries | Maximal: real broker + S3 | Real round-trip, await-seam needed | +| P1 | Runtime / gate budget | Master-only pre-deploy, may run long, not in `verify` | Dedicated slow job; no coverage obligation | +| P0 | Where do "hundreds" live / run | Commit a gzip archive, unpack at test time | Self-contained in-tree corpus | +| P1 | Assertion mechanism | Snapshots + bounty anchor | Characterization + semantic anchor | + +## Recommended Next GSD Step +- **Primary: `/gsd-quick --full`** (discussion + research + plan-check + verify), as the brief mandates. + The real call path is already traced (hand the citations above to research); the research phase must + still **confirm the SSH capture mechanism** (VPS host + path/bucket + Happ VPN bypass → archive → + unpack) before planning, and the test must be authored through `solidstats-server-ts-tests`. +- Alternative: **`/gsd-spike`** first if the capture mechanism is uncertain (prove: SSH-pull one real + artifact from the VPS; it round-trips the real consumer; archive unpack + normalization is stable). +- Alternative: **`/gsd-plan-phase`** if this is treated as a phase rather than a quick task (the full + real-broker/S3 oracle + capture tooling is arguably phase-sized, not quick-sized). diff --git a/.prettierignore b/.prettierignore index 4f77895..683e0fe 100644 --- a/.prettierignore +++ b/.prettierignore @@ -6,3 +6,8 @@ node_modules gsd-briefs openapi AGENTS.md + +# Golden oracle file snapshots are machine-generated by `test:golden` +# (vitest toMatchFileSnapshot, JSON.stringify(...,2)); prettier must not rewrite +# them or the next `test:golden` run would mismatch the committed snapshot. +src/test/golden/__snapshots__ diff --git a/package.json b/package.json index 7b9e162d..449bd56 100644 --- a/package.json +++ b/package.json @@ -21,9 +21,10 @@ "format": "prettier --check .", "lint": "eslint .", "typecheck": "tsc --noEmit", - "test": "vitest run --exclude 'src/test/integration/**/*.test.ts' --exclude 'src/**/tests/postgres.test.ts'", - "test:coverage": "vitest run --coverage --no-file-parallelism", + "test": "vitest run --exclude 'src/test/integration/**/*.test.ts' --exclude 'src/**/tests/postgres.test.ts' --exclude 'src/test/golden/**/*.test.ts'", + "test:coverage": "vitest run --coverage --no-file-parallelism --exclude 'src/test/golden/**/*.test.ts'", "test:integration": "vitest run src/test/integration $(find src -path '*/tests/postgres.test.ts' -print) --no-file-parallelism", + "test:golden": "vitest run src/test/golden --no-file-parallelism", "test:schema": "vitest run src/test/integration/schema.test.ts", "db:migrate": "tsx src/infra/db/migrate.ts", "openapi:export": "tsx src/openapi/export-openapi.ts", diff --git a/src/test/golden/README.md b/src/test/golden/README.md new file mode 100644 index 0000000..013ea14 --- /dev/null +++ b/src/test/golden/README.md @@ -0,0 +1,96 @@ +# Golden end-to-end integration oracle + +A behavioral regression net that pins the **current observable behavior** of the +`server-2` ingest → stats pipeline (plus the public read surface and the auth role-gate) +against **real PostgreSQL + RabbitMQ + S3**, driven through the same factories `server.ts` +wires. It exists to catch integration-level drift before a behavior-preserving refactor: +a refactor must keep this suite green; any behavioral drift must turn it red. + +This is a **separate, master-only, slow** gate — it is **NOT** part of `pnpm verify` and +carries **zero coverage obligation** (`src/test/**` is coverage-excluded). + +## Layout + +``` +src/test/golden/ + fixtures/ + loader.ts # ONE shared loader/unpacker + archivePresent()/dockerReachable() skip guards + normalize.ts # ONE shared snapshot normalizer (uuid->token map, timestamp redaction, row sort) + artifacts.tar.gz # committed floor corpus (real parser-2 artifacts) — never empty + scripts/ + build-floor-archive.sh # committable floor: parser-2 CLI over its golden OCAP corpus + capture-artifacts.sh # gated VPS S3 capture of the hundreds-of-prod corpus (human-run, once) + pipeline.golden.test.ts # full-chain characterization oracle + bounty-anchor.golden.test.ts # hand-computed bounty anchors + invariants.golden.test.ts # idempotency / dedup / conflict / re-delivery / role-gate + __snapshots__/ # file snapshots (coverage-excluded) +``` + +## Running + +```sh +# bring up the docker-compose PG(15432) / RabbitMQ(5673) / S3(9000) services, then: +pnpm run test:golden +``` + +`test:golden` runs `vitest run src/test/golden --no-file-parallelism`. No `verify`-chained +script (`test`, `test:integration`, `test:coverage`) targets `src/test/golden`. The suites +**skip collection-time** on `describe.skipIf(!archivePresent())` and **runtime-skip** the live +block when the infra probe fails (`goldenInfraReachable` cannot be awaited inside +`describe.skipIf`, so the live bodies early-return on the per-suite `infraReachable` flag). So +when Docker or the archive are absent they **SKIP cleanly** — `pnpm verify` and `pnpm test` +stay green at 100% without them. + +## Building the committed floor + +The floor is the parser-2 golden corpus parsed through its CLI, so the oracle is never empty +without VPS access: + +```sh +bash src/test/golden/scripts/build-floor-archive.sh [PARSER_REPO_DIR] +``` + +Only `success`/`partial` artifacts are packed — the `parse.completed` consumer +nack-requeues on any throw (`rabbitmq.ts:126-128`), so a `failed` artifact fed through the +real broker would redeliver forever. The conflict and `parse.failed` invariants therefore +use synthetic non-broker paths. + +## Capturing the full production corpus (gated, master-only CI) + +The hundreds-of-real-artifacts corpus lives on the production VPS S3 bucket; the agent has +no VPS access, so the human runs the capture once. **Happ VPN is always-on** — ensure the +`ip rule` bypass for the VPS host is active first, or `mc`/`aws s3` will hang +(global memory `happ-vpn-bypass-for-servers`). + +```sh +VPS_S3_ENDPOINT=https:// \ +VPS_S3_BUCKET=solid-replays \ +VPS_S3_ACCESS_KEY_ID= \ +VPS_S3_SECRET_ACCESS_KEY= \ +bash src/test/golden/scripts/capture-artifacts.sh +``` + +The committed `artifacts.tar.gz` is the **floor** until the human runs the capture; the live +full-corpus run is a **master-only CI** step. Never commit VPS host/key/cred values — the +script reads them from env only. The captured artifact JSONs themselves are committable +(SteamID masking is enforced server-side at the mapper, so `GET /stats/*` snapshots cannot +contain Steam64). + +## Pinned tech-debt + +The oracle pins **current** behavior as-is. When a snapshot captures a known defect, leave a +one-line comment at the assertion site plus a backlog pointer — do **not** "fix" the behavior +inside the oracle (that would mask the drift the oracle exists to catch). + +> Example: `// PINNED TECH-DEBT (BACKLOG-xxx): is captured as-is; do not fix here.` + +## CI wiring + +A distinct master-only `golden-oracle` job is wired in `.github/workflows/cd.yml`. It is gated +to `push` on `master`/`main` (`if: github.event_name == 'push' && github.ref == 'refs/heads/master'`), +brings up the docker-compose PG/RabbitMQ/MinIO services, and runs `pnpm run test:golden` with a +30-minute job timeout. It is deliberately NOT a `pull_request` check and NOT part of the `verify` +job, so it never blocks a Docker-less PR and `verify` stays green at 100% without it. + +`test:golden` is never added to the `verify` chain or to `test:coverage` (zero coverage +obligation; `src/test/**` is coverage-excluded). diff --git a/src/test/golden/__snapshots__/pipeline-aggregate-combat.snap.json b/src/test/golden/__snapshots__/pipeline-aggregate-combat.snap.json new file mode 100644 index 0000000..bbb2a6a --- /dev/null +++ b/src/test/golden/__snapshots__/pipeline-aggregate-combat.snap.json @@ -0,0 +1,424 @@ +{ + "replays": [ + { + "id": "uuid:1", + "source_system": "golden", + "source_replay_id": "golden-aggregate-combat", + "object_key": "", + "checksum": "0000000000000000000000000000000000000000000000000000000000000001", + "size_bytes": "123", + "replay_timestamp": "2026-05-09T00:00:00.000Z", + "rotation_id": null, + "status": "parsed", + "promotion_evidence": {}, + "promoted_from_staging_id": "uuid:2", + "created_at": "", + "updated_at": "", + "slug": null, + "game_type": null + } + ], + "parse_jobs": [ + { + "id": "uuid:3", + "replay_id": "uuid:1", + "parser_contract_version": "3.0.0", + "object_key": "", + "checksum": "0000000000000000000000000000000000000000000000000000000000000001", + "status": "succeeded", + "attempts": 1, + "published_at": "", + "started_at": null, + "finished_at": "", + "error": null, + "created_at": "", + "updated_at": "" + } + ], + "parser_results": [ + { + "id": "uuid:4", + "replay_id": "uuid:1", + "parse_job_id": "uuid:3", + "parser_contract_version": "3.0.0", + "status": "current", + "raw_snapshot": { + "parser": { + "name": "replay-parser-2", + "version": "0.1.0" + }, + "replay": { + "end_frame": { + "state": "present", + "value": 120 + }, + "world_name": { + "state": "present", + "value": "Altis" + }, + "time_bounds": { + "state": "present", + "value": { + "end_seconds": 60, + "start_seconds": 0 + } + }, + "frame_bounds": { + "state": "present", + "value": { + "end_frame": 120, + "start_frame": 0 + } + }, + "mission_name": { + "state": "present", + "value": "sg aggregate projection" + }, + "capture_delay": { + "state": "present", + "value": 0.5 + }, + "players_count": { + "state": "present", + "value": [ + 0, + 6 + ] + }, + "mission_author": { + "state": "present", + "value": "SolidGames" + } + }, + "source": { + "checksum": { + "state": "present", + "value": { + "value": "1c7d9186b3d66267a36325e47a56c6c05ea1e09372df483ea04366bb5052ba2b", + "algorithm": "sha256" + } + }, + "source_file": "" + }, + "status": "success", + "players": [ + { + "g": "Alpha 1-1", + "k": 1, + "n": "Alpha", + "r": "Rifleman", + "s": "west", + "rn": "[A] Alpha", + "vk": 1, + "eid": 1, + "kfv": 1, + "tag": "[A]", + "kills": [ + { + "c": "enemy_kill", + "v": 2, + "w": 2, + "av": 30, + "avc": "car" + } + ] + }, + { + "d": 1, + "g": "Bravo 1-1", + "n": "Bravo", + "r": "Rifleman", + "s": "east", + "eid": 2 + }, + { + "d": 1, + "g": "Bravo 1-2", + "n": "Delta", + "r": "Grenadier", + "s": "east", + "eid": 4, + "nkd": 1 + }, + { + "d": 1, + "g": "Alpha 1-2", + "n": "Echo", + "r": "Medic", + "s": "west", + "ck": "legacy_name:Echo", + "td": 1, + "tk": 1, + "eid": 6, + "eids": [ + 5, + 6 + ], + "kills": [ + { + "c": "teamkill", + "v": 6, + "w": 1 + } + ] + } + ], + "weapons": [ + { + "n": "AK-74", + "id": 1 + }, + { + "n": "Offroad HMG", + "id": 2 + }, + { + "n": "RPG-7", + "id": 3 + } + ], + "contract_version": "3.0.0", + "destroyed_vehicles": [ + { + "a": 1, + "c": "enemy", + "w": 3, + "dc": "apc", + "de": 20, + "dt": "vehicle" + } + ] + }, + "created_at": "" + } + ], + "parser_events": [ + { + "id": "uuid:11", + "parser_result_id": "uuid:4", + "event_type": "destroyed_vehicle", + "occurred_at": null, + "observed_player_ref": "1", + "payload": { + "weapon_id": 3, + "weapon_name": "RPG-7", + "classification": "enemy", + "destroyed_type": "vehicle", + "destroyed_class": "apc", + "attacker_entity_id": 1, + "destroyed_entity_id": 20 + }, + "source_ref": { + "destroyed_vehicle_index": 0 + }, + "created_at": "" + }, + { + "id": "uuid:9", + "parser_result_id": "uuid:4", + "event_type": "kill", + "occurred_at": null, + "observed_player_ref": "1", + "payload": { + "attacker": { + "name": "Alpha", + "role": "Rifleman", + "side": "west", + "group": "Alpha 1-1", + "entity_id": 1 + }, + "weapon_id": 2, + "weapon_name": "Offroad HMG", + "classification": "enemy_kill", + "victim_entity_id": 2 + }, + "source_ref": { + "player_entity_id": 1, + "player_kill_index": 0 + }, + "created_at": "" + }, + { + "id": "uuid:5", + "parser_result_id": "uuid:4", + "event_type": "player_counter", + "occurred_at": null, + "observed_player_ref": "1", + "payload": { + "kills": 1, + "player": { + "name": "Alpha", + "role": "Rifleman", + "side": "west", + "group": "Alpha 1-1", + "entity_id": 1 + }, + "vehicle_kills": 1, + "kills_from_vehicle": 1 + }, + "source_ref": { + "player_entity_id": 1 + }, + "created_at": "" + }, + { + "id": "uuid:6", + "parser_result_id": "uuid:4", + "event_type": "player_counter", + "occurred_at": null, + "observed_player_ref": "2", + "payload": { + "player": { + "name": "Bravo", + "role": "Rifleman", + "side": "east", + "group": "Bravo 1-1", + "entity_id": 2 + }, + "deaths_total": 1 + }, + "source_ref": { + "player_entity_id": 2 + }, + "created_at": "" + }, + { + "id": "uuid:7", + "parser_result_id": "uuid:4", + "event_type": "player_counter", + "occurred_at": null, + "observed_player_ref": "4", + "payload": { + "player": { + "name": "Delta", + "role": "Grenadier", + "side": "east", + "group": "Bravo 1-2", + "entity_id": 4 + }, + "deaths_total": 1, + "null_killer_deaths": 1 + }, + "source_ref": { + "player_entity_id": 4 + }, + "created_at": "" + }, + { + "id": "uuid:8", + "parser_result_id": "uuid:4", + "event_type": "player_counter", + "occurred_at": null, + "observed_player_ref": "6", + "payload": { + "player": { + "name": "Echo", + "role": "Medic", + "side": "west", + "group": "Alpha 1-2", + "entity_id": 6 + }, + "teamkills": 1, + "deaths_total": 1, + "deaths_by_teamkills": 1 + }, + "source_ref": { + "player_entity_id": 6 + }, + "created_at": "" + }, + { + "id": "uuid:10", + "parser_result_id": "uuid:4", + "event_type": "teamkill", + "occurred_at": null, + "observed_player_ref": "6", + "payload": { + "attacker": { + "name": "Echo", + "role": "Medic", + "side": "west", + "group": "Alpha 1-2", + "entity_id": 6 + }, + "weapon_id": 1, + "weapon_name": "AK-74", + "classification": "teamkill", + "victim_entity_id": 6 + }, + "source_ref": { + "player_entity_id": 6, + "player_kill_index": 0 + }, + "created_at": "" + } + ], + "player_stats": [], + "squad_stats": [], + "commander_side_stats": [], + "bounty_points": [], + "ingest_staging_records": [ + { + "id": "uuid:2", + "source_system": "golden", + "source_replay_id": "golden-aggregate-combat", + "object_key": "", + "checksum": "0000000000000000000000000000000000000000000000000000000000000001", + "size_bytes": "123", + "replay_timestamp": "2026-05-09T00:00:00.000Z", + "status": "promoted", + "promotion_evidence": { + "replay_id": "uuid:1", + "parse_job_id": "uuid:3" + }, + "conflict_details": {}, + "created_at": "", + "updated_at": "" + } + ], + "GET /stats/overview": { + "filters": { + "rotationId": null + }, + "totals": { + "bountyPlayers": 0, + "commanderSides": 0, + "parsedReplays": 1, + "players": 0, + "playerStatRows": 0, + "replays": 1, + "squads": 0, + "squadStatRows": 0 + } + }, + "GET /stats/players": { + "hasMore": false, + "items": [], + "nextCursor": null + }, + "GET /stats/squads": { + "hasMore": false, + "items": [], + "nextCursor": null + }, + "GET /stats/bounty": { + "hasMore": false, + "items": [], + "nextCursor": null + }, + "GET /stats/replays": { + "hasMore": false, + "items": [ + { + "id": "uuid:1", + "slug": null, + "rotationId": null, + "replayTimestamp": "2026-05-09T00:00:00.000Z", + "sourceSystem": "golden", + "sourceReplayId": "golden-aggregate-combat", + "status": "parsed" + } + ], + "nextCursor": null + } +} \ No newline at end of file diff --git a/src/test/golden/__snapshots__/pipeline-combat-events.snap.json b/src/test/golden/__snapshots__/pipeline-combat-events.snap.json new file mode 100644 index 0000000..9e1cc0e --- /dev/null +++ b/src/test/golden/__snapshots__/pipeline-combat-events.snap.json @@ -0,0 +1,465 @@ +{ + "replays": [ + { + "id": "uuid:1", + "source_system": "golden", + "source_replay_id": "golden-combat-events", + "object_key": "", + "checksum": "0000000000000000000000000000000000000000000000000000000000000001", + "size_bytes": "123", + "replay_timestamp": "2026-05-09T00:00:00.000Z", + "rotation_id": null, + "status": "parsed", + "promotion_evidence": {}, + "promoted_from_staging_id": "uuid:2", + "created_at": "", + "updated_at": "", + "slug": null, + "game_type": null + } + ], + "parse_jobs": [ + { + "id": "uuid:3", + "replay_id": "uuid:1", + "parser_contract_version": "3.0.0", + "object_key": "", + "checksum": "0000000000000000000000000000000000000000000000000000000000000001", + "status": "succeeded", + "attempts": 1, + "published_at": "", + "started_at": null, + "finished_at": "", + "error": null, + "created_at": "", + "updated_at": "" + } + ], + "parser_results": [ + { + "id": "uuid:4", + "replay_id": "uuid:1", + "parse_job_id": "uuid:3", + "parser_contract_version": "3.0.0", + "status": "current", + "raw_snapshot": { + "parser": { + "name": "replay-parser-2", + "version": "0.1.0" + }, + "replay": { + "end_frame": { + "state": "present", + "value": 120 + }, + "world_name": { + "state": "present", + "value": "Altis" + }, + "time_bounds": { + "state": "present", + "value": { + "end_seconds": 60, + "start_seconds": 0 + } + }, + "frame_bounds": { + "state": "present", + "value": { + "end_frame": 120, + "start_frame": 0 + } + }, + "mission_name": { + "state": "present", + "value": "sg combat event semantics" + }, + "capture_delay": { + "state": "present", + "value": 0.5 + }, + "players_count": { + "state": "present", + "value": [ + 0, + 5 + ] + }, + "mission_author": { + "state": "present", + "value": "SolidGames" + } + }, + "source": { + "checksum": { + "state": "present", + "value": { + "value": "b65671725e5c77fba0255d9eb9307f410a2d85aafdcb7aae46e355efc1e3a97d", + "algorithm": "sha256" + } + }, + "source_file": "" + }, + "status": "partial", + "players": [ + { + "g": "Alpha 1-1", + "k": 1, + "n": "Alpha", + "r": "Rifleman", + "s": "west", + "tk": 1, + "vk": 1, + "eid": 1, + "kills": [ + { + "c": "enemy_kill", + "v": 2, + "w": 1 + }, + { + "c": "teamkill", + "v": 3, + "w": 1 + } + ] + }, + { + "d": 1, + "g": "Bravo 1-1", + "n": "Bravo", + "r": "Rifleman", + "s": "east", + "ud": 1, + "eid": 2 + }, + { + "d": 1, + "g": "Alpha 1-1", + "n": "Charlie", + "r": "Medic", + "s": "west", + "td": 1, + "eid": 3 + }, + { + "d": 1, + "g": "Bravo 1-1", + "n": "Delta", + "r": "Grenadier", + "s": "east", + "su": 1, + "eid": 4 + }, + { + "d": 1, + "g": "Bravo 1-2", + "n": "Echo", + "r": "Autorifleman", + "s": "east", + "eid": 5, + "nkd": 1 + } + ], + "weapons": [ + { + "n": "AK-74", + "id": 1 + }, + { + "n": "RPG-7", + "id": 2 + } + ], + "diagnostics": [ + { + "code": "event.killed_actor_unknown", + "message": "Killed event references an actor that is missing from normalized entities", + "severity": "warning", + "parser_action": "emit_unknown_combat_event" + } + ], + "contract_version": "3.0.0", + "destroyed_vehicles": [ + { + "a": 1, + "c": "enemy", + "w": 2, + "dc": "apc", + "de": 20, + "dt": "vehicle" + } + ] + }, + "created_at": "" + } + ], + "parser_events": [ + { + "id": "uuid:12", + "parser_result_id": "uuid:4", + "event_type": "destroyed_vehicle", + "occurred_at": null, + "observed_player_ref": "1", + "payload": { + "weapon_id": 2, + "weapon_name": "RPG-7", + "classification": "enemy", + "destroyed_type": "vehicle", + "destroyed_class": "apc", + "attacker_entity_id": 1, + "destroyed_entity_id": 20 + }, + "source_ref": { + "destroyed_vehicle_index": 0 + }, + "created_at": "" + }, + { + "id": "uuid:13", + "parser_result_id": "uuid:4", + "event_type": "diagnostic", + "occurred_at": null, + "observed_player_ref": null, + "payload": { + "code": "event.killed_actor_unknown", + "message": "Killed event references an actor that is missing from normalized entities", + "severity": "warning", + "parser_action": "emit_unknown_combat_event" + }, + "source_ref": { + "diagnostic_index": 0 + }, + "created_at": "" + }, + { + "id": "uuid:10", + "parser_result_id": "uuid:4", + "event_type": "kill", + "occurred_at": null, + "observed_player_ref": "1", + "payload": { + "attacker": { + "name": "Alpha", + "role": "Rifleman", + "side": "west", + "group": "Alpha 1-1", + "entity_id": 1 + }, + "weapon_id": 1, + "weapon_name": "AK-74", + "classification": "enemy_kill", + "victim_entity_id": 2 + }, + "source_ref": { + "player_entity_id": 1, + "player_kill_index": 0 + }, + "created_at": "" + }, + { + "id": "uuid:5", + "parser_result_id": "uuid:4", + "event_type": "player_counter", + "occurred_at": null, + "observed_player_ref": "1", + "payload": { + "kills": 1, + "player": { + "name": "Alpha", + "role": "Rifleman", + "side": "west", + "group": "Alpha 1-1", + "entity_id": 1 + }, + "teamkills": 1, + "vehicle_kills": 1 + }, + "source_ref": { + "player_entity_id": 1 + }, + "created_at": "" + }, + { + "id": "uuid:6", + "parser_result_id": "uuid:4", + "event_type": "player_counter", + "occurred_at": null, + "observed_player_ref": "2", + "payload": { + "player": { + "name": "Bravo", + "role": "Rifleman", + "side": "east", + "group": "Bravo 1-1", + "entity_id": 2 + }, + "deaths_total": 1, + "unknown_deaths": 1 + }, + "source_ref": { + "player_entity_id": 2 + }, + "created_at": "" + }, + { + "id": "uuid:7", + "parser_result_id": "uuid:4", + "event_type": "player_counter", + "occurred_at": null, + "observed_player_ref": "3", + "payload": { + "player": { + "name": "Charlie", + "role": "Medic", + "side": "west", + "group": "Alpha 1-1", + "entity_id": 3 + }, + "deaths_total": 1, + "deaths_by_teamkills": 1 + }, + "source_ref": { + "player_entity_id": 3 + }, + "created_at": "" + }, + { + "id": "uuid:8", + "parser_result_id": "uuid:4", + "event_type": "player_counter", + "occurred_at": null, + "observed_player_ref": "4", + "payload": { + "player": { + "name": "Delta", + "role": "Grenadier", + "side": "east", + "group": "Bravo 1-1", + "entity_id": 4 + }, + "suicides": 1, + "deaths_total": 1 + }, + "source_ref": { + "player_entity_id": 4 + }, + "created_at": "" + }, + { + "id": "uuid:9", + "parser_result_id": "uuid:4", + "event_type": "player_counter", + "occurred_at": null, + "observed_player_ref": "5", + "payload": { + "player": { + "name": "Echo", + "role": "Autorifleman", + "side": "east", + "group": "Bravo 1-2", + "entity_id": 5 + }, + "deaths_total": 1, + "null_killer_deaths": 1 + }, + "source_ref": { + "player_entity_id": 5 + }, + "created_at": "" + }, + { + "id": "uuid:11", + "parser_result_id": "uuid:4", + "event_type": "teamkill", + "occurred_at": null, + "observed_player_ref": "1", + "payload": { + "attacker": { + "name": "Alpha", + "role": "Rifleman", + "side": "west", + "group": "Alpha 1-1", + "entity_id": 1 + }, + "weapon_id": 1, + "weapon_name": "AK-74", + "classification": "teamkill", + "victim_entity_id": 3 + }, + "source_ref": { + "player_entity_id": 1, + "player_kill_index": 1 + }, + "created_at": "" + } + ], + "player_stats": [], + "squad_stats": [], + "commander_side_stats": [], + "bounty_points": [], + "ingest_staging_records": [ + { + "id": "uuid:2", + "source_system": "golden", + "source_replay_id": "golden-combat-events", + "object_key": "", + "checksum": "0000000000000000000000000000000000000000000000000000000000000001", + "size_bytes": "123", + "replay_timestamp": "2026-05-09T00:00:00.000Z", + "status": "promoted", + "promotion_evidence": { + "replay_id": "uuid:1", + "parse_job_id": "uuid:3" + }, + "conflict_details": {}, + "created_at": "", + "updated_at": "" + } + ], + "GET /stats/overview": { + "filters": { + "rotationId": null + }, + "totals": { + "bountyPlayers": 0, + "commanderSides": 0, + "parsedReplays": 1, + "players": 0, + "playerStatRows": 0, + "replays": 1, + "squads": 0, + "squadStatRows": 0 + } + }, + "GET /stats/players": { + "hasMore": false, + "items": [], + "nextCursor": null + }, + "GET /stats/squads": { + "hasMore": false, + "items": [], + "nextCursor": null + }, + "GET /stats/bounty": { + "hasMore": false, + "items": [], + "nextCursor": null + }, + "GET /stats/replays": { + "hasMore": false, + "items": [ + { + "id": "uuid:1", + "slug": null, + "rotationId": null, + "replayTimestamp": "2026-05-09T00:00:00.000Z", + "sourceSystem": "golden", + "sourceReplayId": "golden-combat-events", + "status": "parsed" + } + ], + "nextCursor": null + } +} \ No newline at end of file diff --git a/src/test/golden/__snapshots__/pipeline-connected-backfill.snap.json b/src/test/golden/__snapshots__/pipeline-connected-backfill.snap.json new file mode 100644 index 0000000..525439c --- /dev/null +++ b/src/test/golden/__snapshots__/pipeline-connected-backfill.snap.json @@ -0,0 +1,187 @@ +{ + "replays": [ + { + "id": "uuid:1", + "source_system": "golden", + "source_replay_id": "golden-connected-backfill", + "object_key": "", + "checksum": "0000000000000000000000000000000000000000000000000000000000000001", + "size_bytes": "123", + "replay_timestamp": "2026-05-09T00:00:00.000Z", + "rotation_id": null, + "status": "parsed", + "promotion_evidence": {}, + "promoted_from_staging_id": "uuid:2", + "created_at": "", + "updated_at": "", + "slug": null, + "game_type": null + } + ], + "parse_jobs": [ + { + "id": "uuid:3", + "replay_id": "uuid:1", + "parser_contract_version": "3.0.0", + "object_key": "", + "checksum": "0000000000000000000000000000000000000000000000000000000000000001", + "status": "succeeded", + "attempts": 1, + "published_at": "", + "started_at": null, + "finished_at": "", + "error": null, + "created_at": "", + "updated_at": "" + } + ], + "parser_results": [ + { + "id": "uuid:4", + "replay_id": "uuid:1", + "parse_job_id": "uuid:3", + "parser_contract_version": "3.0.0", + "status": "current", + "raw_snapshot": { + "parser": { + "name": "replay-parser-2", + "version": "0.1.0" + }, + "replay": { + "end_frame": { + "state": "present", + "value": 90 + }, + "world_name": { + "state": "present", + "value": "Altis" + }, + "time_bounds": { + "state": "present", + "value": { + "end_seconds": 45, + "start_seconds": 0 + } + }, + "frame_bounds": { + "state": "present", + "value": { + "end_frame": 90, + "start_frame": 0 + } + }, + "mission_name": { + "state": "present", + "value": "sg connected backfill" + }, + "capture_delay": { + "state": "present", + "value": 0.5 + }, + "players_count": { + "state": "present", + "value": [ + 0, + 1 + ] + }, + "mission_author": { + "state": "present", + "value": "SolidGames" + } + }, + "source": { + "checksum": { + "state": "present", + "value": { + "value": "732963c73f40d339f6227079749129ad5ac9bed6298e95401034b74725431d32", + "algorithm": "sha256" + } + }, + "source_file": "" + }, + "status": "success", + "players": [ + { + "g": "Alpha 1-1", + "n": "BackfilledName", + "r": "Rifleman", + "s": "west", + "eid": 11 + } + ], + "contract_version": "3.0.0" + }, + "created_at": "" + } + ], + "parser_events": [], + "player_stats": [], + "squad_stats": [], + "commander_side_stats": [], + "bounty_points": [], + "ingest_staging_records": [ + { + "id": "uuid:2", + "source_system": "golden", + "source_replay_id": "golden-connected-backfill", + "object_key": "", + "checksum": "0000000000000000000000000000000000000000000000000000000000000001", + "size_bytes": "123", + "replay_timestamp": "2026-05-09T00:00:00.000Z", + "status": "promoted", + "promotion_evidence": { + "replay_id": "uuid:1", + "parse_job_id": "uuid:3" + }, + "conflict_details": {}, + "created_at": "", + "updated_at": "" + } + ], + "GET /stats/overview": { + "filters": { + "rotationId": null + }, + "totals": { + "bountyPlayers": 0, + "commanderSides": 0, + "parsedReplays": 1, + "players": 0, + "playerStatRows": 0, + "replays": 1, + "squads": 0, + "squadStatRows": 0 + } + }, + "GET /stats/players": { + "hasMore": false, + "items": [], + "nextCursor": null + }, + "GET /stats/squads": { + "hasMore": false, + "items": [], + "nextCursor": null + }, + "GET /stats/bounty": { + "hasMore": false, + "items": [], + "nextCursor": null + }, + "GET /stats/replays": { + "hasMore": false, + "items": [ + { + "id": "uuid:1", + "slug": null, + "rotationId": null, + "replayTimestamp": "2026-05-09T00:00:00.000Z", + "sourceSystem": "golden", + "sourceReplayId": "golden-connected-backfill", + "status": "parsed" + } + ], + "nextCursor": null + } +} \ No newline at end of file diff --git a/src/test/golden/__snapshots__/pipeline-duplicate-slot-same-name.snap.json b/src/test/golden/__snapshots__/pipeline-duplicate-slot-same-name.snap.json new file mode 100644 index 0000000..b48b1b9 --- /dev/null +++ b/src/test/golden/__snapshots__/pipeline-duplicate-slot-same-name.snap.json @@ -0,0 +1,199 @@ +{ + "replays": [ + { + "id": "uuid:1", + "source_system": "golden", + "source_replay_id": "golden-duplicate-slot-same-name", + "object_key": "", + "checksum": "0000000000000000000000000000000000000000000000000000000000000001", + "size_bytes": "123", + "replay_timestamp": "2026-05-09T00:00:00.000Z", + "rotation_id": null, + "status": "parsed", + "promotion_evidence": {}, + "promoted_from_staging_id": "uuid:2", + "created_at": "", + "updated_at": "", + "slug": null, + "game_type": null + } + ], + "parse_jobs": [ + { + "id": "uuid:3", + "replay_id": "uuid:1", + "parser_contract_version": "3.0.0", + "object_key": "", + "checksum": "0000000000000000000000000000000000000000000000000000000000000001", + "status": "succeeded", + "attempts": 1, + "published_at": "", + "started_at": null, + "finished_at": "", + "error": null, + "created_at": "", + "updated_at": "" + } + ], + "parser_results": [ + { + "id": "uuid:4", + "replay_id": "uuid:1", + "parse_job_id": "uuid:3", + "parser_contract_version": "3.0.0", + "status": "current", + "raw_snapshot": { + "parser": { + "name": "replay-parser-2", + "version": "0.1.0" + }, + "replay": { + "end_frame": { + "state": "present", + "value": 120 + }, + "world_name": { + "state": "present", + "value": "Altis" + }, + "time_bounds": { + "state": "present", + "value": { + "end_seconds": 60, + "start_seconds": 0 + } + }, + "frame_bounds": { + "state": "present", + "value": { + "end_frame": 120, + "start_frame": 0 + } + }, + "mission_name": { + "state": "present", + "value": "sg duplicate slot same name" + }, + "capture_delay": { + "state": "present", + "value": 0.5 + }, + "players_count": { + "state": "present", + "value": [ + 0, + 3 + ] + }, + "mission_author": { + "state": "present", + "value": "SolidGames" + } + }, + "source": { + "checksum": { + "state": "present", + "value": { + "value": "0eb1bbc0a35237e7fb7c508d56b5c9d6d247cd72fc834026af5ec5b743956b4e", + "algorithm": "sha256" + } + }, + "source_file": "" + }, + "status": "success", + "players": [ + { + "g": "Alpha 1-2", + "n": "SameName", + "r": "Autorifleman", + "s": "west", + "ck": "legacy_name:SameName", + "eid": 22, + "eids": [ + 21, + 22 + ] + }, + { + "g": "Alpha 1-3", + "n": "OtherName", + "r": "Medic", + "s": "west", + "eid": 23 + } + ], + "contract_version": "3.0.0" + }, + "created_at": "" + } + ], + "parser_events": [], + "player_stats": [], + "squad_stats": [], + "commander_side_stats": [], + "bounty_points": [], + "ingest_staging_records": [ + { + "id": "uuid:2", + "source_system": "golden", + "source_replay_id": "golden-duplicate-slot-same-name", + "object_key": "", + "checksum": "0000000000000000000000000000000000000000000000000000000000000001", + "size_bytes": "123", + "replay_timestamp": "2026-05-09T00:00:00.000Z", + "status": "promoted", + "promotion_evidence": { + "replay_id": "uuid:1", + "parse_job_id": "uuid:3" + }, + "conflict_details": {}, + "created_at": "", + "updated_at": "" + } + ], + "GET /stats/overview": { + "filters": { + "rotationId": null + }, + "totals": { + "bountyPlayers": 0, + "commanderSides": 0, + "parsedReplays": 1, + "players": 0, + "playerStatRows": 0, + "replays": 1, + "squads": 0, + "squadStatRows": 0 + } + }, + "GET /stats/players": { + "hasMore": false, + "items": [], + "nextCursor": null + }, + "GET /stats/squads": { + "hasMore": false, + "items": [], + "nextCursor": null + }, + "GET /stats/bounty": { + "hasMore": false, + "items": [], + "nextCursor": null + }, + "GET /stats/replays": { + "hasMore": false, + "items": [ + { + "id": "uuid:1", + "slug": null, + "rotationId": null, + "replayTimestamp": "2026-05-09T00:00:00.000Z", + "sourceSystem": "golden", + "sourceReplayId": "golden-duplicate-slot-same-name", + "status": "parsed" + } + ], + "nextCursor": null + } +} \ No newline at end of file diff --git a/src/test/golden/__snapshots__/pipeline-killed-events.snap.json b/src/test/golden/__snapshots__/pipeline-killed-events.snap.json new file mode 100644 index 0000000..ac7f468 --- /dev/null +++ b/src/test/golden/__snapshots__/pipeline-killed-events.snap.json @@ -0,0 +1,296 @@ +{ + "replays": [ + { + "id": "uuid:1", + "source_system": "golden", + "source_replay_id": "golden-killed-events", + "object_key": "", + "checksum": "0000000000000000000000000000000000000000000000000000000000000001", + "size_bytes": "123", + "replay_timestamp": "2026-05-09T00:00:00.000Z", + "rotation_id": null, + "status": "parsed", + "promotion_evidence": {}, + "promoted_from_staging_id": "uuid:2", + "created_at": "", + "updated_at": "", + "slug": null, + "game_type": null + } + ], + "parse_jobs": [ + { + "id": "uuid:3", + "replay_id": "uuid:1", + "parser_contract_version": "3.0.0", + "object_key": "", + "checksum": "0000000000000000000000000000000000000000000000000000000000000001", + "status": "succeeded", + "attempts": 1, + "published_at": "", + "started_at": null, + "finished_at": "", + "error": null, + "created_at": "", + "updated_at": "" + } + ], + "parser_results": [ + { + "id": "uuid:4", + "replay_id": "uuid:1", + "parse_job_id": "uuid:3", + "parser_contract_version": "3.0.0", + "status": "current", + "raw_snapshot": { + "parser": { + "name": "replay-parser-2", + "version": "0.1.0" + }, + "replay": { + "end_frame": { + "state": "present", + "value": 60 + }, + "world_name": { + "state": "present", + "value": "Altis" + }, + "time_bounds": { + "state": "present", + "value": { + "end_seconds": 30, + "start_seconds": 0 + } + }, + "frame_bounds": { + "state": "present", + "value": { + "end_frame": 60, + "start_frame": 0 + } + }, + "mission_name": { + "state": "present", + "value": "sg killed events" + }, + "capture_delay": { + "state": "present", + "value": 0.5 + }, + "players_count": { + "state": "present", + "value": [ + 0, + 4 + ] + }, + "mission_author": { + "state": "present", + "value": "SolidGames" + } + }, + "source": { + "checksum": { + "state": "present", + "value": { + "value": "8f992efa312f1f7e250c358f3ac8916a7c2be661ade2b97378448cc2452a6484", + "algorithm": "sha256" + } + }, + "source_file": "" + }, + "status": "partial", + "diagnostics": [ + { + "code": "event.killed_actor_unknown", + "message": "Killed event references an actor that is missing from normalized entities", + "severity": "warning", + "parser_action": "emit_unknown_combat_event" + }, + { + "code": "event.killed_actor_unknown", + "message": "Killed event has an explicit null killer but no known player victim", + "severity": "warning", + "parser_action": "emit_unknown_combat_event" + }, + { + "code": "event.killed_actor_unknown", + "message": "Killed event references an actor that is missing from normalized entities", + "severity": "warning", + "parser_action": "emit_unknown_combat_event" + }, + { + "code": "event.killed_shape_unknown", + "message": "Killed event kill-info tuple had an unexpected source shape", + "severity": "warning", + "parser_action": "emit_unknown_combat_event" + }, + { + "code": "event.killed_shape_unknown", + "message": "Killed event frame had an unexpected source shape", + "severity": "warning", + "parser_action": "emit_unknown_combat_event" + } + ], + "contract_version": "3.0.0" + }, + "created_at": "" + } + ], + "parser_events": [ + { + "id": "uuid:5", + "parser_result_id": "uuid:4", + "event_type": "diagnostic", + "occurred_at": null, + "observed_player_ref": null, + "payload": { + "code": "event.killed_actor_unknown", + "message": "Killed event references an actor that is missing from normalized entities", + "severity": "warning", + "parser_action": "emit_unknown_combat_event" + }, + "source_ref": { + "diagnostic_index": 0 + }, + "created_at": "" + }, + { + "id": "uuid:6", + "parser_result_id": "uuid:4", + "event_type": "diagnostic", + "occurred_at": null, + "observed_player_ref": null, + "payload": { + "code": "event.killed_actor_unknown", + "message": "Killed event has an explicit null killer but no known player victim", + "severity": "warning", + "parser_action": "emit_unknown_combat_event" + }, + "source_ref": { + "diagnostic_index": 1 + }, + "created_at": "" + }, + { + "id": "uuid:7", + "parser_result_id": "uuid:4", + "event_type": "diagnostic", + "occurred_at": null, + "observed_player_ref": null, + "payload": { + "code": "event.killed_actor_unknown", + "message": "Killed event references an actor that is missing from normalized entities", + "severity": "warning", + "parser_action": "emit_unknown_combat_event" + }, + "source_ref": { + "diagnostic_index": 2 + }, + "created_at": "" + }, + { + "id": "uuid:8", + "parser_result_id": "uuid:4", + "event_type": "diagnostic", + "occurred_at": null, + "observed_player_ref": null, + "payload": { + "code": "event.killed_shape_unknown", + "message": "Killed event kill-info tuple had an unexpected source shape", + "severity": "warning", + "parser_action": "emit_unknown_combat_event" + }, + "source_ref": { + "diagnostic_index": 3 + }, + "created_at": "" + }, + { + "id": "uuid:9", + "parser_result_id": "uuid:4", + "event_type": "diagnostic", + "occurred_at": null, + "observed_player_ref": null, + "payload": { + "code": "event.killed_shape_unknown", + "message": "Killed event frame had an unexpected source shape", + "severity": "warning", + "parser_action": "emit_unknown_combat_event" + }, + "source_ref": { + "diagnostic_index": 4 + }, + "created_at": "" + } + ], + "player_stats": [], + "squad_stats": [], + "commander_side_stats": [], + "bounty_points": [], + "ingest_staging_records": [ + { + "id": "uuid:2", + "source_system": "golden", + "source_replay_id": "golden-killed-events", + "object_key": "", + "checksum": "0000000000000000000000000000000000000000000000000000000000000001", + "size_bytes": "123", + "replay_timestamp": "2026-05-09T00:00:00.000Z", + "status": "promoted", + "promotion_evidence": { + "replay_id": "uuid:1", + "parse_job_id": "uuid:3" + }, + "conflict_details": {}, + "created_at": "", + "updated_at": "" + } + ], + "GET /stats/overview": { + "filters": { + "rotationId": null + }, + "totals": { + "bountyPlayers": 0, + "commanderSides": 0, + "parsedReplays": 1, + "players": 0, + "playerStatRows": 0, + "replays": 1, + "squads": 0, + "squadStatRows": 0 + } + }, + "GET /stats/players": { + "hasMore": false, + "items": [], + "nextCursor": null + }, + "GET /stats/squads": { + "hasMore": false, + "items": [], + "nextCursor": null + }, + "GET /stats/bounty": { + "hasMore": false, + "items": [], + "nextCursor": null + }, + "GET /stats/replays": { + "hasMore": false, + "items": [ + { + "id": "uuid:1", + "slug": null, + "rotationId": null, + "replayTimestamp": "2026-05-09T00:00:00.000Z", + "sourceSystem": "golden", + "sourceReplayId": "golden-killed-events", + "status": "parsed" + } + ], + "nextCursor": null + } +} \ No newline at end of file diff --git a/src/test/golden/__snapshots__/pipeline-metadata-drift.snap.json b/src/test/golden/__snapshots__/pipeline-metadata-drift.snap.json new file mode 100644 index 0000000..81e5891 --- /dev/null +++ b/src/test/golden/__snapshots__/pipeline-metadata-drift.snap.json @@ -0,0 +1,201 @@ +{ + "replays": [ + { + "id": "uuid:1", + "source_system": "golden", + "source_replay_id": "golden-metadata-drift", + "object_key": "", + "checksum": "0000000000000000000000000000000000000000000000000000000000000001", + "size_bytes": "123", + "replay_timestamp": "2026-05-09T00:00:00.000Z", + "rotation_id": null, + "status": "parsed", + "promotion_evidence": {}, + "promoted_from_staging_id": "uuid:2", + "created_at": "", + "updated_at": "", + "slug": null, + "game_type": null + } + ], + "parse_jobs": [ + { + "id": "uuid:3", + "replay_id": "uuid:1", + "parser_contract_version": "3.0.0", + "object_key": "", + "checksum": "0000000000000000000000000000000000000000000000000000000000000001", + "status": "succeeded", + "attempts": 1, + "published_at": "", + "started_at": null, + "finished_at": "", + "error": null, + "created_at": "", + "updated_at": "" + } + ], + "parser_results": [ + { + "id": "uuid:4", + "replay_id": "uuid:1", + "parse_job_id": "uuid:3", + "parser_contract_version": "3.0.0", + "status": "current", + "raw_snapshot": { + "parser": { + "name": "replay-parser-2", + "version": "0.1.0" + }, + "replay": { + "end_frame": { + "state": "present", + "value": 120 + }, + "world_name": { + "state": "present", + "value": "Altis" + }, + "time_bounds": { + "state": "present", + "value": { + "end_seconds": 60, + "start_seconds": 0 + } + }, + "frame_bounds": { + "state": "present", + "value": { + "end_frame": 120, + "start_frame": 0 + } + }, + "mission_name": { + "state": "present", + "value": "sg solid operation" + }, + "capture_delay": { + "state": "present", + "value": 0.5 + }, + "players_count": { + "state": "unknown", + "reason": "schema_drift" + }, + "mission_author": { + "state": "unknown", + "reason": "source_field_absent" + } + }, + "source": { + "checksum": { + "state": "present", + "value": { + "value": "155eb487b408e1bf370934ce615fc99e112c95f391dc013a49b0f726291c05ae", + "algorithm": "sha256" + } + }, + "source_file": "" + }, + "status": "partial", + "diagnostics": [ + { + "code": "schema.metadata_field", + "message": "Metadata field playersCount had unexpected source shape", + "severity": "warning", + "parser_action": "set_unknown" + } + ], + "contract_version": "3.0.0" + }, + "created_at": "" + } + ], + "parser_events": [ + { + "id": "uuid:5", + "parser_result_id": "uuid:4", + "event_type": "diagnostic", + "occurred_at": null, + "observed_player_ref": null, + "payload": { + "code": "schema.metadata_field", + "message": "Metadata field playersCount had unexpected source shape", + "severity": "warning", + "parser_action": "set_unknown" + }, + "source_ref": { + "diagnostic_index": 0 + }, + "created_at": "" + } + ], + "player_stats": [], + "squad_stats": [], + "commander_side_stats": [], + "bounty_points": [], + "ingest_staging_records": [ + { + "id": "uuid:2", + "source_system": "golden", + "source_replay_id": "golden-metadata-drift", + "object_key": "", + "checksum": "0000000000000000000000000000000000000000000000000000000000000001", + "size_bytes": "123", + "replay_timestamp": "2026-05-09T00:00:00.000Z", + "status": "promoted", + "promotion_evidence": { + "replay_id": "uuid:1", + "parse_job_id": "uuid:3" + }, + "conflict_details": {}, + "created_at": "", + "updated_at": "" + } + ], + "GET /stats/overview": { + "filters": { + "rotationId": null + }, + "totals": { + "bountyPlayers": 0, + "commanderSides": 0, + "parsedReplays": 1, + "players": 0, + "playerStatRows": 0, + "replays": 1, + "squads": 0, + "squadStatRows": 0 + } + }, + "GET /stats/players": { + "hasMore": false, + "items": [], + "nextCursor": null + }, + "GET /stats/squads": { + "hasMore": false, + "items": [], + "nextCursor": null + }, + "GET /stats/bounty": { + "hasMore": false, + "items": [], + "nextCursor": null + }, + "GET /stats/replays": { + "hasMore": false, + "items": [ + { + "id": "uuid:1", + "slug": null, + "rotationId": null, + "replayTimestamp": "2026-05-09T00:00:00.000Z", + "sourceSystem": "golden", + "sourceReplayId": "golden-metadata-drift", + "status": "parsed" + } + ], + "nextCursor": null + } +} \ No newline at end of file diff --git a/src/test/golden/__snapshots__/pipeline-side-facts.snap.json b/src/test/golden/__snapshots__/pipeline-side-facts.snap.json new file mode 100644 index 0000000..5dbecbf --- /dev/null +++ b/src/test/golden/__snapshots__/pipeline-side-facts.snap.json @@ -0,0 +1,194 @@ +{ + "replays": [ + { + "id": "uuid:1", + "source_system": "golden", + "source_replay_id": "golden-side-facts", + "object_key": "", + "checksum": "0000000000000000000000000000000000000000000000000000000000000001", + "size_bytes": "123", + "replay_timestamp": "2026-05-09T00:00:00.000Z", + "rotation_id": null, + "status": "parsed", + "promotion_evidence": {}, + "promoted_from_staging_id": "uuid:2", + "created_at": "", + "updated_at": "", + "slug": null, + "game_type": null + } + ], + "parse_jobs": [ + { + "id": "uuid:3", + "replay_id": "uuid:1", + "parser_contract_version": "3.0.0", + "object_key": "", + "checksum": "0000000000000000000000000000000000000000000000000000000000000001", + "status": "succeeded", + "attempts": 1, + "published_at": "", + "started_at": null, + "finished_at": "", + "error": null, + "created_at": "", + "updated_at": "" + } + ], + "parser_results": [ + { + "id": "uuid:4", + "replay_id": "uuid:1", + "parse_job_id": "uuid:3", + "parser_contract_version": "3.0.0", + "status": "current", + "raw_snapshot": { + "parser": { + "name": "replay-parser-2", + "version": "0.1.0" + }, + "replay": { + "end_frame": { + "state": "present", + "value": 120 + }, + "world_name": { + "state": "present", + "value": "Altis" + }, + "time_bounds": { + "state": "present", + "value": { + "end_seconds": 60, + "start_seconds": 0 + } + }, + "frame_bounds": { + "state": "present", + "value": { + "end_frame": 120, + "start_frame": 0 + } + }, + "mission_name": { + "state": "present", + "value": "sg side facts" + }, + "capture_delay": { + "state": "present", + "value": 0.5 + }, + "players_count": { + "state": "present", + "value": [ + 0, + 2 + ] + }, + "mission_author": { + "state": "present", + "value": "SolidGames" + } + }, + "source": { + "checksum": { + "state": "present", + "value": { + "value": "a5f60b6eee977775d2cf8a48c109ee8bc31bb61169742059ab8f929e75451ca2", + "algorithm": "sha256" + } + }, + "source_file": "" + }, + "status": "success", + "players": [ + { + "g": "HQ", + "n": "Alpha KS", + "r": "Commander", + "s": "west", + "eid": 1 + }, + { + "g": "Bravo 1-1", + "n": "Bravo", + "r": "Rifleman", + "s": "east", + "eid": 2 + } + ], + "contract_version": "3.0.0" + }, + "created_at": "" + } + ], + "parser_events": [], + "player_stats": [], + "squad_stats": [], + "commander_side_stats": [], + "bounty_points": [], + "ingest_staging_records": [ + { + "id": "uuid:2", + "source_system": "golden", + "source_replay_id": "golden-side-facts", + "object_key": "", + "checksum": "0000000000000000000000000000000000000000000000000000000000000001", + "size_bytes": "123", + "replay_timestamp": "2026-05-09T00:00:00.000Z", + "status": "promoted", + "promotion_evidence": { + "replay_id": "uuid:1", + "parse_job_id": "uuid:3" + }, + "conflict_details": {}, + "created_at": "", + "updated_at": "" + } + ], + "GET /stats/overview": { + "filters": { + "rotationId": null + }, + "totals": { + "bountyPlayers": 0, + "commanderSides": 0, + "parsedReplays": 1, + "players": 0, + "playerStatRows": 0, + "replays": 1, + "squads": 0, + "squadStatRows": 0 + } + }, + "GET /stats/players": { + "hasMore": false, + "items": [], + "nextCursor": null + }, + "GET /stats/squads": { + "hasMore": false, + "items": [], + "nextCursor": null + }, + "GET /stats/bounty": { + "hasMore": false, + "items": [], + "nextCursor": null + }, + "GET /stats/replays": { + "hasMore": false, + "items": [ + { + "id": "uuid:1", + "slug": null, + "rotationId": null, + "replayTimestamp": "2026-05-09T00:00:00.000Z", + "sourceSystem": "golden", + "sourceReplayId": "golden-side-facts", + "status": "parsed" + } + ], + "nextCursor": null + } +} \ No newline at end of file diff --git a/src/test/golden/__snapshots__/pipeline-valid-minimal.snap.json b/src/test/golden/__snapshots__/pipeline-valid-minimal.snap.json new file mode 100644 index 0000000..c66fc5f --- /dev/null +++ b/src/test/golden/__snapshots__/pipeline-valid-minimal.snap.json @@ -0,0 +1,179 @@ +{ + "replays": [ + { + "id": "uuid:1", + "source_system": "golden", + "source_replay_id": "golden-valid-minimal", + "object_key": "", + "checksum": "0000000000000000000000000000000000000000000000000000000000000001", + "size_bytes": "123", + "replay_timestamp": "2026-05-09T00:00:00.000Z", + "rotation_id": null, + "status": "parsed", + "promotion_evidence": {}, + "promoted_from_staging_id": "uuid:2", + "created_at": "", + "updated_at": "", + "slug": null, + "game_type": null + } + ], + "parse_jobs": [ + { + "id": "uuid:3", + "replay_id": "uuid:1", + "parser_contract_version": "3.0.0", + "object_key": "", + "checksum": "0000000000000000000000000000000000000000000000000000000000000001", + "status": "succeeded", + "attempts": 1, + "published_at": "", + "started_at": null, + "finished_at": "", + "error": null, + "created_at": "", + "updated_at": "" + } + ], + "parser_results": [ + { + "id": "uuid:4", + "replay_id": "uuid:1", + "parse_job_id": "uuid:3", + "parser_contract_version": "3.0.0", + "status": "current", + "raw_snapshot": { + "parser": { + "name": "replay-parser-2", + "version": "0.1.0" + }, + "replay": { + "end_frame": { + "state": "present", + "value": 120 + }, + "world_name": { + "state": "present", + "value": "Altis" + }, + "time_bounds": { + "state": "present", + "value": { + "end_seconds": 60, + "start_seconds": 0 + } + }, + "frame_bounds": { + "state": "present", + "value": { + "end_frame": 120, + "start_frame": 0 + } + }, + "mission_name": { + "state": "present", + "value": "sg solid operation" + }, + "capture_delay": { + "state": "present", + "value": 0.5 + }, + "players_count": { + "state": "present", + "value": [ + 0, + 12, + 10 + ] + }, + "mission_author": { + "state": "present", + "value": "SolidGames" + } + }, + "source": { + "checksum": { + "state": "present", + "value": { + "value": "e41b8b54016a44259726474ee9b74cb5350ca23894e43732c37cde8d951d0eec", + "algorithm": "sha256" + } + }, + "source_file": "" + }, + "status": "success", + "contract_version": "3.0.0" + }, + "created_at": "" + } + ], + "parser_events": [], + "player_stats": [], + "squad_stats": [], + "commander_side_stats": [], + "bounty_points": [], + "ingest_staging_records": [ + { + "id": "uuid:2", + "source_system": "golden", + "source_replay_id": "golden-valid-minimal", + "object_key": "", + "checksum": "0000000000000000000000000000000000000000000000000000000000000001", + "size_bytes": "123", + "replay_timestamp": "2026-05-09T00:00:00.000Z", + "status": "promoted", + "promotion_evidence": { + "replay_id": "uuid:1", + "parse_job_id": "uuid:3" + }, + "conflict_details": {}, + "created_at": "", + "updated_at": "" + } + ], + "GET /stats/overview": { + "filters": { + "rotationId": null + }, + "totals": { + "bountyPlayers": 0, + "commanderSides": 0, + "parsedReplays": 1, + "players": 0, + "playerStatRows": 0, + "replays": 1, + "squads": 0, + "squadStatRows": 0 + } + }, + "GET /stats/players": { + "hasMore": false, + "items": [], + "nextCursor": null + }, + "GET /stats/squads": { + "hasMore": false, + "items": [], + "nextCursor": null + }, + "GET /stats/bounty": { + "hasMore": false, + "items": [], + "nextCursor": null + }, + "GET /stats/replays": { + "hasMore": false, + "items": [ + { + "id": "uuid:1", + "slug": null, + "rotationId": null, + "replayTimestamp": "2026-05-09T00:00:00.000Z", + "sourceSystem": "golden", + "sourceReplayId": "golden-valid-minimal", + "status": "parsed" + } + ], + "nextCursor": null + } +} \ No newline at end of file diff --git a/src/test/golden/__snapshots__/pipeline-vehicle-context.snap.json b/src/test/golden/__snapshots__/pipeline-vehicle-context.snap.json new file mode 100644 index 0000000..3738f54 --- /dev/null +++ b/src/test/golden/__snapshots__/pipeline-vehicle-context.snap.json @@ -0,0 +1,440 @@ +{ + "replays": [ + { + "id": "uuid:1", + "source_system": "golden", + "source_replay_id": "golden-vehicle-context", + "object_key": "", + "checksum": "0000000000000000000000000000000000000000000000000000000000000001", + "size_bytes": "123", + "replay_timestamp": "2026-05-09T00:00:00.000Z", + "rotation_id": null, + "status": "parsed", + "promotion_evidence": {}, + "promoted_from_staging_id": "uuid:2", + "created_at": "", + "updated_at": "", + "slug": null, + "game_type": null + } + ], + "parse_jobs": [ + { + "id": "uuid:3", + "replay_id": "uuid:1", + "parser_contract_version": "3.0.0", + "object_key": "", + "checksum": "0000000000000000000000000000000000000000000000000000000000000001", + "status": "succeeded", + "attempts": 1, + "published_at": "", + "started_at": null, + "finished_at": "", + "error": null, + "created_at": "", + "updated_at": "" + } + ], + "parser_results": [ + { + "id": "uuid:4", + "replay_id": "uuid:1", + "parse_job_id": "uuid:3", + "parser_contract_version": "3.0.0", + "status": "current", + "raw_snapshot": { + "parser": { + "name": "replay-parser-2", + "version": "0.1.0" + }, + "replay": { + "end_frame": { + "state": "present", + "value": 80 + }, + "world_name": { + "state": "present", + "value": "Altis" + }, + "time_bounds": { + "state": "present", + "value": { + "end_seconds": 40, + "start_seconds": 0 + } + }, + "frame_bounds": { + "state": "present", + "value": { + "end_frame": 80, + "start_frame": 0 + } + }, + "mission_name": { + "state": "present", + "value": "sg vehicle score" + }, + "capture_delay": { + "state": "present", + "value": 0.5 + }, + "players_count": { + "state": "present", + "value": [ + 0, + 3 + ] + }, + "mission_author": { + "state": "present", + "value": "SolidGames" + } + }, + "source": { + "checksum": { + "state": "present", + "value": { + "value": "74f29a3e513994e09cf8730a48ebe5cd239c5f3c6c3fddc534d49cd61634c1f2", + "algorithm": "sha256" + } + }, + "source_file": "" + }, + "status": "success", + "players": [ + { + "g": "Alpha 1-1", + "k": 1, + "n": "Tank Gunner", + "r": "Crewman", + "s": "west", + "tk": 1, + "vk": 3, + "eid": 1, + "kfv": 1, + "kills": [ + { + "c": "enemy_kill", + "v": 2, + "w": 1, + "av": 20, + "avc": "rhs_t72ba_tv" + }, + { + "c": "teamkill", + "v": 3, + "w": 1, + "av": 20, + "avc": "rhs_t72ba_tv" + } + ] + }, + { + "d": 1, + "g": "Bravo 1-1", + "n": "Enemy Rifleman", + "r": "Rifleman", + "s": "east", + "eid": 2 + }, + { + "d": 1, + "g": "Alpha 1-2", + "n": "Friendly Rifleman", + "r": "Rifleman", + "s": "west", + "td": 1, + "eid": 3 + } + ], + "weapons": [ + { + "n": "T-72", + "id": 1 + } + ], + "contract_version": "3.0.0", + "destroyed_vehicles": [ + { + "a": 1, + "c": "enemy", + "w": 1, + "av": 20, + "dc": "static-weapon", + "de": 10, + "dt": "static_weapon", + "avc": "rhs_t72ba_tv" + }, + { + "a": 1, + "c": "enemy", + "w": 1, + "av": 20, + "dc": "static-weapon", + "de": 10, + "dt": "static_weapon", + "avc": "rhs_t72ba_tv" + }, + { + "a": 1, + "c": "friendly", + "w": 1, + "av": 20, + "dc": "static-weapon", + "de": 21, + "dt": "static_weapon", + "avc": "rhs_t72ba_tv" + } + ] + }, + "created_at": "" + } + ], + "parser_events": [ + { + "id": "uuid:10", + "parser_result_id": "uuid:4", + "event_type": "destroyed_vehicle", + "occurred_at": null, + "observed_player_ref": "1", + "payload": { + "weapon_id": 1, + "weapon_name": "T-72", + "classification": "enemy", + "destroyed_type": "static_weapon", + "destroyed_class": "static-weapon", + "attacker_entity_id": 1, + "destroyed_entity_id": 10 + }, + "source_ref": { + "destroyed_vehicle_index": 0 + }, + "created_at": "" + }, + { + "id": "uuid:11", + "parser_result_id": "uuid:4", + "event_type": "destroyed_vehicle", + "occurred_at": null, + "observed_player_ref": "1", + "payload": { + "weapon_id": 1, + "weapon_name": "T-72", + "classification": "enemy", + "destroyed_type": "static_weapon", + "destroyed_class": "static-weapon", + "attacker_entity_id": 1, + "destroyed_entity_id": 10 + }, + "source_ref": { + "destroyed_vehicle_index": 1 + }, + "created_at": "" + }, + { + "id": "uuid:12", + "parser_result_id": "uuid:4", + "event_type": "destroyed_vehicle", + "occurred_at": null, + "observed_player_ref": "1", + "payload": { + "weapon_id": 1, + "weapon_name": "T-72", + "classification": "friendly", + "destroyed_type": "static_weapon", + "destroyed_class": "static-weapon", + "attacker_entity_id": 1, + "destroyed_entity_id": 21 + }, + "source_ref": { + "destroyed_vehicle_index": 2 + }, + "created_at": "" + }, + { + "id": "uuid:8", + "parser_result_id": "uuid:4", + "event_type": "kill", + "occurred_at": null, + "observed_player_ref": "1", + "payload": { + "attacker": { + "name": "Tank Gunner", + "role": "Crewman", + "side": "west", + "group": "Alpha 1-1", + "entity_id": 1 + }, + "weapon_id": 1, + "weapon_name": "T-72", + "classification": "enemy_kill", + "victim_entity_id": 2 + }, + "source_ref": { + "player_entity_id": 1, + "player_kill_index": 0 + }, + "created_at": "" + }, + { + "id": "uuid:5", + "parser_result_id": "uuid:4", + "event_type": "player_counter", + "occurred_at": null, + "observed_player_ref": "1", + "payload": { + "kills": 1, + "player": { + "name": "Tank Gunner", + "role": "Crewman", + "side": "west", + "group": "Alpha 1-1", + "entity_id": 1 + }, + "teamkills": 1, + "vehicle_kills": 3, + "kills_from_vehicle": 1 + }, + "source_ref": { + "player_entity_id": 1 + }, + "created_at": "" + }, + { + "id": "uuid:6", + "parser_result_id": "uuid:4", + "event_type": "player_counter", + "occurred_at": null, + "observed_player_ref": "2", + "payload": { + "player": { + "name": "Enemy Rifleman", + "role": "Rifleman", + "side": "east", + "group": "Bravo 1-1", + "entity_id": 2 + }, + "deaths_total": 1 + }, + "source_ref": { + "player_entity_id": 2 + }, + "created_at": "" + }, + { + "id": "uuid:7", + "parser_result_id": "uuid:4", + "event_type": "player_counter", + "occurred_at": null, + "observed_player_ref": "3", + "payload": { + "player": { + "name": "Friendly Rifleman", + "role": "Rifleman", + "side": "west", + "group": "Alpha 1-2", + "entity_id": 3 + }, + "deaths_total": 1, + "deaths_by_teamkills": 1 + }, + "source_ref": { + "player_entity_id": 3 + }, + "created_at": "" + }, + { + "id": "uuid:9", + "parser_result_id": "uuid:4", + "event_type": "teamkill", + "occurred_at": null, + "observed_player_ref": "1", + "payload": { + "attacker": { + "name": "Tank Gunner", + "role": "Crewman", + "side": "west", + "group": "Alpha 1-1", + "entity_id": 1 + }, + "weapon_id": 1, + "weapon_name": "T-72", + "classification": "teamkill", + "victim_entity_id": 3 + }, + "source_ref": { + "player_entity_id": 1, + "player_kill_index": 1 + }, + "created_at": "" + } + ], + "player_stats": [], + "squad_stats": [], + "commander_side_stats": [], + "bounty_points": [], + "ingest_staging_records": [ + { + "id": "uuid:2", + "source_system": "golden", + "source_replay_id": "golden-vehicle-context", + "object_key": "", + "checksum": "0000000000000000000000000000000000000000000000000000000000000001", + "size_bytes": "123", + "replay_timestamp": "2026-05-09T00:00:00.000Z", + "status": "promoted", + "promotion_evidence": { + "replay_id": "uuid:1", + "parse_job_id": "uuid:3" + }, + "conflict_details": {}, + "created_at": "", + "updated_at": "" + } + ], + "GET /stats/overview": { + "filters": { + "rotationId": null + }, + "totals": { + "bountyPlayers": 0, + "commanderSides": 0, + "parsedReplays": 1, + "players": 0, + "playerStatRows": 0, + "replays": 1, + "squads": 0, + "squadStatRows": 0 + } + }, + "GET /stats/players": { + "hasMore": false, + "items": [], + "nextCursor": null + }, + "GET /stats/squads": { + "hasMore": false, + "items": [], + "nextCursor": null + }, + "GET /stats/bounty": { + "hasMore": false, + "items": [], + "nextCursor": null + }, + "GET /stats/replays": { + "hasMore": false, + "items": [ + { + "id": "uuid:1", + "slug": null, + "rotationId": null, + "replayTimestamp": "2026-05-09T00:00:00.000Z", + "sourceSystem": "golden", + "sourceReplayId": "golden-vehicle-context", + "status": "parsed" + } + ], + "nextCursor": null + } +} \ No newline at end of file diff --git a/src/test/golden/bounty-anchor.golden.test.ts b/src/test/golden/bounty-anchor.golden.test.ts new file mode 100644 index 0000000..b50baee --- /dev/null +++ b/src/test/golden/bounty-anchor.golden.test.ts @@ -0,0 +1,369 @@ +/* eslint-disable init-declarations, max-lines, no-magic-numbers, @typescript-eslint/no-unnecessary-condition */ +// +// bounty-anchor.golden.test.ts — hand-computed bounty anchors. +// +// Bounty is business-critical, so these assert SEMANTICS with toEqual hand-computed +// values (not only snapshot equality) — [testing-standards §G strong oracle]. A +// non-trivial bounty needs a seeded PREVIOUS rotation supplying effectiveness +// (RESEARCH §5): points = round₂((1+playerEff)·(1+squadEff)), +// eff = kills / max(1, deaths.total). Expected values are recomputed BY HAND from +// the formula — never by calling the production calculator (RESEARCH Don't-Hand-Roll). +// +// Source artifact: the parser-2 `aggregate-combat` floor artifact, where Alpha(eid 1) +// enemy-kills Bravo(eid 2) and Echo(eid 6) teamkills itself — covering player-only +// effectiveness, player+squad effectiveness, and the excluded-teamkill case. +import { PutObjectCommand, S3Client } from "@aws-sdk/client-s3"; +import { Pool } from "pg"; +import { afterAll, beforeAll, beforeEach, describe, expect, it } from "vitest"; + +import { runMigrations } from "../../infra/db/migrate.js"; +import { + createRabbitMqParserRuntime, + type RabbitMqParserRuntime, +} from "../../infra/queue/rabbitmq.js"; +import { createStorageClient } from "../../infra/storage/client.js"; +import { PgIngestRepository } from "../../modules/ingest/repository/repository.js"; +import { IngestPromotionService } from "../../modules/ingest/service.js"; +import { PgStatisticsRepository } from "../../modules/statistics/repository/repository.js"; +import { ParserResultRecalculationService } from "../../modules/statistics/service/recalculation.js"; + +import { + TRUNCATE_ALL, + completedMessage, + pollUntil, + publishCompleted, + purgeParserQueues, +} from "./fixtures/harness.js"; +import { + archivePresent, + goldenConfig, + goldenInfraReachable, + loadGoldenArtifacts, +} from "./fixtures/loader.js"; + +import type { ParserArtifact } from "../../modules/statistics/parser-artifact.js"; + +const config = goldenConfig(), + pool = new Pool({ connectionString: config.databaseUrl }), + runId = Date.now().toString(36), + completedReplays = new Set(), + // The aggregate-combat artifact is the bounty source (Alpha→Bravo enemy_kill, + // Echo→Echo teamkill). Loaded synchronously at collection; runtime infra-absence + // is handled by the per-test early return. + aggregateCombat: ParserArtifact | undefined = archivePresent() + ? loadGoldenArtifacts().find((item) => item.name === "aggregate-combat") + ?.artifact + : undefined; + +let infraReachable = false, + broker: RabbitMqParserRuntime, + storage: ReturnType, + s3: S3Client, + ingestRepository: PgIngestRepository, + promotionService: IngestPromotionService; + +const CURRENT_STARTS = "2026-05-01T00:00:00.000Z", + PREV_STARTS = "2026-04-01T00:00:00.000Z", + REPLAY_TS = "2026-05-09T00:00:00.000Z"; + +beforeAll(async () => { + infraReachable = await goldenInfraReachable(config); + if (!infraReachable) { + return; + } + await runMigrations(config.databaseUrl); + await purgeParserQueues(config.rabbitmqUrl); + ingestRepository = new PgIngestRepository(pool); + const statisticsRepository = new PgStatisticsRepository(pool), + recalculation = new ParserResultRecalculationService(statisticsRepository); + promotionService = new IngestPromotionService(ingestRepository); + storage = createStorageClient(config); + broker = await createRabbitMqParserRuntime(config); + s3 = new S3Client({ + credentials: { + accessKeyId: config.s3.accessKeyId, + secretAccessKey: config.s3.secretAccessKey, + }, + endpoint: config.s3.endpoint, + forcePathStyle: config.s3.forcePathStyle, + region: config.s3.region, + }); + await broker.consumeParserResults({ + completed: async (message) => { + const artifact = await storage.loadParserArtifact(message.artifact), + parserResultId = await ingestRepository.recordParserCompleted({ + ...message, + rawSnapshot: artifact, + }); + if (parserResultId !== null) { + await recalculation.recalculateParserResult(parserResultId, artifact); + } + completedReplays.add(message.replay_id); + }, + failed: () => Promise.resolve(), + }); +}); + +afterAll(async () => { + await broker?.close(); + s3?.destroy(); + await storage?.close(); + await pool.end(); +}); + +beforeEach(async () => { + if (infraReachable) { + await pool.query(TRUNCATE_ALL); + } +}); + +describe.skipIf(!archivePresent())("golden bounty anchors", () => { + it("documents a clean skip when docker-compose is absent", () => { + expect(typeof infraReachable).toBe("boolean"); + }); + + it("(a) player-only effectiveness: Bravo prev kills=3/deaths=1 → Alpha points=4.00", async () => { + if (!infraReachable) { + return; + } + const seeds = await seedRotationsAndPlayers(); + // Victim Bravo previous-rotation effectiveness: 3 / max(1,1) = 3 → playerEff=3. + // No squad seeded for Bravo → squadFactor=0. Alpha points = round₂(1·(1+3)·(1+0)) = 4.00. + await seedPreviousPlayerStats({ + deathsTotal: 1, + entityId: seeds.bravoId, + kills: 3, + rotationId: seeds.prevRotationId, + }); + + await driveBounty(); + + const alphaPoints = await rotationBountyPoints( + seeds.currentRotationId, + seeds.alphaId, + ); + expect(alphaPoints).toEqual(4); + }); + + it("(b) player+squad effectiveness: Bravo player 2/1 + squad 4/2 → Alpha points=9.00", async () => { + if (!infraReachable) { + return; + } + const seeds = await seedRotationsAndPlayers(); + // Put Bravo in a squad and give that squad an active membership covering the replay, + // so the kill carries victimSquadId. Player eff = 2/1 = 2 → playerFactor=2. + // Squad eff = 4/2 = 2 → squadFactor=2. points = round₂(1·(1+2)·(1+2)) = 9.00. + const squadId = await seedSquadForPlayer(seeds.bravoId); + await seedPreviousPlayerStats({ + deathsTotal: 1, + entityId: seeds.bravoId, + kills: 2, + rotationId: seeds.prevRotationId, + }); + await seedPreviousSquadStats({ + deathsTotal: 2, + entityId: squadId, + kills: 4, + rotationId: seeds.prevRotationId, + }); + + await driveBounty(); + + const alphaPoints = await rotationBountyPoints( + seeds.currentRotationId, + seeds.alphaId, + ); + expect(alphaPoints).toEqual(9); + }); + + it("(c) excluded teamkill → 0: Echo teamkill earns no bounty", async () => { + if (!infraReachable) { + return; + } + const seeds = await seedRotationsAndPlayers(); + // No previous stats needed: a teamkill is excluded regardless (bounty.ts:107). + await driveBounty(); + + const echoPoints = await rotationBountyPoints( + seeds.currentRotationId, + seeds.echoId, + ); + expect(echoPoints).toEqual(0); + }); +}); + +interface AnchorSeeds { + alphaId: string; + bravoId: string; + currentRotationId: string; + echoId: string; + prevRotationId: string; +} + +async function seedRotationsAndPlayers(): Promise { + if (aggregateCombat === undefined) { + throw new Error("aggregate-combat fixture missing from floor archive"); + } + const previous = await pool.query<{ id: string }>( + "insert into rotations (name, starts_at, ends_at) values ('prev', $1, $2) returning id", + [PREV_STARTS, CURRENT_STARTS], + ), + current = await pool.query<{ id: string }>( + "insert into rotations (name, starts_at, ends_at) values ('current', $1, null) returning id", + [CURRENT_STARTS], + ), + // Canonical players resolve by display_name (case-insensitive) — match the + // artifact callsigns Alpha/Bravo/Echo (parser-artifact identity path). + alpha = await insertCanonicalPlayer("Alpha"), + bravo = await insertCanonicalPlayer("Bravo"), + echo = await insertCanonicalPlayer("Echo"); + return { + alphaId: alpha, + bravoId: bravo, + currentRotationId: current.rows[0]?.id ?? "", + echoId: echo, + prevRotationId: previous.rows[0]?.id ?? "", + }; +} + +async function insertCanonicalPlayer(displayName: string): Promise { + const result = await pool.query<{ id: string }>( + "insert into canonical_players (display_name) values ($1) returning id", + [displayName], + ); + return result.rows[0]?.id ?? ""; +} + +async function seedSquadForPlayer(playerId: string): Promise { + const squad = await pool.query<{ id: string }>( + "insert into squads (name, tag) values ('Bravo Squad', '[B]') returning id", + [], + ), + squadId = squad.rows[0]?.id ?? ""; + await pool.query( + `insert into squad_memberships (squad_id, player_id, valid_from, valid_to) + values ($1, $2, $3, null)`, + [squadId, playerId, PREV_STARTS], + ); + return squadId; +} + +interface PreviousStatsInput { + deathsTotal: number; + entityId: string; + kills: number; + rotationId: string; +} + +async function seedPreviousPlayerStats( + input: PreviousStatsInput, +): Promise { + await pool.query( + `insert into player_stats (rotation_id, player_id, stats, game_type, is_show) + values ($1, $2, $3::jsonb, 'sg', true)`, + [ + input.rotationId, + input.entityId, + JSON.stringify({ + deaths: { total: input.deathsTotal }, + kills: input.kills, + }), + ], + ); +} + +async function seedPreviousSquadStats( + input: PreviousStatsInput, +): Promise { + await pool.query( + `insert into squad_stats (rotation_id, squad_id, stats, game_type) + values ($1, $2, $3::jsonb, 'sg')`, + [ + input.rotationId, + input.entityId, + JSON.stringify({ + deaths: { total: input.deathsTotal }, + kills: input.kills, + }), + ], + ); +} + +/** + * Drive the real chain for the aggregate-combat artifact and force the replay's + * game_type to 'sg' (the ingest path never sets game_type; without it the bounty + * scope is empty — RESEARCH §5 / repository.ts auditScopes). Bounded-poll to the + * finished chain, then the caller reads bounty_points. + */ +async function driveBounty(): Promise { + if (aggregateCombat === undefined) { + throw new Error("aggregate-combat fixture missing"); + } + const objectKey = `artifacts/v3/${runId}/bounty-${Date.now().toString(36)}.json`, + checksum = `${"0".repeat(63)}2`, + sourceReplayId = `bounty-${Date.now().toString(36)}`; + + await s3.send( + new PutObjectCommand({ + Body: JSON.stringify(aggregateCombat), + Bucket: config.s3.bucket, + Key: objectKey, + }), + ); + await pool.query( + `insert into ingest_staging_records + (source_system, source_replay_id, object_key, checksum, size_bytes, replay_timestamp, promotion_evidence) + values ('golden', $1, $2, $3, 123, $4, '{}'::jsonb)`, + [sourceReplayId, objectKey, checksum, REPLAY_TS], + ); + const [promotion] = await promotionService.promotePending({ + batchSize: 1, + parserContractVersion: "3.0.0", + }); + expect(promotion).toMatchObject({ status: "promoted" }); + + const job = await pool.query<{ id: string; replay_id: string }>( + "select id, replay_id from parse_jobs", + ); + const jobId = job.rows[0]?.id ?? "", + replayId = job.rows[0]?.replay_id ?? ""; + // Force sg game_type so the bounty scope is non-empty. + await pool.query("update replays set game_type = 'sg' where id = $1", [ + replayId, + ]); + await pool.query("update parse_jobs set status = 'published' where id = $1", [ + jobId, + ]); + + await publishCompleted( + config.rabbitmqUrl, + completedMessage({ + bucket: config.s3.bucket, + checksum, + jobId, + objectKey, + replayId, + }), + ); + await pollUntil(async () => { + const result = await pool.query<{ status: string }>( + "select status from parse_jobs where id = $1", + [jobId], + ); + return ( + result.rows[0]?.status === "succeeded" && completedReplays.has(replayId) + ); + }); +} + +async function rotationBountyPoints( + rotationId: string, + playerId: string, +): Promise { + const result = await pool.query<{ points: string }>( + "select points from bounty_points where rotation_id = $1 and player_id = $2", + [rotationId, playerId], + ); + const raw = result.rows[0]?.points; + return raw === undefined ? undefined : Number(raw); +} diff --git a/src/test/golden/fixtures/artifacts.tar.gz b/src/test/golden/fixtures/artifacts.tar.gz new file mode 100644 index 0000000..ed1eca7 Binary files /dev/null and b/src/test/golden/fixtures/artifacts.tar.gz differ diff --git a/src/test/golden/fixtures/harness.ts b/src/test/golden/fixtures/harness.ts new file mode 100644 index 0000000..39bf69f --- /dev/null +++ b/src/test/golden/fixtures/harness.ts @@ -0,0 +1,210 @@ +/* eslint-disable camelcase */ +// +// harness.ts — shared golden-suite helpers (one place, no duplication — principle 9). +// +// - TRUNCATE_ALL: the truncate…cascade isolation statement (CONTEXT Step-0 override). +// - pollUntil: bounded DB-state poll with a HARD timeout ceiling — the only backstop +// against the consumer's nack-requeue loop (RESEARCH §await-seam). A real-broker +// integration poll is permitted ([testing-standards §E]) since deterministic clock +// control is impossible for a live broker. +// - publishCompleted: publish a real parse.completed onto the parser exchange via a +// dedicated confirm channel (the runtime's publishJson is typed for parse.requested +// only, so the test owns a raw publisher for the completed message). +// - snapshotSurface: read + normalize the full observable DB surface plus GET /stats/*. +import * as amqp from "amqplib"; + +import { + parseCompletedQueue, + parseCompletedRoutingKey, + parseFailedQueue, + parseRequestedQueue, + parserExchange, + type ParseCompletedMessage, +} from "../../../infra/queue/messages.js"; + +import { UuidMap, normalizeRows, normalizeValue } from "./normalize.js"; + +import type { FastifyInstance } from "fastify"; +import type { Pool } from "pg"; + +export const TRUNCATE_ALL = `truncate + parser_events, parser_results, player_stats, squad_stats, commander_side_stats, + bounty_points, parse_jobs, replays, ingest_staging_records, rotations, + squad_memberships, player_nicknames, player_steam_ids, squads, canonical_players + cascade`; + +const POLL_TIMEOUT_MS = 30_000, + POLL_INTERVAL_MS = 200; + +/** + * Bounded poll: resolve when `predicate()` is true, throw on a HARD timeout. The + * timeout is the backstop against the parse.completed nack-requeue loop — never + * remove it. Plain setTimeout (no fake timers) is correct for a live-broker poll. + */ +export async function pollUntil( + predicate: () => Promise, +): Promise { + const deadline = Date.now() + POLL_TIMEOUT_MS; + while (Date.now() < deadline) { + if (await predicate()) { + return; + } + await new Promise((resolve) => { + setTimeout(resolve, POLL_INTERVAL_MS); + }); + } + throw new Error(`pollUntil timed out after ${String(POLL_TIMEOUT_MS)}ms`); +} + +/** + * Purge the durable parser queues so a prior run's nack-requeued messages never + * bleed into this run (the topology is durable — rabbitmq.ts:149). Asserts the + * topology first so purge targets existing queues. + */ +export async function purgeParserQueues(url: string): Promise { + const connection = await amqp.connect(url), + channel = await connection.createChannel(); + await channel.assertExchange(parserExchange, "direct", { durable: true }); + for (const queue of [ + parseRequestedQueue, + parseCompletedQueue, + parseFailedQueue, + ]) { + await channel.assertQueue(queue, { durable: true }); + await channel.purgeQueue(queue); + } + await channel.close(); + await connection.close(); +} + +/** A dedicated raw publisher for parse.completed (broker.publishJson is typed for requests). */ +export async function publishCompleted( + runtimeUrl: string, + message: ParseCompletedMessage, +): Promise { + const connection = await amqp.connect(runtimeUrl), + channel = await connection.createConfirmChannel(); + await channel.assertExchange(parserExchange, "direct", { durable: true }); + await new Promise((resolve, reject) => { + channel.publish( + parserExchange, + parseCompletedRoutingKey, + Buffer.from(JSON.stringify(message)), + { contentType: "application/json", persistent: true }, + (error) => { + if (error === null) { + resolve(); + } else { + reject(error instanceof Error ? error : new Error(String(error))); + } + }, + ); + }); + await channel.close(); + await connection.close(); +} + +/** Build a well-formed parse.completed message for a job/replay/key/checksum. */ +export function completedMessage(input: { + bucket: string; + checksum: string; + jobId: string; + objectKey: string; + replayId: string; +}): ParseCompletedMessage { + return { + artifact: { bucket: input.bucket, key: input.objectKey }, + artifact_checksum: { algorithm: "sha256", value: input.checksum }, + artifact_size_bytes: 1234, + job_id: input.jobId, + message_type: "parse.completed", + parser: { name: "replay-parser-2", version: "0.1.0" }, + parser_contract_version: "3.0.0", + replay_id: input.replayId, + source_checksum: { algorithm: "sha256", value: input.checksum }, + }; +} + +const SURFACE_TABLES: { + key: (row: Record) => string; + sql: string; + table: string; +}[] = [ + { + key: (row) => String(row["source_replay_id"]), + sql: "select * from replays", + table: "replays", + }, + { + key: (row) => + `${String(row["status"])}|${String(row["parser_contract_version"])}`, + sql: "select * from parse_jobs", + table: "parse_jobs", + }, + { + key: (row) => String(row["status"]), + sql: "select * from parser_results", + table: "parser_results", + }, + { + key: (row) => + `${String(row["event_type"])}|${String(row["observed_player_ref"])}|${JSON.stringify(row["source_ref"])}`, + sql: "select * from parser_events", + table: "parser_events", + }, + { + key: (row) => JSON.stringify(row["stats"]), + sql: "select * from player_stats", + table: "player_stats", + }, + { + key: (row) => JSON.stringify(row["stats"]), + sql: "select * from squad_stats", + table: "squad_stats", + }, + { + key: (row) => `${String(row["side"])}|${String(row["game_type"])}`, + sql: "select * from commander_side_stats", + table: "commander_side_stats", + }, + { + key: (row) => String(row["points"]), + sql: "select * from bounty_points", + table: "bounty_points", + }, + { + key: (row) => String(row["status"]), + sql: "select * from ingest_staging_records", + table: "ingest_staging_records", + }, +]; + +const STATS_ENDPOINTS = [ + "/stats/overview", + "/stats/players", + "/stats/squads", + "/stats/bounty", + "/stats/replays", +]; + +/** + * Read + normalize the full observable DB surface (uuid->token, timestamp redaction, + * row sort by natural key) plus the GET /stats/* responses. One shared UuidMap so the + * SAME uuid maps to the SAME token across tables AND the API responses. + */ +export async function snapshotSurface( + pool: Pool, + app: FastifyInstance, +): Promise> { + const uuids = new UuidMap(), + surface: Record = {}; + for (const entry of SURFACE_TABLES) { + const result = await pool.query>(entry.sql); + surface[entry.table] = normalizeRows(result.rows, uuids, entry.key); + } + for (const url of STATS_ENDPOINTS) { + const response = await app.inject({ method: "GET", url }); + surface[`GET ${url}`] = normalizeValue(response.json(), uuids); + } + return surface; +} diff --git a/src/test/golden/fixtures/loader.ts b/src/test/golden/fixtures/loader.ts new file mode 100644 index 0000000..5769617 --- /dev/null +++ b/src/test/golden/fixtures/loader.ts @@ -0,0 +1,122 @@ +// +// loader.ts — the ONE shared fixture loader/unpacker + skip guards for the golden oracle. +// +// Unpacks the committed artifacts.tar.gz to a per-run tmp dir and exposes the real +// ParserArtifact JSONs typed as the PRODUCTION `ParserArtifact` (never a hand-mirrored +// shape). Two guards drive clean SKIP (never FAIL) when infra/archive are absent: +// - archivePresent(): the committed fixture archive exists. +// - dockerReachable(config): PG + RabbitMQ + S3 all answer a health probe. +// +// [tests Integration Harness] real adapters via createDbClient/createQueueClient/ +// createStorageClient; [testing-standards §B] skip cleanly without containers. +import { execFileSync } from "node:child_process"; +import { existsSync, mkdtempSync, readdirSync, readFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { fileURLToPath } from "node:url"; + +import { loadConfig, type AppConfig } from "../../../config/env.js"; +import { createDbClient } from "../../../infra/db/client.js"; +import { createQueueClient } from "../../../infra/queue/client.js"; +import { createStorageClient } from "../../../infra/storage/client.js"; + +import type { ParserArtifact } from "../../../modules/statistics/parser-artifact.js"; + +// Fixed-port docker-compose defaults mirrored from adapters.test.ts / postgres.test.ts. +export const goldenEnvironment = { + DATABASE_URL: + process.env["DATABASE_URL"] ?? + "postgresql://solid:solid@localhost:15432/solid_stats", + RABBITMQ_URL: + process.env["RABBITMQ_URL"] ?? "amqp://solid:solid@localhost:5673", + S3_ACCESS_KEY_ID: process.env["S3_ACCESS_KEY_ID"] ?? "solid", + S3_BUCKET: process.env["S3_BUCKET"] ?? "solid-replays", + S3_ENDPOINT: process.env["S3_ENDPOINT"] ?? "http://localhost:9000", + S3_FORCE_PATH_STYLE: process.env["S3_FORCE_PATH_STYLE"] ?? "true", + S3_REGION: process.env["S3_REGION"] ?? "us-east-1", + S3_SECRET_ACCESS_KEY: process.env["S3_SECRET_ACCESS_KEY"] ?? "solidsecret", +}; + +export function goldenConfig(): AppConfig { + return loadConfig(goldenEnvironment); +} + +const ARCHIVE_PATH = fileURLToPath( + new URL("artifacts.tar.gz", import.meta.url), +); + +/** True when the committed fixture archive exists; false → suites skip cleanly. */ +export function archivePresent(): boolean { + return existsSync(ARCHIVE_PATH); +} + +export interface GoldenArtifact { + artifact: ParserArtifact; + name: string; +} + +/** + * Unpack the committed archive to a fresh per-run tmp dir and return the parsed + * artifacts (typed as the production `ParserArtifact`). Caller guards on + * archivePresent() first; this throws if the archive is missing. + */ +export function loadGoldenArtifacts(): GoldenArtifact[] { + if (!archivePresent()) { + throw new Error(`golden fixture archive missing at ${ARCHIVE_PATH}`); + } + const unpackDirectory = mkdtempSync(join(tmpdir(), "golden-")); + execFileSync("tar", ["xzf", ARCHIVE_PATH, "-C", unpackDirectory]); + return readdirSync(unpackDirectory) + .filter((file) => file.endsWith(".json")) + .toSorted((left, right) => left.localeCompare(right)) + .map((file) => ({ + artifact: JSON.parse( + readFileSync(join(unpackDirectory, file), "utf8"), + ) as ParserArtifact, + name: file.replace(/\.json$/u, ""), + })); +} + +/** + * Probe PG + RabbitMQ + S3 via the SAME adapters production wires. Returns false on + * any connect/health failure so suites SKIP (not FAIL) when docker-compose is down. + */ +export async function dockerReachable(config: AppConfig): Promise { + const db = createDbClient(config), + queue = createQueueClient(config), + storage = createStorageClient(config); + try { + const results = await Promise.all([ + db.check(), + queue.check(), + storage.check(), + ]); + return results.every((result) => result.status === "ok"); + } catch { + return false; + } finally { + await Promise.allSettled([db.close(), queue.close(), storage.close()]); + } +} + +/** + * The ONE shared golden skip-guard (principle 9 — no per-suite duplication of the + * archive-check + infra-probe sequence). A golden suite runs its live block ONLY + * when BOTH the committed fixture archive is present AND the docker-compose infra + * (PG + RabbitMQ + S3) answers a health probe; otherwise every suite SKIPS cleanly + * (never FAILS), so `pnpm test:golden` is a green no-op without Docker/archive. + * + * Returns true when the live chain may run. Each suite calls this once in + * `beforeAll` and gates its `beforeEach`/`it` bodies on the returned flag (the + * probe cannot be awaited inside `describe.skipIf`, so the per-suite flag is the + * runtime gate; `describe.skipIf(!archivePresent())` skips at collection time when + * the archive is absent so empty `test.each` tables never collect a phantom case). + */ +export async function goldenInfraReachable( + config: AppConfig, +): Promise { + if (!archivePresent()) { + return false; + } + return dockerReachable(config); +} diff --git a/src/test/golden/fixtures/normalize.test.ts b/src/test/golden/fixtures/normalize.test.ts new file mode 100644 index 0000000..330531d --- /dev/null +++ b/src/test/golden/fixtures/normalize.test.ts @@ -0,0 +1,60 @@ +/* eslint-disable camelcase, no-magic-numbers, unicorn/no-null */ +// +// normalize.test.ts — guards the snapshot normalizer's value handling. +// +// Pure unit test (no infra): lives under src/test/golden so it runs with +// `pnpm test:golden` alongside the suites whose snapshots it protects. The Date +// branch is the regression this permanently pins: pg returns timestamptz columns +// as JS Date, and a missing Date branch silently collapsed them to "{}" (REVIEW #1). +import { describe, expect, it } from "vitest"; + +import { UuidMap, normalizeValue } from "./normalize.js"; + +describe("normalizeValue", () => { + it("round-trips a Date to its UTC ISO string (never {} — REVIEW #1)", () => { + const date = new Date("2026-05-09T00:00:00.000Z"); + + const result = normalizeValue(date, new UuidMap()); + + expect(result).toBe("2026-05-09T00:00:00.000Z"); + }); + + it("maps a uuid string through the stable token map", () => { + const uuids = new UuidMap(), + uuid = "11111111-1111-1111-1111-111111111111"; + + expect(normalizeValue(uuid, uuids)).toBe("uuid:1"); + // Same uuid → same token; a different uuid → next token. + expect(normalizeValue(uuid, uuids)).toBe("uuid:1"); + expect(normalizeValue("22222222-2222-2222-2222-222222222222", uuids)).toBe( + "uuid:2", + ); + }); + + it("leaves a non-uuid primitive untouched", () => { + expect(normalizeValue("golden", new UuidMap())).toBe("golden"); + expect(normalizeValue(123, new UuidMap())).toBe(123); + expect(normalizeValue(null, new UuidMap())).toBeNull(); + }); + + it("redacts a now()-driven timestamp column inside an object by key", () => { + const result = normalizeValue( + { created_at: new Date("2026-05-09T00:00:00.000Z"), name: "x" }, + new UuidMap(), + ); + + // created_at is in TIMESTAMP_KEYS → redacted; name survives. + expect(result).toEqual({ created_at: "", name: "x" }); + }); + + it("keeps a deterministic Date column (not in TIMESTAMP_KEYS) as its ISO value", () => { + const result = normalizeValue( + { replay_timestamp: new Date("2026-05-09T00:00:00.000Z") }, + new UuidMap(), + ); + + expect(result).toEqual({ + replay_timestamp: "2026-05-09T00:00:00.000Z", + }); + }); +}); diff --git a/src/test/golden/fixtures/normalize.ts b/src/test/golden/fixtures/normalize.ts new file mode 100644 index 0000000..2298f40 --- /dev/null +++ b/src/test/golden/fixtures/normalize.ts @@ -0,0 +1,116 @@ +// +// normalize.ts — the ONE shared snapshot normalizer for the golden oracle. +// +// Characterization snapshots must be deterministic across runs. Two sources of +// non-determinism enter the asserted surface (RESEARCH §4): gen_random_uuid() ids +// (+ their fk references) and now()-based timestamps. This module: +// - builds a uuid -> stable token map (first-seen order, "uuid:1", "uuid:2", …), +// substituting BOTH primary ids AND fk columns through the same map; +// - redacts known timestamp columns to a fixed token; +// - redacts the parser artifact's local source_file path (absolute, host-specific); +// - sorts rows by a caller-supplied natural key so DB scan order never matters. +// +// Contractual order (cursor-paginated lists, bounty inputs.events[]) is NOT re-sorted — +// the caller simply does not pass those through sortRows(). + +const TIMESTAMP_TOKEN = ""; + +const UUID_PATTERN = + /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/u; + +const TIMESTAMP_KEYS = new Set([ + "calculated_at", + "created_at", + "finished_at", + "published_at", + "started_at", + "updated_at", +]); + +/** + * A stable uuid -> token map shared across all rows of one snapshot so the SAME + * uuid (whether it appears as a primary id or an fk) always maps to the SAME token. + */ +export class UuidMap { + private readonly tokens = new Map(); + + public token(uuid: string): string { + const existing = this.tokens.get(uuid); + if (existing !== undefined) { + return existing; + } + const token = `uuid:${String(this.tokens.size + 1)}`; + this.tokens.set(uuid, token); + return token; + } +} + +/** + * Recursively normalize a value: map every uuid string (primary id or fk) through + * `uuids`, redact timestamp-named fields, and redact the artifact source_file path. + * Arrays keep their order (callers control row order via sortRows()). + */ +export function normalizeValue(value: unknown, uuids: UuidMap): unknown { + if (value instanceof Date) { + // pg returns timestamptz columns as JS Date. Without this branch a Date falls + // into the generic-object branch below → Object.entries(date) === [] → "{}", + // silently erasing a deterministic field (e.g. replay_timestamp, set from a + // fixed staging literal). Pin its UTC ISO string; key-based redaction in + // normalizeObject still handles genuinely now()-driven columns (TIMESTAMP_KEYS). + return value.toISOString(); + } + if (typeof value === "string") { + return UUID_PATTERN.test(value) ? uuids.token(value) : value; + } + if (Array.isArray(value)) { + return value.map((item) => normalizeValue(item, uuids)); + } + if (value !== null && typeof value === "object") { + return normalizeObject(value as Record, uuids); + } + return value; +} + +function normalizeObject( + value: Record, + uuids: UuidMap, +): Record { + const result: Record = {}; + for (const [key, item] of Object.entries(value)) { + if (TIMESTAMP_KEYS.has(key) && item !== null) { + result[key] = TIMESTAMP_TOKEN; + continue; + } + if (key === "source_file" && typeof item === "string") { + // Absolute, host-specific local path from the parser-2 CLI run — redact. + result[key] = ""; + continue; + } + if (key === "object_key" && typeof item === "string") { + // Embeds a per-run id (artifacts/v3//…) for live-S3 isolation — redact. + result[key] = ""; + continue; + } + result[key] = normalizeValue(item, uuids); + } + return result; +} + +/** + * Normalize a list of DB rows and sort by a caller-supplied natural key, so the + * snapshot is independent of DB scan order. Use ONLY for unordered table dumps; + * never for contractually-ordered surfaces (cursor lists, bounty events). + */ +export function normalizeRows( + rows: Record[], + uuids: UuidMap, + naturalKey: (row: Record) => string, +): Record[] { + return rows + .map((row) => ({ + key: naturalKey(row), + normalized: normalizeObject(row, uuids), + })) + .toSorted((left, right) => left.key.localeCompare(right.key)) + .map((entry) => entry.normalized); +} diff --git a/src/test/golden/invariants.golden.test.ts b/src/test/golden/invariants.golden.test.ts new file mode 100644 index 0000000..60549a5 --- /dev/null +++ b/src/test/golden/invariants.golden.test.ts @@ -0,0 +1,317 @@ +/* eslint-disable camelcase, init-declarations, max-lines, max-lines-per-function, no-magic-numbers, unicorn/no-null */ +// +// invariants.golden.test.ts — idempotency / dedup / conflict / re-delivery / role-gate. +// +// Pins the current invariants from CONTEXT as-is (principle 7). Conflict and +// parse.failed are documented SYNTHETIC non-broker paths (RESEARCH open-item #2): +// a throwing artifact must never be round-tripped through the real broker (it would +// nack-requeue forever — rabbitmq.ts:126-128), so these exercise the repository / +// service directly. The role-gate uses the in-memory auth stores wired into buildApp. +import { Pool } from "pg"; +import { afterAll, beforeAll, beforeEach, describe, expect, it } from "vitest"; + +import { buildApp, createDefaultAuthOptions } from "../../app.js"; +import { runMigrations } from "../../infra/db/migrate.js"; +import { + InMemoryAuthUserRepository, + InMemorySessionStore, +} from "../../modules/auth/routes/memory.js"; +import { PgIngestRepository } from "../../modules/ingest/repository/repository.js"; +import { IngestPromotionService } from "../../modules/ingest/service.js"; + +import { TRUNCATE_ALL } from "./fixtures/harness.js"; +import { + archivePresent, + goldenConfig, + goldenInfraReachable, +} from "./fixtures/loader.js"; + +import type { AuthRouteOptions } from "../../modules/auth/routes/models.js"; + +const config = goldenConfig(), + pool = new Pool({ connectionString: config.databaseUrl }), + checksumA = "a".repeat(64), + checksumB = "b".repeat(64), + replayTs = "2026-05-09T00:00:00.000Z"; + +let infraReachable = false, + repository: PgIngestRepository, + service: IngestPromotionService; + +beforeAll(async () => { + infraReachable = await goldenInfraReachable(config); + if (!infraReachable) { + return; + } + await runMigrations(config.databaseUrl); + repository = new PgIngestRepository(pool); + service = new IngestPromotionService(repository); +}); + +afterAll(async () => { + await pool.end(); +}); + +beforeEach(async () => { + if (infraReachable) { + await pool.query(TRUNCATE_ALL); + } +}); + +describe.skipIf(!archivePresent())("golden ingest invariants", () => { + it("documents a clean skip when docker-compose is absent", () => { + // A Docker-less run is a clean pass; the live blocks below are gated on + // infraReachable so they never run (and never fail) without infra. Collection-time + // archive absence skips the whole suite via describe.skipIf (parity with the + // pipeline/bounty suites — honest skipped, not phantom-passed, reporting). + expect(typeof infraReachable).toBe("boolean"); + }); + + it("1. creates the durable parse_jobs row BEFORE any publish (never fire-and-forget)", async () => { + if (!infraReachable) { + return; + } + await insertStaging("src-1", "rep-1", checksumA); + const [result] = await service.promotePending({ + batchSize: 1, + parserContractVersion: "3.0.0", + }); + expect(result).toMatchObject({ status: "promoted" }); + // The durable job exists in 'queued' (not yet published) — publish is a later task. + const jobs = await pool.query<{ status: string }>( + "select status from parse_jobs", + ); + expect(jobs.rows).toHaveLength(1); + expect(jobs.rows[0]?.status).toBe("queued"); + }); + + it("2. re-promoting the same staging row dedups → status=promoted + duplicate_replay_id", async () => { + if (!infraReachable) { + return; + } + await insertStaging("src-2", "rep-2", checksumA); + const [first] = await service.promotePending({ + batchSize: 1, + parserContractVersion: "3.0.0", + }); + expect(first).toMatchObject({ status: "promoted" }); + + // A second staging row with the SAME checksum (no source match) → checksum-duplicate. + await insertStaging("src-2b", "rep-2b", checksumA); + const [second] = await service.promotePending({ + batchSize: 1, + parserContractVersion: "3.0.0", + }); + expect(second).toMatchObject({ status: "duplicate" }); + + const evidence = await pool.query<{ + promotion_evidence: { duplicate_replay_id?: string }; + status: string; + }>( + "select status, promotion_evidence from ingest_staging_records where source_replay_id = 'rep-2b'", + ); + expect(evidence.rows[0]?.status).toBe("promoted"); + expect(evidence.rows[0]?.promotion_evidence.duplicate_replay_id).toEqual( + expect.any(String), + ); + }); + + it("3. same source identity, different bytes → status=conflicted + reason=source_identity_changed_bytes", async () => { + if (!infraReachable) { + return; + } + await insertStaging("src-3", "rep-3", checksumA); + await service.promotePending({ + batchSize: 1, + parserContractVersion: "3.0.0", + }); + // Synthetic conflict pair: SAME (source_system, source_replay_id), DIFFERENT bytes. + // The staging unique (source_system, source_replay_id) is enforced, so the conflict + // path is exercised by inserting a row that re-uses the identity via a fresh + // staging record after the first was promoted (the original staging row is gone + // from pending; insert a new identical-identity row to trigger the conflict branch). + await pool.query( + `insert into ingest_staging_records + (source_system, source_replay_id, object_key, checksum, size_bytes, replay_timestamp, promotion_evidence) + values ('src-3', 'rep-3', 'raw/rep-3-v2.ocap.json', $1, 123, $2, '{}'::jsonb) + on conflict (source_system, source_replay_id) do update + set status = 'pending', checksum = excluded.checksum, object_key = excluded.object_key`, + [checksumB, replayTs], + ); + const [conflict] = await service.promotePending({ + batchSize: 1, + parserContractVersion: "3.0.0", + }); + expect(conflict).toMatchObject({ status: "conflicted" }); + + const row = await pool.query<{ + conflict_details: { reason?: string }; + status: string; + }>( + "select status, conflict_details from ingest_staging_records where source_replay_id = 'rep-3'", + ); + expect(row.rows[0]?.status).toBe("conflicted"); + expect(row.rows[0]?.conflict_details.reason).toBe( + "source_identity_changed_bytes", + ); + }); + + it("4. re-delivering the same parse.completed records terminal state once (idempotent)", async () => { + if (!infraReachable) { + return; + } + const { jobId, replayId } = await promoteAndPublish("src-4", "rep-4"); + const completed = { + artifact: { + bucket: config.s3.bucket, + key: `artifacts/v3/${replayId}.json`, + }, + artifact_checksum: { algorithm: "sha256" as const, value: checksumB }, + artifact_size_bytes: 1234, + job_id: jobId, + parser_contract_version: "3.0.0", + replay_id: replayId, + source_checksum: { algorithm: "sha256" as const, value: checksumA }, + }; + // First record → returns a parser_result id; second is a no-op (terminal job). + expect(await repository.recordParserCompleted(completed)).toEqual( + expect.any(String), + ); + expect(await repository.recordParserCompleted(completed)).toBeNull(); + const current = await pool.query( + "select 1 from parser_results where replay_id = $1 and status = 'current'", + [replayId], + ); + expect(current.rows).toHaveLength(1); + }); + + it("5. parse.failed records terminal failed state once (idempotent, synthetic message)", async () => { + if (!infraReachable) { + return; + } + const { jobId, replayId } = await promoteAndPublish("src-5", "rep-5"); + // Hand-built parse.failed message (documented synthetic exception — never round-trip + // a throwing artifact through the broker). + const failed = { + failure: { + error_code: "schema.unsupported", + message: "unsupported", + retryability: "not_retryable" as const, + stage: "schema", + }, + job_id: { state: "present" as const, value: jobId }, + message_type: "parse.failed" as const, + parser_contract_version: { state: "present" as const, value: "3.0.0" }, + replay_id: { state: "present" as const, value: replayId }, + }; + expect(await repository.recordParserFailed(failed)).toBe(true); + expect(await repository.recordParserFailed(failed)).toBe(false); + const job = await pool.query<{ status: string }>( + "select status from parse_jobs where id = $1", + [jobId], + ); + expect(job.rows[0]?.status).toBe("failed"); + }); + + it("6. admin role-gate: 401 without session, 403 without role, 2xx with admin role", async () => { + if (!infraReachable) { + return; + } + const users = new InMemoryAuthUserRepository(), + sessions = new InMemorySessionStore(), + auth: AuthRouteOptions = { + ...createDefaultAuthOptions(), + sessions, + users, + }, + app = await buildApp({ auth }); + try { + // No session → 401 (requireRole UNAUTHORIZED). + const anon = await app.inject({ + method: "POST", + payload: rotationBody(), + url: "/admin/rotations", + }); + expect(anon.statusCode).toBe(401); + + // Session for a user WITHOUT the admin role → 403 (requireRole FORBIDDEN). + const member = await users.upsertSteamUser({ + displayName: "Member", + steamId: "steam-member", + }), + memberSession = await sessions.create(member.id, 3600); + const forbidden = await app.inject({ + headers: { cookie: `solid_stats_session=${memberSession.id}` }, + method: "POST", + payload: rotationBody(), + url: "/admin/rotations", + }); + expect(forbidden.statusCode).toBe(403); + + // Session for a user WITH the admin role → 2xx via the shared requireRole pre-handler. + const adminUser = await users.upsertSteamUser({ + displayName: "Admin", + steamId: "steam-admin", + }); + await users.setUserRoles(adminUser.id, ["admin"]); + const adminSession = await sessions.create(adminUser.id, 3600); + const allowed = await app.inject({ + headers: { cookie: `solid_stats_session=${adminSession.id}` }, + method: "POST", + payload: rotationBody(), + url: "/admin/rotations", + }); + expect(allowed.statusCode).toBeLessThan(300); + expect(allowed.statusCode).toBeGreaterThanOrEqual(200); + } finally { + await app.close(); + } + }); +}); + +function rotationBody(): { endsAt: null; name: string; startsAt: string } { + return { + endsAt: null, + name: `gate-${Date.now().toString(36)}`, + startsAt: "2026-06-01T00:00:00.000Z", + }; +} + +async function insertStaging( + sourceSystem: string, + sourceReplayId: string, + checksum: string, +): Promise { + await pool.query( + `insert into ingest_staging_records + (source_system, source_replay_id, object_key, checksum, size_bytes, replay_timestamp, promotion_evidence) + values ($1, $2, $3, $4, 123, $5, '{}'::jsonb)`, + [ + sourceSystem, + sourceReplayId, + `raw/${sourceReplayId}.ocap.json`, + checksum, + replayTs, + ], + ); +} + +async function promoteAndPublish( + sourceSystem: string, + sourceReplayId: string, +): Promise<{ jobId: string; replayId: string }> { + await insertStaging(sourceSystem, sourceReplayId, checksumA); + await service.promotePending({ + batchSize: 1, + parserContractVersion: "3.0.0", + }); + const job = await pool.query<{ id: string; replay_id: string }>( + "select id, replay_id from parse_jobs order by created_at desc limit 1", + ); + const jobId = job.rows[0]?.id ?? "", + replayId = job.rows[0]?.replay_id ?? ""; + await pool.query("update parse_jobs set status = 'published' where id = $1", [ + jobId, + ]); + return { jobId, replayId }; +} diff --git a/src/test/golden/pipeline.golden.test.ts b/src/test/golden/pipeline.golden.test.ts new file mode 100644 index 0000000..0f90a46 --- /dev/null +++ b/src/test/golden/pipeline.golden.test.ts @@ -0,0 +1,231 @@ +/* eslint-disable camelcase, init-declarations, no-magic-numbers, no-inline-comments, unicorn/no-null, @typescript-eslint/no-unnecessary-condition */ +// +// pipeline.golden.test.ts — full-chain characterization oracle. +// +// Drives the REAL production path per floor artifact, through the SAME factories +// server.ts wires (never a hand-mirrored copy): +// promote → durable parse_jobs row → real RabbitMQ publish → real broker delivery +// → parse.completed consumer → S3 artifact load → recordParserCompleted → recalc +// → assert the normalized full observable surface incl. GET /stats/* via app.inject. +// +// [tests Integration Harness] real PG+RabbitMQ+S3 via docker-compose, runMigrations(), +// truncate…cascade isolation. [testing-standards §B] no mocked contract boundary — +// a mock here would hide exactly the drift this oracle exists to catch. [conv queue +// reliability] assert the durable parse_jobs row exists BEFORE the publish. +import { PutObjectCommand, S3Client } from "@aws-sdk/client-s3"; +import { Pool } from "pg"; +import { afterAll, beforeAll, beforeEach, describe, expect, it } from "vitest"; + +import { buildApp } from "../../app.js"; +import { runMigrations } from "../../infra/db/migrate.js"; +import { + createRabbitMqParserRuntime, + type RabbitMqParserRuntime, +} from "../../infra/queue/rabbitmq.js"; +import { createStorageClient } from "../../infra/storage/client.js"; +import { ParseJobPublisher } from "../../modules/ingest/publisher.js"; +import { PgIngestRepository } from "../../modules/ingest/repository/repository.js"; +import { IngestPromotionService } from "../../modules/ingest/service.js"; +import { PgPublicStatsReadModel } from "../../modules/public-stats/repository.js"; +import { PgStatisticsRepository } from "../../modules/statistics/repository/repository.js"; +import { ParserResultRecalculationService } from "../../modules/statistics/service/recalculation.js"; + +import { + TRUNCATE_ALL, + completedMessage, + pollUntil, + publishCompleted, + purgeParserQueues, + snapshotSurface, +} from "./fixtures/harness.js"; +import { + archivePresent, + goldenConfig, + goldenInfraReachable, + loadGoldenArtifacts, + type GoldenArtifact, +} from "./fixtures/loader.js"; + +import type { FastifyInstance } from "fastify"; + +const config = goldenConfig(), + pool = new Pool({ connectionString: config.databaseUrl }), + runId = Date.now().toString(36), + // replay_ids whose FULL chain (record + recalc) the consumer has finished. + completedReplays = new Set(), + // Load the fixture table SYNCHRONOUSLY at collection time so test.each has a + // concrete table; runtime infra-absence is handled by it.runIf(infraReachable). + fixtures: GoldenArtifact[] = archivePresent() ? loadGoldenArtifacts() : []; + +let infraReachable = false, + broker: RabbitMqParserRuntime, + storage: ReturnType, + s3: S3Client, + app: FastifyInstance, + statisticsRepository: PgStatisticsRepository, + ingestRepository: PgIngestRepository, + promotionService: IngestPromotionService, + publisher: ParseJobPublisher; + +beforeAll(async () => { + infraReachable = await goldenInfraReachable(config); + if (!infraReachable) { + return; + } + await runMigrations(config.databaseUrl); + await purgeParserQueues(config.rabbitmqUrl); + ingestRepository = new PgIngestRepository(pool); + statisticsRepository = new PgStatisticsRepository(pool); + promotionService = new IngestPromotionService(ingestRepository); + storage = createStorageClient(config); + broker = await createRabbitMqParserRuntime(config); + publisher = new ParseJobPublisher(ingestRepository, broker); + s3 = new S3Client({ + credentials: { + accessKeyId: config.s3.accessKeyId, + secretAccessKey: config.s3.secretAccessKey, + }, + endpoint: config.s3.endpoint, + forcePathStyle: config.s3.forcePathStyle, + region: config.s3.region, + }); + const recalculation = new ParserResultRecalculationService( + statisticsRepository, + ); + // Real consumer wired exactly as runtime.ts does it. We additionally record the + // replay_id once the WHOLE chain (recordParserCompleted + recalc) has finished, so + // the poll waits for recalc to persist parser_events — not merely for the + // parser_results row recordParserCompleted writes first (avoids a read race). + await broker.consumeParserResults({ + completed: async (message) => { + const artifact = await storage.loadParserArtifact(message.artifact), + parserResultId = await ingestRepository.recordParserCompleted({ + ...message, + rawSnapshot: artifact, + }); + if (parserResultId !== null) { + await recalculation.recalculateParserResult(parserResultId, artifact); + } + completedReplays.add(message.replay_id); + }, + failed: () => Promise.resolve(), + }); + app = await buildApp({ + publicStatsReadModel: new PgPublicStatsReadModel(pool), + }); +}); + +afterAll(async () => { + await broker?.close(); + await app?.close(); + s3?.destroy(); + await storage?.close(); + await pool.end(); +}); + +beforeEach(async () => { + if (infraReachable) { + await pool.query(TRUNCATE_ALL); + } +}); + +describe.skipIf(!archivePresent())("golden pipeline oracle", () => { + it("skips the live chain cleanly when docker-compose is absent", () => { + // A Docker-less run is a clean pass, not a failure — the real-chain block below + // is gated on infraReachable so it never runs (and never fails) without infra. + expect(typeof infraReachable).toBe("boolean"); + }); + + it.each(fixtures)( + "pins the full ingest→stats surface for $name", + async ({ name, artifact }: GoldenArtifact) => { + if (!infraReachable) { + return; // clean skip — docker-compose absent (verify stays green) + } + // Stable, name-keyed staging values keep the snapshot deterministic across + // runs; per-case isolation comes from truncate…cascade + queue purge, not from + // a per-run id. The runId only namespaces the live S3 object so concurrent runs + // never collide on the bucket, and it is normalized out of the snapshot. + const objectKey = `artifacts/v3/${runId}/${name}.json`, + sourceReplayId = `golden-${name}`, + checksum = `${"0".repeat(63)}1`; + + // (1) stage + upload artifact bytes to a UNIQUE S3 key (real S3). + await s3.send( + new PutObjectCommand({ + Body: JSON.stringify(artifact), + Bucket: config.s3.bucket, + Key: objectKey, + }), + ); + const staging = await pool.query<{ id: string }>( + `insert into ingest_staging_records + (source_system, source_replay_id, object_key, checksum, size_bytes, replay_timestamp, promotion_evidence) + values ('golden', $1, $2, $3, 123, '2026-05-09T00:00:00.000Z', '{}'::jsonb) + returning id`, + [sourceReplayId, objectKey, checksum], + ); + expect(staging.rows).toHaveLength(1); + + // (2) promote (durable parse_jobs row created in-tx) THEN publish. + const [promotion] = await promotionService.promotePending({ + batchSize: 1, + parserContractVersion: "3.0.0", + }); + expect(promotion).toMatchObject({ status: "promoted" }); + + const jobBefore = await pool.query<{ + id: string; + replay_id: string; + status: string; + }>("select id, replay_id, status from parse_jobs"); + // [conv queue reliability] durable job exists BEFORE any parse.requested publish. + expect(jobBefore.rows).toHaveLength(1); + expect(jobBefore.rows[0]?.status).toBe("queued"); + const jobId = jobBefore.rows[0]?.id ?? "", + replayId = jobBefore.rows[0]?.replay_id ?? ""; + + const requested = await publisher.publishQueued({ batchSize: 1 }); + expect(requested).toHaveLength(1); + expect(requested[0]).toMatchObject({ + job_id: jobId, + parser_contract_version: "3.0.0", + replay_id: replayId, + }); + + // (3) publish a real parse.completed through the live broker. + await publishCompleted( + config.rabbitmqUrl, + completedMessage({ + bucket: config.s3.bucket, + checksum, + jobId, + objectKey, + replayId, + }), + ); + + // (4) bounded-poll to terminal succeeded — the HARD timeout is the only + // backstop against the consumer's nack-requeue loop (RESEARCH §await-seam). + await pollUntil(async () => { + const result = await pool.query<{ status: string }>( + "select status from parse_jobs where id = $1", + [jobId], + ); + return ( + result.rows[0]?.status === "succeeded" && + completedReplays.has(replayId) + ); + }); + + // (5) assert the normalized full observable surface as a file snapshot. + const surface = await snapshotSurface(pool, app); + await expect(JSON.stringify(surface, null, 2)).toMatchFileSnapshot( + `./__snapshots__/pipeline-${name}.snap.json`, + ); + }, + // Per-test ceiling above the 30s bounded poll so a live-broker round-trip does + // not false-fail on the default 5s vitest timeout. + 40_000, + ); +}); diff --git a/src/test/golden/scripts/build-floor-archive.sh b/src/test/golden/scripts/build-floor-archive.sh new file mode 100755 index 0000000..9a5a7fd --- /dev/null +++ b/src/test/golden/scripts/build-floor-archive.sh @@ -0,0 +1,94 @@ +#!/usr/bin/env bash +# +# build-floor-archive.sh — produce the COMMITTABLE floor corpus for the golden oracle. +# +# Runs the replay-parser-2 CLI over its own golden OCAP corpus and packs the emitted +# real ParseArtifact JSONs into src/test/golden/fixtures/artifacts.tar.gz. This is the +# floor so the golden oracle is NEVER empty without VPS access (the hundreds-from-prod +# capture is capture-artifacts.sh and is gated/master-only). +# +# Only `success`/`partial` artifacts are packed: the server-2 parse.completed consumer +# nack-requeues on any throw (rabbitmq.ts:126-128) → a `failed`/`invalid-json` artifact +# fed through the real broker would redeliver forever. The conflict and parse.failed +# invariants use synthetic non-broker paths instead. +# +# Usage: +# bash src/test/golden/scripts/build-floor-archive.sh [PARSER_REPO_DIR] +# +# PARSER_REPO_DIR defaults to ../replay-parser-2 relative to this server-2 repo. +# Requires: cargo (to build the replay-parser-2 CLI), tar, gzip. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +SERVER_REPO="$(cd "${SCRIPT_DIR}/../../../.." && pwd)" +FIXTURES_DIR="${SERVER_REPO}/src/test/golden/fixtures" +ARCHIVE="${FIXTURES_DIR}/artifacts.tar.gz" + +PARSER_REPO="${1:-${SERVER_REPO}/../replay-parser-2}" +CORPUS_DIR="${PARSER_REPO}/crates/parser-core/tests/fixtures" + +if ! command -v cargo >/dev/null 2>&1; then + echo "ERROR: cargo not found. Install Rust toolchain to build the replay-parser-2 CLI." >&2 + echo " The committed artifacts.tar.gz remains the floor until this can run." >&2 + exit 1 +fi + +if [ ! -d "${CORPUS_DIR}" ]; then + echo "ERROR: parser-2 corpus not found at ${CORPUS_DIR}" >&2 + echo " Pass the replay-parser-2 repo dir as the first argument." >&2 + exit 1 +fi + +# success/partial OCAP inputs only (invalid-json → status:failed is EXCLUDED — see header). +INPUTS=( + valid-minimal + metadata-drift + killed-events + side-facts + vehicle-context + aggregate-combat + combat-events + duplicate-slot-same-name + connected-backfill +) + +WORK_DIR="$(mktemp -d "${TMPDIR:-/tmp}/golden-floor-XXXXXX")" +trap 'rm -rf "${WORK_DIR}"' EXIT + +echo "Building replay-parser-2 CLI (cargo build --release)…" +cargo build --release --manifest-path "${PARSER_REPO}/Cargo.toml" --bin replay-parser-2 + +CLI="${PARSER_REPO}/target/release/replay-parser-2" + +captured=0 +skipped=0 +for name in "${INPUTS[@]}"; do + input="${CORPUS_DIR}/${name}.ocap.json" + if [ ! -f "${input}" ]; then + echo " skip (missing input): ${name}" + skipped=$((skipped + 1)) + continue + fi + output="${WORK_DIR}/${name}.json" + # The CLI takes INPUT as a positional arg and --output as a flag + # (replay-parser-2 parse [OPTIONS] --output ). + if "${CLI}" parse --output "${output}" "${input}"; then + echo " captured: ${name}" + captured=$((captured + 1)) + else + echo " skip (parse non-zero exit): ${name}" + skipped=$((skipped + 1)) + fi +done + +if [ "${captured}" -eq 0 ]; then + echo "ERROR: produced zero artifacts — refusing to write an empty floor archive." >&2 + exit 1 +fi + +mkdir -p "${FIXTURES_DIR}" +tar czf "${ARCHIVE}" -C "${WORK_DIR}" . + +echo "Floor archive written: ${ARCHIVE}" +echo "captured=${captured} skipped=${skipped}" diff --git a/src/test/golden/scripts/capture-artifacts.sh b/src/test/golden/scripts/capture-artifacts.sh new file mode 100755 index 0000000..337fd95 --- /dev/null +++ b/src/test/golden/scripts/capture-artifacts.sh @@ -0,0 +1,123 @@ +#!/usr/bin/env bash +# +# capture-artifacts.sh — GATED capture of the FULL production parser-artifact corpus. +# +# Pulls the REAL production parser artifacts from the VPS S3 bucket and packs them into +# src/test/golden/fixtures/artifacts.tar.gz (replacing / augmenting the committed floor). +# The agent has no VPS access, so a HUMAN runs this once under `!`. The live full-corpus +# run is a master-only CI step; the committed floor (build-floor-archive.sh) keeps the +# oracle non-empty until then. +# +# Inputs (env only — NO host/key/cred values are ever hardcoded or committed): +# VPS_S3_ENDPOINT S3/MinIO endpoint URL hosting the bucket (required) +# VPS_S3_BUCKET artifact bucket name (default: solid-replays) +# VPS_S3_ACCESS_KEY_ID S3 access key (required) +# VPS_S3_SECRET_ACCESS_KEY S3 secret key (required) +# VPS_S3_PREFIX object key prefix to pull (default: artifacts/v3/) +# GOLDEN_CAPTURE_LIMIT optional cap on the number of objects (default: unlimited) +# GOLDEN_FOLD_FLOOR "1" to also fold in the parser-2 floor (default: 1) +# +# Usage: +# VPS_S3_ENDPOINT=https:// VPS_S3_ACCESS_KEY_ID=… \ +# VPS_S3_SECRET_ACCESS_KEY=… bash src/test/golden/scripts/capture-artifacts.sh +# +# Requires: the MinIO client `mc` (preferred) OR the AWS CLI `aws`, plus tar/gzip. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +SERVER_REPO="$(cd "${SCRIPT_DIR}/../../../.." && pwd)" +FIXTURES_DIR="${SERVER_REPO}/src/test/golden/fixtures" +ARCHIVE="${FIXTURES_DIR}/artifacts.tar.gz" + +VPS_S3_BUCKET="${VPS_S3_BUCKET:-solid-replays}" +VPS_S3_PREFIX="${VPS_S3_PREFIX:-artifacts/v3/}" +GOLDEN_CAPTURE_LIMIT="${GOLDEN_CAPTURE_LIMIT:-0}" +GOLDEN_FOLD_FLOOR="${GOLDEN_FOLD_FLOOR:-1}" + +# --- Happ VPN bypass reminder (global memory happ-vpn-bypass-for-servers) ------------- +# Happ VPN is always-on; traffic to your own VPS must bypass it via an `ip rule`, or the +# S3/SSH connection hangs. Ensure that bypass is active for the VPS_S3_ENDPOINT host +# BEFORE running this script. +echo "REMINDER: ensure the Happ VPN ip-rule bypass for the VPS S3 host is active," +echo " otherwise mc/aws S3 over the VPN will hang (global memory:" +echo " happ-vpn-bypass-for-servers)." + +# --- Validate required env (fail loudly) ---------------------------------------------- +missing=() +[ -z "${VPS_S3_ENDPOINT:-}" ] && missing+=("VPS_S3_ENDPOINT") +[ -z "${VPS_S3_ACCESS_KEY_ID:-}" ] && missing+=("VPS_S3_ACCESS_KEY_ID") +[ -z "${VPS_S3_SECRET_ACCESS_KEY:-}" ] && missing+=("VPS_S3_SECRET_ACCESS_KEY") +if [ "${#missing[@]}" -gt 0 ]; then + echo "ERROR: missing required env: ${missing[*]}" >&2 + exit 1 +fi + +WORK_DIR="$(mktemp -d "${TMPDIR:-/tmp}/golden-capture-XXXXXX")" +trap 'rm -rf "${WORK_DIR}"' EXIT +OBJECTS_DIR="${WORK_DIR}/objects" +mkdir -p "${OBJECTS_DIR}" + +# --- Pull objects: prefer mc, fall back to aws ---------------------------------------- +if command -v mc >/dev/null 2>&1; then + echo "Using MinIO client (mc) to pull s3://${VPS_S3_BUCKET}/${VPS_S3_PREFIX}…" + MC_ALIAS="goldencapture" + mc alias set "${MC_ALIAS}" "${VPS_S3_ENDPOINT}" \ + "${VPS_S3_ACCESS_KEY_ID}" "${VPS_S3_SECRET_ACCESS_KEY}" >/dev/null + mc cp --recursive \ + "${MC_ALIAS}/${VPS_S3_BUCKET}/${VPS_S3_PREFIX}" "${OBJECTS_DIR}/" + mc alias remove "${MC_ALIAS}" >/dev/null 2>&1 || true +elif command -v aws >/dev/null 2>&1; then + echo "Using AWS CLI (aws) to pull s3://${VPS_S3_BUCKET}/${VPS_S3_PREFIX}…" + AWS_ACCESS_KEY_ID="${VPS_S3_ACCESS_KEY_ID}" \ + AWS_SECRET_ACCESS_KEY="${VPS_S3_SECRET_ACCESS_KEY}" \ + aws --endpoint-url "${VPS_S3_ENDPOINT}" s3 cp --recursive \ + "s3://${VPS_S3_BUCKET}/${VPS_S3_PREFIX}" "${OBJECTS_DIR}/" +else + echo "ERROR: neither 'mc' nor 'aws' is installed — cannot pull S3 objects." >&2 + exit 1 +fi + +# --- Flatten + count, honoring an optional cap (never a SILENT cap) -------------------- +STAGE_DIR="${WORK_DIR}/stage" +mkdir -p "${STAGE_DIR}" +captured=0 +skipped=0 +while IFS= read -r -d '' file; do + if [ "${GOLDEN_CAPTURE_LIMIT}" -gt 0 ] && [ "${captured}" -ge "${GOLDEN_CAPTURE_LIMIT}" ]; then + skipped=$((skipped + 1)) + continue + fi + cp "${file}" "${STAGE_DIR}/capture-${captured}.json" + captured=$((captured + 1)) +done < <(find "${OBJECTS_DIR}" -type f -name '*.json' -print0) + +if [ "${captured}" -eq 0 ]; then + echo "ERROR: pulled zero artifact objects from s3://${VPS_S3_BUCKET}/${VPS_S3_PREFIX}" >&2 + echo " Check the bucket/prefix/creds and the VPN bypass, then retry." >&2 + exit 1 +fi + +# --- Optionally fold in the parser-2 floor so the corpus is a strict superset --------- +floor=0 +if [ "${GOLDEN_FOLD_FLOOR}" = "1" ] && [ -f "${SCRIPT_DIR}/build-floor-archive.sh" ]; then + echo "Folding in the parser-2 floor…" + if bash "${SCRIPT_DIR}/build-floor-archive.sh"; then + FLOOR_DIR="${WORK_DIR}/floor" + mkdir -p "${FLOOR_DIR}" + tar xzf "${ARCHIVE}" -C "${FLOOR_DIR}" + for floor_file in "${FLOOR_DIR}"/*.json; do + [ -f "${floor_file}" ] || continue + cp "${floor_file}" "${STAGE_DIR}/floor-$(basename "${floor_file}")" + floor=$((floor + 1)) + done + else + echo "WARNING: floor build failed; packing the captured corpus only." >&2 + fi +fi + +mkdir -p "${FIXTURES_DIR}" +tar czf "${ARCHIVE}" -C "${STAGE_DIR}" . + +echo "Capture archive written: ${ARCHIVE}" +echo "captured=${captured} skipped=${skipped} floor=${floor}"