From 90d5b894b8acced6868ae75304a7d8d446a578b4 Mon Sep 17 00:00:00 2001 From: cs01 Date: Tue, 21 Apr 2026 22:09:54 -0700 Subject: [PATCH 1/2] add --diag-trace type-trace flag + analyzer script --- scripts/analyze-diagnostics.cjs | 129 +++++++++++++++ src/chad-native.ts | 14 ++ src/chad-node.ts | 33 ++++ src/codegen/infrastructure/base-generator.ts | 16 +- .../infrastructure/generator-context.ts | 11 +- src/codegen/infrastructure/type-inference.ts | 20 ++- src/diagnostics/categories.ts | 26 +++ src/diagnostics/index.ts | 9 ++ src/diagnostics/sink.ts | 33 ++++ src/diagnostics/tracers.ts | 152 ++++++++++++++++++ 10 files changed, 435 insertions(+), 8 deletions(-) create mode 100644 scripts/analyze-diagnostics.cjs create mode 100644 src/diagnostics/categories.ts create mode 100644 src/diagnostics/index.ts create mode 100644 src/diagnostics/sink.ts create mode 100644 src/diagnostics/tracers.ts diff --git a/scripts/analyze-diagnostics.cjs b/scripts/analyze-diagnostics.cjs new file mode 100644 index 00000000..a04cb4c9 --- /dev/null +++ b/scripts/analyze-diagnostics.cjs @@ -0,0 +1,129 @@ +#!/usr/bin/env node +// Analyze type-trace JSONL produced by --diag-trace=type-trace.
+// Usage: node scripts/analyze-diagnostics.cjs [path] +const fs = require("fs"); +const path = process.argv[2] || "chad-diagnostics.jsonl"; + +const lines = fs.readFileSync(path, "utf8").split("\n").filter(Boolean); +console.error(`Loaded ${lines.length} events from ${path}`); +const events = lines.map((l) => JSON.parse(l)); + +function pickRealSite(sites) { + if (!Array.isArray(sites)) return sites || "?"; + const skip = [ + /codegen\/infrastructure\/base-generator\.js/, + /codegen\/infrastructure\/generator-context\.js/, + /codegen\/infrastructure\/type-inference\.js/, + /codegen\/infrastructure\/ir-builders\.js/, + /diagnostics\/tracers\.js/, + ]; + for (const s of sites) { + if (skip.some((r) => r.test(s))) continue; + return s; + } + return sites[0] || "?"; +} +for (const e of events) e.site = pickRealSite(e.sites); + +const typeTrace = events.filter((e) => e.cat === "type-trace"); +const sets = typeTrace.filter((e) => e.k === "set"); +const gets = typeTrace.filter((e) => e.k === "get"); +const riches = typeTrace.filter((e) => e.k === "rich"); + +function countBy(arr, keyFn) { + const m = new Map(); + for (const x of arr) { + const k = keyFn(x); + m.set(k, (m.get(k) || 0) + 1); + } + return m; +} + +const setSiteCount = countBy(sets, (s) => s.site); +const getSiteCount = countBy(gets, (g) => g.site); +const richSiteCount = countBy(riches, (r) => r.site); +const richSiteNull = countBy( + riches.filter((r) => r.result === null), + (r) => r.site, +); + +// Orphan analysis: sets whose name is not read before being re-set. 
+const lastSetIdByName = new Map(); +const setReadCount = new Map(); +const orphanGets = []; +for (const e of typeTrace) { + if (e.k === "set") { + lastSetIdByName.set(e.name, e.i); + setReadCount.set(e.i, 0); + } else if (e.k === "get") { + const setId = lastSetIdByName.get(e.name); + if (setId !== undefined) { + setReadCount.set(setId, (setReadCount.get(setId) || 0) + 1); + } else if (e.result !== null) { + orphanGets.push(e); + } + } +} +const orphanSets = sets.filter((s) => (setReadCount.get(s.i) || 0) === 0); +const orphanSetSite = countBy(orphanSets, (s) => s.site); +const orphanGetSite = countBy(orphanGets, (g) => g.site); + +// Per-site orphan ratio (for sets that happen N times at site X, how many are orphans?) +const orphanRatioBySite = []; +for (const [site, count] of setSiteCount.entries()) { + if (count < 50) continue; + const orphans = orphanSetSite.get(site) || 0; + orphanRatioBySite.push({ site, count, orphans, ratio: orphans / count }); +} +orphanRatioBySite.sort((a, b) => b.ratio - a.ratio); + +const fmt = (m, n = 30) => + [...m.entries()] + .sort((a, b) => b[1] - a[1]) + .slice(0, n) + .map(([k, v]) => ` ${String(v).padStart(7)} ${k}`) + .join("\n"); + +const pct = (p, w) => (w ? 
((100 * p) / w).toFixed(1) : "0.0") + "%"; + +console.log(`# Diagnostics type-trace analysis + +Total type-trace events: ${typeTrace.length} + setVariableType: ${sets.length} + getVariableType: ${gets.length} (${gets.filter((g) => g.result !== null).length} hits, ${gets.filter((g) => g.result === null).length} misses) + resolveRich: ${riches.length} (${riches.filter((r) => r.result === null).length} null results) + +## Top setVariableType call sites +${fmt(setSiteCount)} + +## Top getVariableType call sites (consumers) +${fmt(getSiteCount)} + +## Top resolveExpressionTypeRich call sites (consumers) +${fmt(richSiteCount)} + +## Orphan SETs (no subsequent read before next set of same name) — DROP CANDIDATES +Total orphan sets: ${orphanSets.length} (${pct(orphanSets.length, sets.length)} of all sets) + +${fmt(orphanSetSite)} + +## Orphan-set ratio per site (set-count >= 50) +${orphanRatioBySite + .slice(0, 30) + .map((r) => ` ${String(r.orphans).padStart(7)}/${String(r.count).padStart(7)} (${pct(r.orphans, r.count)}) ${r.site}`) + .join("\n")} + +## Orphan GETs (read returns non-null but never set in trace — comes from symbolTable) +Total: ${orphanGets.length} + +${fmt(orphanGetSite)} + +## resolveRich sites with highest null-result rate (count >= 10) +${[...richSiteCount.entries()] + .filter(([, c]) => c >= 10) + .map(([site, c]) => [site, c, richSiteNull.get(site) || 0]) + .sort((a, b) => b[2] / b[1] - a[2] / a[1]) + .slice(0, 20) + .map(([site, c, n]) => ` ${n}/${c} (${pct(n, c)}) ${site}`) + .join("\n")} +`); diff --git a/src/chad-native.ts b/src/chad-native.ts index 72b2ba1d..4f45677c 100644 --- a/src/chad-native.ts +++ b/src/chad-native.ts @@ -118,6 +118,20 @@ parser.addScopedOption( "", "build,run", ); +parser.addScopedOption( + "diag-trace", + "", + "Enable diagnostic trace categories (csv), e.g. 
'type-trace'", + "", + "build,run,ir", +); +parser.addScopedOption( + "diag-trace-out", + "", + "Output path for diagnostic trace JSONL (default: chad-diagnostics.jsonl)", + "", + "build,run,ir", +); parser.addPositional("input", "Input .ts or .js file"); parser.parse(process.argv); diff --git a/src/chad-node.ts b/src/chad-node.ts index 7d2bd403..1b69fd94 100644 --- a/src/chad-node.ts +++ b/src/chad-node.ts @@ -32,6 +32,9 @@ import * as fs from "fs"; import { execSync, spawn as spawnProc, ChildProcess } from "child_process"; import { installTargetSDK, listInstalledSDKs, getSDKBaseDir } from "./cross-compile.js"; import { VERSION } from "./version.js"; +import { enableSink, flushDiagnostics } from "./diagnostics/sink.js"; +import { parseCategories, CAT_TYPE_TRACE } from "./diagnostics/categories.js"; +import { enableTypeTrace } from "./diagnostics/tracers.js"; const parser = new ArgumentParser("chad", "compile TypeScript to native binaries via LLVM"); parser.setColorEnabled(process.stdout.isTTY === true); @@ -96,6 +99,20 @@ parser.addScopedOption( "", "build,run", ); +parser.addScopedOption( + "diag-trace", + "", + "Enable diagnostic trace categories (csv), e.g. 'type-trace'", + "", + "build,run,ir", +); +parser.addScopedOption( + "diag-trace-out", + "", + "Output path for diagnostic trace JSONL (default: chad-diagnostics.jsonl)", + "", + "build,run,ir", +); parser.addPositional("input", "Input .ts or .js file"); // Node's process.argv includes [node, script, ...] — skip both. 
@@ -302,6 +319,19 @@ if (parser.getFlag("verbose")) logLevel = LogLevel_Verbose; if (parser.getFlag("debug")) logLevel = LogLevel_Debug; if (parser.getFlag("trace")) logLevel = LogLevel_Trace; +const diagTraceCsv = parser.getOption("diag-trace"); +if (diagTraceCsv) { + const cats = parseCategories(diagTraceCsv); + const outPath = parser.getOption("diag-trace-out") || "chad-diagnostics.jsonl"; + enableSink(outPath); + process.on("exit", () => { + flushDiagnostics(); + }); + for (const c of cats) { + if (c === CAT_TYPE_TRACE) enableTypeTrace(); + } +} + if (parser.getFlag("skip-semantic-analysis")) setSkipSemanticAnalysis(true); if (parser.getFlag("keep-temps")) setKeepTemps(true); const diagFormat = parser.getOption("diagnostics"); @@ -396,10 +426,13 @@ if (diagFormat === "json") { compile(inputFile, outputFile, logLevel); } catch (error) { logger.error((error as Error).message); + flushDiagnostics(); process.exit(1); } } +flushDiagnostics(); + if (command === "run") { const bin = path.resolve(outputFile); if (!fs.existsSync(bin)) { diff --git a/src/codegen/infrastructure/base-generator.ts b/src/codegen/infrastructure/base-generator.ts index 42ad7c7f..d4ce38c4 100644 --- a/src/codegen/infrastructure/base-generator.ts +++ b/src/codegen/infrastructure/base-generator.ts @@ -25,6 +25,7 @@ import { SymbolKind_UrlSearchParams, } from "./symbol-table.js"; import type { ResolvedType } from "./type-system.js"; +import { traceTypeSet, traceTypeGet } from "../../diagnostics/tracers.js"; export { SymbolTable, @@ -589,13 +590,21 @@ export class BaseGenerator { * Checks SymbolTable for named variables, then variableTypes for temporary registers */ getVariableType(name: string): string | undefined { - if (!name) return undefined; + if (!name) { + traceTypeGet(name, undefined); + return undefined; + } // Check named variables in SymbolTable first const symbolType = this.symbolTable.getType(name); - if (symbolType) return symbolType; + if (symbolType) { + traceTypeGet(name, 
symbolType); + return symbolType; + } // Fall back to temporary register types - return this.variableTypes.get(name); + const t = this.variableTypes.get(name); + traceTypeGet(name, t); + return t; } /** @@ -614,6 +623,7 @@ export class BaseGenerator { `Cannot set type 'unknown' for register '${name}'. Type inference failed in the codegen pipeline.`, ); } + traceTypeSet(name, type); this.variableTypes.set(name, type); } diff --git a/src/codegen/infrastructure/generator-context.ts b/src/codegen/infrastructure/generator-context.ts index 56487690..9ba88f51 100644 --- a/src/codegen/infrastructure/generator-context.ts +++ b/src/codegen/infrastructure/generator-context.ts @@ -49,6 +49,7 @@ import type { JsonObjectMeta } from "../expressions/access/member.js"; import type { DiagnosticEngine } from "../../diagnostics/engine.js"; import { TypeContext } from "./type-context.js"; import { classifyTerminator } from "./terminator-classifier.js"; +import { traceTypeSet, traceTypeGet } from "../../diagnostics/tracers.js"; interface ExprBase { type: string; @@ -1624,10 +1625,15 @@ export class MockGeneratorContext implements IGeneratorContext { getVariableType(name: string): string | undefined { // Check named variables in SymbolTable first const symbolType = this.symbolTable.getType(name); - if (symbolType) return symbolType; + if (symbolType) { + traceTypeGet(name, symbolType); + return symbolType; + } // Fall back to temporary register types - return this.variableTypes.get(name); + const t = this.variableTypes.get(name); + traceTypeGet(name, t); + return t; } hasVariableType(name: string): boolean { @@ -1635,6 +1641,7 @@ export class MockGeneratorContext implements IGeneratorContext { } setVariableType(name: string, type: string): void { + traceTypeSet(name, type); this.variableTypes.set(name, type); } diff --git a/src/codegen/infrastructure/type-inference.ts b/src/codegen/infrastructure/type-inference.ts index 4cd46505..de9d4b5d 100644 --- 
a/src/codegen/infrastructure/type-inference.ts +++ b/src/codegen/infrastructure/type-inference.ts @@ -40,6 +40,7 @@ import type { ArrayStorageStrategy, } from "./type-system.js"; import type { TypeContext } from "./type-context.js"; +import { traceTypeRich } from "../../diagnostics/tracers.js"; interface ExprBase { type: string; @@ -155,22 +156,35 @@ export class TypeInference { // etc.) never gain stray enrichment. Fields are eager in P1a; lazy-getter // optimization deferred to P1b — callers today only invoke this on the hot path. resolveExpressionTypeRich(expr: Expression): ResolvedType | null { + const exprTypeTag: string = + expr && typeof expr === "object" ? (expr as ExprBase).type || "" : ""; if (expr && typeof expr === "object" && this.isCacheableExprType((expr as ExprBase).type)) { const cached = this.richCacheLookup(expr); - if (cached) return cached; + if (cached) { + traceTypeRich(exprTypeTag, cached.base || null); + return cached; + } const baseType = this.resolveExpressionType(expr); - if (!baseType) return null; + if (!baseType) { + traceTypeRich(exprTypeTag, null); + return null; + } const enriched = this.enrichResolvedType(baseType); this.populateArrayStorage(enriched, expr); if (enriched.base && enriched.sourceKind && enriched.sourceKind !== "unknown") { this.richCacheStore(expr, enriched); } + traceTypeRich(exprTypeTag, enriched.base || null); return enriched; } const baseType = this.resolveExpressionType(expr); - if (!baseType) return null; + if (!baseType) { + traceTypeRich(exprTypeTag, null); + return null; + } const enriched = this.enrichResolvedType(baseType); this.populateArrayStorage(enriched, expr); + traceTypeRich(exprTypeTag, enriched.base || null); return enriched; } diff --git a/src/diagnostics/categories.ts b/src/diagnostics/categories.ts new file mode 100644 index 00000000..8a8d15d3 --- /dev/null +++ b/src/diagnostics/categories.ts @@ -0,0 +1,26 @@ +export const CAT_TYPE_TRACE = "type-trace"; + +export const KNOWN_CATEGORIES: 
string[] = [CAT_TYPE_TRACE]; + +export function parseCategories(csv: string): string[] { + const result: string[] = []; + if (!csv) return result; + const parts = csv.split(","); + for (const raw of parts) { + const name = raw.trim(); + if (!name) continue; + let known = false; + for (const k of KNOWN_CATEGORIES) { + if (k === name) { + known = true; + break; + } + } + if (!known) { + console.error("warning: unknown diagnostic category " + name); + continue; + } + result.push(name); + } + return result; +} diff --git a/src/diagnostics/index.ts b/src/diagnostics/index.ts new file mode 100644 index 00000000..25b97ea3 --- /dev/null +++ b/src/diagnostics/index.ts @@ -0,0 +1,9 @@ +export { enableSink, isSinkEnabled, recordEvent, flushDiagnostics } from "./sink.js"; +export { CAT_TYPE_TRACE, KNOWN_CATEGORIES, parseCategories } from "./categories.js"; +export { + enableTypeTrace, + isTypeTraceEnabled, + traceTypeSet, + traceTypeGet, + traceTypeRich, +} from "./tracers.js"; diff --git a/src/diagnostics/sink.ts b/src/diagnostics/sink.ts new file mode 100644 index 00000000..994f5f85 --- /dev/null +++ b/src/diagnostics/sink.ts @@ -0,0 +1,33 @@ +import * as fs from "fs"; + +let diagSinkEnabled = false; +let diagSinkPath = ""; +let diagSinkBuffer: string[] = []; +let diagSinkFlushed = false; + +export function enableSink(path: string): void { + diagSinkEnabled = true; + diagSinkPath = path; + diagSinkBuffer = []; + diagSinkFlushed = false; +} + +export function isSinkEnabled(): boolean { + return diagSinkEnabled; +} + +// Push a pre-serialized JSON event line. Callers build the JSON string +// themselves — we keep the sink dumb so the native compiler doesn't need +// to marshal arbitrary record types through JSON.stringify. 
+export function recordEvent(line: string): void {
+  if (!diagSinkEnabled) return;
+  diagSinkBuffer.push(line);
+}
+
+export function flushDiagnostics(): void {
+  if (!diagSinkEnabled) return;
+  if (diagSinkFlushed) return;
+  diagSinkFlushed = true;
+  const out = diagSinkBuffer.length > 0 ? diagSinkBuffer.join("\n") + "\n" : "";
+  fs.writeFileSync(diagSinkPath, out);
+}
diff --git a/src/diagnostics/tracers.ts b/src/diagnostics/tracers.ts
new file mode 100644
index 00000000..9486ce8f
--- /dev/null
+++ b/src/diagnostics/tracers.ts
@@ -0,0 +1,156 @@
+import { recordEvent } from "./sink.js";
+import { CAT_TYPE_TRACE } from "./categories.js";
+
+let diagTypeTraceEnabled = false;
+let diagSeq = 0;
+
+// Turn on type-trace event emission; there is no way to turn it back off.
+export function enableTypeTrace(): void {
+  diagTypeTraceEnabled = true;
+}
+
+// Report whether enableTypeTrace() has been called.
+export function isTypeTraceEnabled(): boolean {
+  return diagTypeTraceEnabled;
+}
+
+// JSON-string-escape a value, returning it wrapped in double quotes. Used
+// instead of JSON.stringify on record types (the native compiler cannot
+// stringify `Record`). Escapes `"`, `\`, and every code unit below 0x20;
+// everything else is copied through unchanged.
+//
+// The declared type is `string`, but getVariableType() also traces on its
+// falsy-name path, so undefined/null can reach this function at runtime.
+// Such values are emitted as "" instead of letting `s.length` throw and
+// abort the compile that is being traced.
+function jsonEscapeString(s: string): string {
+  if (typeof s !== "string") return '""';
+  let out = '"';
+  for (let i = 0; i < s.length; i++) {
+    const c = s.charCodeAt(i);
+    if (c === 0x22) out += '\\"';
+    else if (c === 0x5c) out += "\\\\";
+    else if (c === 0x0a) out += "\\n";
+    else if (c === 0x0d) out += "\\r";
+    else if (c === 0x09) out += "\\t";
+    else if (c < 0x20) {
+      const hex = c.toString(16);
+      let padded = hex;
+      while (padded.length < 4) padded = "0" + padded;
+      out += "\\u" + padded;
+    } else {
+      out += s.charAt(i);
+    }
+  }
+  out += '"';
+  return out;
+}
+
+// Serialize a list of call-site strings as a JSON array of string literals.
+function sitesToJsonArray(sites: string[]): string {
+  let out = "[";
+  for (let i = 0; i < sites.length; i++) {
+    if (i > 0) out += ",";
+    out += jsonEscapeString(sites[i]);
+  }
+  out += "]";
+  return out;
+}
+
+// Extract compact callsites from a V8 stack trace string. `skip` is the
+// number of leading "at ..." frames to drop; the first such frame is
+// captureSites itself, so callers pass one for this function plus one per
+// tracer-internal frame above it. Returns up to 6 of the remaining frames,
+// each reduced to its "file:line:col" location and shortened to the part
+// after /dist/ when that segment is present. This runs under Node only —
+// chad-native never enables the tracer, so the body is unreachable there.
+function captureSites(skip: number): string[] {
+  let stack = "";
+  try {
+    throw new Error("diag-stack");
+  } catch (e) {
+    const s = (e as { stack?: string }).stack;
+    if (s) stack = s;
+  }
+  if (!stack) return [];
+  const lines = stack.split("\n");
+  const out: string[] = [];
+  let idx = 0;
+  for (const raw of lines) {
+    const line = raw.trim();
+    if (!line.startsWith("at ")) continue;
+    if (idx < skip) {
+      idx++;
+      continue;
+    }
+    idx++;
+    // Extract "file:line:col" portion, dropping any surrounding parens.
+    let loc = line;
+    const lp = line.lastIndexOf("(");
+    const rp = line.lastIndexOf(")");
+    if (lp >= 0 && rp > lp) loc = line.substring(lp + 1, rp);
+    else {
+      // "at path:line:col" — strip leading "at "
+      loc = line.substring(3);
+    }
+    // Trim to portion starting at /dist/ if present.
+    const distIdx = loc.indexOf("/dist/");
+    if (distIdx >= 0) loc = loc.substring(distIdx + 6);
+    out.push(loc);
+    if (out.length >= 6) break;
+  }
+  return out;
+}
+
+// Shared tail of the public tracers: takes the next sequence id, captures
+// the call sites, and records one JSON object line in the sink. `kind` is
+// the "k" discriminator ("set" | "get" | "rich"); `fieldsJson` is the
+// already-serialized `"key":value,...` payload emitted between "i" and
+// "sites".
+function emitTypeTraceEvent(kind: string, fieldsJson: string): void {
+  // Frames dropped: captureSites, this helper, and the traceType* caller.
+  const sites = captureSites(3);
+  const i = diagSeq;
+  diagSeq = diagSeq + 1;
+  const line =
+    '{"cat":' +
+    jsonEscapeString(CAT_TYPE_TRACE) +
+    ',"k":' +
+    jsonEscapeString(kind) +
+    ',"i":' +
+    i.toString() +
+    "," +
+    fieldsJson +
+    ',"sites":' +
+    sitesToJsonArray(sites) +
+    "}";
+  recordEvent(line);
+}
+
+// Record that `type` was stored for register/variable `name`.
+export function traceTypeSet(name: string, type: string): void {
+  if (!diagTypeTraceEnabled) return;
+  emitTypeTraceEvent(
+    "set",
+    '"name":' + jsonEscapeString(name) + ',"type":' + jsonEscapeString(type),
+  );
+}
+
+// Record a type lookup for `name`; an undefined result (miss) is emitted as
+// JSON null.
+export function traceTypeGet(name: string, result: string | undefined): void {
+  if (!diagTypeTraceEnabled) return;
+  const resultJson = result === undefined ? "null" : jsonEscapeString(result);
+  emitTypeTraceEvent("get", '"name":' + jsonEscapeString(name) + ',"result":' + resultJson);
+}
+
+// Record a rich-type resolution for an expression whose `type` tag is
+// `exprType`; a null result is emitted as JSON null.
+export function traceTypeRich(exprType: string, result: string | null): void {
+  if (!diagTypeTraceEnabled) return;
+  const resultJson = result === null ? "null" : jsonEscapeString(result);
+  emitTypeTraceEvent(
+    "rich",
+    '"exprType":' + jsonEscapeString(exprType) + ',"result":' + resultJson,
+  );
+}
From df9bf4696d5ea27685665b89243e31b9eab91e41 Mon Sep 17 00:00:00 2001
From: cs01
Date: Tue, 21 Apr 2026 22:11:36 -0700
Subject: [PATCH 2/2] format analyze-diagnostics.cjs

---
 scripts/analyze-diagnostics.cjs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/scripts/analyze-diagnostics.cjs b/scripts/analyze-diagnostics.cjs
index a04cb4c9..35f42de6 100644
--- a/scripts/analyze-diagnostics.cjs
+++ b/scripts/analyze-diagnostics.cjs
@@ -110,7 +110,10 @@ ${fmt(orphanSetSite)}
 ## Orphan-set ratio per site (set-count >= 50)
 ${orphanRatioBySite
   .slice(0, 30)
-  .map((r) => ` ${String(r.orphans).padStart(7)}/${String(r.count).padStart(7)} (${pct(r.orphans, r.count)}) ${r.site}`)
+  .map(
+    (r) =>
+      ` ${String(r.orphans).padStart(7)}/${String(r.count).padStart(7)} (${pct(r.orphans, r.count)}) ${r.site}`,
+  )
   .join("\n")}
 
 ## Orphan GETs (read returns non-null but never set in trace — comes from symbolTable)