From a56e21c17dcc1e542595a596074c5d8e659c1168 Mon Sep 17 00:00:00 2001 From: Yusuke Hirao Date: Wed, 17 Jun 2026 12:25:02 +0900 Subject: [PATCH] feat(beholder): expose extractMetaFromDocument for jsdom-backed meta extraction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a Puppeteer-free public entry point that accepts any `Window` (e.g. jsdom) and returns the same `Meta` shape produced by `Scraper.scrapeStart()`. The DOM walk previously inlined inside `collectHeadOnPage` is extracted to `meta/collect-head.ts` and reused by both the Puppeteer path (`page.evaluate(string)` over the shared function source) and the new `extractMetaFromDocument` path — keeping a single source of truth for the raw head collector. - New: `extractMetaFromDocument(window, context)` and `ExtractMetaContext`, exported from `index.ts`. - New: `meta/collect-head.ts` houses the realm-agnostic collector. The function reads HTML class constructors off the passed `window` so `instanceof` resolves against the caller's realm (browser or jsdom). - Refactor: `collectHeadOnPage` now does `page.evaluate` over the shared function source via `Function.prototype.toString`, eliminating the duplicated inline body. - Tests: 12 new `extract-meta.spec.ts` cases driven by jsdom, covering title, og/twitter, viewport/robots/theme-color media branches, link canonical, JSON-LD (valid + parse error), microdata/RDFa, base/iframe, outerHTML fallback, `_raw` debug, `window-global` simulation, and headers/statusCode forwarding. - Docs: README gains a "Puppeteer なし" usage section; JSDoc on the new API carries `@example` and documents the `as unknown as Window` cast jsdom requires. - Deps: pin `jsdom@29.1.1` and `@types/jsdom@28.0.3` as devDependencies (test-only; no runtime impact). 
Co-Authored-By: Claude Opus 4.7 --- packages/@d-zero/beholder/README.md | 26 ++ packages/@d-zero/beholder/package.json | 4 +- .../@d-zero/beholder/src/dom-evaluation.ts | 181 +-------- .../@d-zero/beholder/src/extract-meta.spec.ts | 247 +++++++++++++ packages/@d-zero/beholder/src/extract-meta.ts | 121 ++++++ packages/@d-zero/beholder/src/index.ts | 2 + .../@d-zero/beholder/src/meta/collect-head.ts | 247 +++++++++++++ yarn.lock | 349 +++++++++++++++++- 8 files changed, 1013 insertions(+), 164 deletions(-) create mode 100644 packages/@d-zero/beholder/src/extract-meta.spec.ts create mode 100644 packages/@d-zero/beholder/src/extract-meta.ts create mode 100644 packages/@d-zero/beholder/src/meta/collect-head.ts diff --git a/packages/@d-zero/beholder/README.md b/packages/@d-zero/beholder/README.md index 5a462994..290e7fd2 100644 --- a/packages/@d-zero/beholder/README.md +++ b/packages/@d-zero/beholder/README.md @@ -32,3 +32,29 @@ if (result.type === 'success') { ``` 設計判断(イベントではなく戻り値で返す理由、`page` のライフサイクル責務、リトライ機構など)は `src/scraper.ts` の JSDoc を参照。 + +## DOM 文字列からメタ抽出(Puppeteer なし) + +HTML 文字列を jsdom などでパースしてから `Meta` を取り出したい場合、`extractMetaFromDocument` を使う。`Scraper` が内部で呼ぶ `collectHead → detectTags → classify` パイプラインと同じ実装を再利用するため、戻り値の `Meta` 形状は `scrapeStart` と同一。DOM ライブラリ(jsdom 等)はユーザランドの責務。 + +```ts +import { extractMetaFromDocument } from '@d-zero/beholder'; +import { JSDOM } from 'jsdom'; + +const url = 'https://example.com/'; +const html = await (await fetch(url)).text(); +const dom = new JSDOM(html, { url }); + +// `as unknown as Window` は jsdom の `DOMWindow` 型が lib.dom の `Window` と +// 構造的に完全一致しないための型キャスト。ランタイムでは互換。 +const meta = await extractMetaFromDocument(dom.window as unknown as Window, { + url, + html, +}); + +console.log(meta.title); +console.log(meta.og?.image); +console.log(meta.tags.entries); +``` + +`context.html` を省略すると `window.document.documentElement.outerHTML` がフォールバックされる。ただし Wappalyzer の HTML パターンはスクリプト実行前の生 HTML に合わせて作られているので、可能なら取得直後の HTML 
文字列を明示的に渡す方が検出が安定する。 diff --git a/packages/@d-zero/beholder/package.json b/packages/@d-zero/beholder/package.json index bd2712a1..25fdc88b 100644 --- a/packages/@d-zero/beholder/package.json +++ b/packages/@d-zero/beholder/package.json @@ -27,7 +27,9 @@ "simple-wappalyzer": "1.1.99" }, "devDependencies": { - "@types/debug": "4.1.12" + "@types/debug": "4.1.12", + "@types/jsdom": "28.0.3", + "jsdom": "29.1.1" }, "repository": { "type": "git", diff --git a/packages/@d-zero/beholder/src/dom-evaluation.ts b/packages/@d-zero/beholder/src/dom-evaluation.ts index 6efa8c93..ddbf8c78 100644 --- a/packages/@d-zero/beholder/src/dom-evaluation.ts +++ b/packages/@d-zero/beholder/src/dom-evaluation.ts @@ -22,6 +22,7 @@ import { raceWithTimeout } from '@d-zero/shared/race-with-timeout'; import { domDetailsLog, domLog } from './debug.js'; import { classify, emptyMeta } from './meta/classify.js'; +import { WINDOW_GLOBALS_TO_CHECK, collectHeadFromDocument } from './meta/collect-head.js'; import { detectTags } from './meta/tag-detection.js'; import { parseUrl } from './parse-url.js'; @@ -515,46 +516,6 @@ export type GetMetaContext = { readonly includeRaw?: boolean; }; -const WINDOW_GLOBALS_TO_CHECK: readonly string[] = [ - 'dataLayer', - 'gtag', - 'ga', - '_gaq', - 'fbq', - '_fbq', - 'clarity', - '_hjSettings', - '_hjid', - 'twq', - 'ttq', - '_linkedin_partner_id', - 'pintrk', - 'amplitude', - 'mixpanel', - 'analytics', - 'heap', - 'posthog', - 'plausible', - 'fathom', - '_paq', - 's_account', - 's', - 'ym', - 'UET', - 'optimizely', - '_hsq', - 'Sentry', - 'Intercom', - 'intercomSettings', - 'drift', - 'Tawk_API', - 'zE', - 'OneTrust', - 'Cookiebot', - 'Stripe', - 'grecaptcha', -]; - /** * Extracts comprehensive metadata from the page. * @@ -639,129 +600,27 @@ async function runGetMeta(page: Page, context: GetMetaContext): Promise` entries from a Puppeteer page by injecting + * {@link collectHeadFromDocument} into the page realm. 
* - * @param page + * WHY string-eval instead of `page.evaluate(fn, args)`: the shared + * implementation lives in `./meta/collect-head.ts` (`collectHeadFromDocument`), and a + * `page.evaluate(() => collectHeadFromDocument(window, …))` wrapper cannot + * reach that module-scope binding inside the page realm — only the wrapper's + * own source crosses the CDP boundary. Serializing the implementation via + * `Function.prototype.toString` and invoking it through + * `page.evaluate(string)` is what keeps the Puppeteer path and the + * jsdom path on one source of truth. + * + * The same {@link collectHeadFromDocument} function is also exposed via + * {@link ./extract-meta.ts | extractMetaFromDocument} for jsdom/Node callers, + * so the two paths cannot drift apart. + * @param page - The Puppeteer page whose document will be inspected. */ async function collectHeadOnPage(page: Page): Promise { - const raw = await page - .evaluate((knownGlobals: readonly string[]) => { - /* global document, HTMLLinkElement, HTMLMetaElement, HTMLBaseElement, - HTMLScriptElement, HTMLIFrameElement */ - type Out = unknown; - const entries: Out[] = []; - - const html = document.documentElement; - entries.push( - { - kind: 'html', - lang: html.lang || undefined, - dir: html.dir || undefined, - xmlns: html.getAttribute('xmlns') ?? undefined, - prefix: html.getAttribute('prefix') ?? undefined, - vocab: html.getAttribute('vocab') ?? undefined, - typeOf: html.getAttribute('typeof') ?? undefined, - itemscope: html.hasAttribute('itemscope') || undefined, - itemtype: html.getAttribute('itemtype') ?? undefined, - amp: html.hasAttribute('amp') || undefined, - lightning: html.hasAttribute('⚡') || undefined, - }, - { kind: 'title', content: document.title }, - ); - - for (const base of document.querySelectorAll('base')) { - if (!(base instanceof HTMLBaseElement)) continue; - entries.push({ - kind: 'base', - href: base.getAttribute('href') ?? undefined, - target: base.getAttribute('target') ??
undefined, - }); - } - - for (const meta of document.querySelectorAll('meta')) { - if (!(meta instanceof HTMLMetaElement)) continue; - const name = meta.getAttribute('name'); - const property = meta.getAttribute('property'); - const httpEquiv = meta.getAttribute('http-equiv'); - const itemprop = meta.getAttribute('itemprop'); - const charset = meta.getAttribute('charset'); - const content = meta.getAttribute('content'); - const media = meta.getAttribute('media'); - entries.push({ - kind: 'meta', - name: name ? name.toLowerCase() : undefined, - property: property ? property.toLowerCase() : undefined, - httpEquiv: httpEquiv ? httpEquiv.toLowerCase() : undefined, - itemprop: itemprop ?? undefined, - charset: charset ?? undefined, - content: content ?? undefined, - media: media ?? undefined, - }); - } - - for (const link of document.querySelectorAll('link[href]')) { - if (!(link instanceof HTMLLinkElement)) continue; - const relRaw = link.getAttribute('rel') ?? ''; - const rel = relRaw.toLowerCase().split(/\s+/u).filter(Boolean); - entries.push({ - kind: 'link', - rel, - href: link.getAttribute('href') ?? '', - type: link.getAttribute('type') ?? undefined, - media: link.getAttribute('media') ?? undefined, - sizes: link.getAttribute('sizes') ?? undefined, - title: link.getAttribute('title') ?? undefined, - hreflang: link.getAttribute('hreflang') ?? undefined, - as: link.getAttribute('as') ?? undefined, - crossorigin: link.getAttribute('crossorigin') ?? undefined, - color: link.getAttribute('color') ?? undefined, - blocking: link.getAttribute('blocking') ?? undefined, - imagesrcset: link.getAttribute('imagesrcset') ?? undefined, - }); - } - - const STRUCTURED_TYPES = new Set([ - 'application/ld+json', - 'speculationrules', - 'application/json+oembed', - 'application/xml+oembed', - ]); - for (const script of document.querySelectorAll('script[type]')) { - if (!(script instanceof HTMLScriptElement)) continue; - const scriptType = (script.getAttribute('type') ?? 
'').toLowerCase(); - if (!STRUCTURED_TYPES.has(scriptType)) continue; - const src = script.getAttribute('src') ?? undefined; - const text = script.textContent ?? ''; - const inHead = !!script.closest('head'); - const inNoscript = !!script.closest('noscript'); - const location = inHead ? 'head' : inNoscript ? 'noscript' : 'body'; - entries.push({ - kind: 'script', - scriptType, - content: text || undefined, - src, - location, - }); - } - - for (const iframe of document.querySelectorAll('iframe[src]')) { - if (!(iframe instanceof HTMLIFrameElement)) continue; - const src = iframe.getAttribute('src') ?? ''; - if (!src) continue; - const inHead = !!iframe.closest('head'); - const inNoscript = !!iframe.closest('noscript'); - const location = inHead ? 'head' : inNoscript ? 'noscript' : 'body'; - entries.push({ kind: 'iframe', src, location }); - } - - const win = window as unknown as Record; - const presentGlobals = knownGlobals.filter((name) => win[name] !== undefined); - if (presentGlobals.length > 0) { - entries.push({ kind: 'window-global', names: presentGlobals }); - } - - return entries; - }, WINDOW_GLOBALS_TO_CHECK) - .catch(() => [] as unknown[]); - + const fnSource = collectHeadFromDocument.toString(); + const globalsLiteral = JSON.stringify(WINDOW_GLOBALS_TO_CHECK); + const expr = `(${fnSource})(window, ${globalsLiteral})`; + const raw = await page.evaluate(expr).catch(() => [] as unknown[]); return raw as RawHeadEntry[]; } diff --git a/packages/@d-zero/beholder/src/extract-meta.spec.ts b/packages/@d-zero/beholder/src/extract-meta.spec.ts new file mode 100644 index 00000000..582a5edb --- /dev/null +++ b/packages/@d-zero/beholder/src/extract-meta.spec.ts @@ -0,0 +1,247 @@ +import { JSDOM } from 'jsdom'; +import { describe, expect, it } from 'vitest'; + +import { extractMetaFromDocument } from './extract-meta.js'; + +const URL = 'https://example.com/'; + +/** + * + * @param html + */ +function mkDom(html: string): JSDOM { + return new JSDOM(html, { url: URL }); 
+} + +/** + * + * @param dom + */ +function asWindow(dom: JSDOM): Window { + return dom.window as unknown as Window; +} + +describe('extractMetaFromDocument', () => { + it('extracts , lang and basic <meta name=description>', async () => { + const html = `<!doctype html> + <html lang="ja"> + <head> + <title>Example Title + + + + + `; + const dom = mkDom(html); + const meta = await extractMetaFromDocument(asWindow(dom), { url: URL, html }); + + expect(meta.title).toBe('Example Title'); + expect(meta.lang).toBe('ja'); + expect(meta.description).toBe('An example page'); + expect(meta.keywords).toBe('a, b, c'); + }); + + it('parses og:* and twitter:* meta tags', async () => { + const html = ` + + + OG + + + + + + + + + `; + const dom = mkDom(html); + const meta = await extractMetaFromDocument(asWindow(dom), { url: URL, html }); + + expect(meta.og?.title).toBe('OG Title'); + expect(meta.og?.type).toBe('article'); + expect(meta.og?.image).toEqual([ + 'https://example.com/a.png', + 'https://example.com/b.png', + ]); + expect(meta.twitter?.card).toBe('summary_large_image'); + expect(meta.twitter?.site).toBe('@example'); + }); + + it('parses viewport, robots and theme-color (with media branches)', async () => { + const html = ` + + + X + + + + + + + `; + const dom = mkDom(html); + const meta = await extractMetaFromDocument(asWindow(dom), { url: URL, html }); + + expect(meta.viewport?.width).toBe('device-width'); + expect(meta.viewport?.initialScale).toBe(1); + expect(meta.robots?.noindex).toBe(true); + expect(meta.robots?.nofollow).toBe(true); + expect(meta.themeColor).toBe('#000000'); + expect(meta.themeColorDark).toBe('#111111'); + expect(meta.themeColorLight).toBe('#eeeeee'); + }); + + it('captures and alternate hreflang', async () => { + const html = ` + + + L + + + + + `; + const dom = mkDom(html); + const meta = await extractMetaFromDocument(asWindow(dom), { url: URL, html }); + + expect(meta.link?.canonical).toBe('https://example.com/canonical'); + const hreflangs = 
meta.link?.alternateHreflang.map((e) => e.hreflang) ?? []; + expect(hreflangs).toEqual(['en', 'ja']); + }); + + it('parses inline JSON-LD scripts', async () => { + const data = { '@context': 'https://schema.org', '@type': 'WebPage', name: 'X' }; + const html = ` + + + J + + + `; + const dom = mkDom(html); + const meta = await extractMetaFromDocument(asWindow(dom), { url: URL, html }); + + expect(meta.jsonLd).toHaveLength(1); + const first = meta.jsonLd[0]; + expect(first?.parsed).toEqual(data); + }); + + it('captures itemtype/itemscope (microdata) and prefix/vocab (RDFa) from ', async () => { + const html = ` + + M + `; + const dom = mkDom(html); + const meta = await extractMetaFromDocument(asWindow(dom), { url: URL, html }); + + expect(meta.microdata?.itemscope).toBe(true); + expect(meta.microdata?.itemtype).toBe('https://schema.org/WebPage'); + expect(meta.rdfa?.prefix).toBe('og: https://ogp.me/ns#'); + expect(meta.rdfa?.vocab).toBe('https://schema.org/'); + expect(meta.rdfa?.typeOf).toBe('WebPage'); + }); + + it('captures and + + `; + const dom = mkDom(html); + const meta = await extractMetaFromDocument(asWindow(dom), { url: URL, html }); + + expect(meta.baseHref).toBe('https://example.com/sub/'); + expect(meta.others.iframe).toEqual([ + { src: 'https://www.youtube.com/embed/abc', location: 'body' }, + ]); + }); + + it('falls back to documentElement.outerHTML when context.html is omitted', async () => { + const html = `FB`; + const dom = mkDom(html); + const meta = await extractMetaFromDocument(asWindow(dom), { url: URL }); + expect(meta.title).toBe('FB'); + expect(meta.tags).toBeDefined(); + expect(meta.tags.entries).toBeInstanceOf(Array); + }); + + it('returns includeRaw when requested', async () => { + const html = `R`; + const dom = mkDom(html); + const meta = await extractMetaFromDocument(asWindow(dom), { + url: URL, + html, + includeRaw: true, + }); + expect(meta._raw).toBeInstanceOf(Array); + expect(meta._raw?.some((e) => e.kind === 'title')).toBe(true); 
+ }); + + it("emits a 'window-global' raw entry when known globals are present on the window", async () => { + const html = `WG`; + const dom = mkDom(html); + // jsdom does not execute scripts by default, so simulate a tag library + // having installed itself onto `window` (the production trigger for the + // `window-global` branch in `collectHeadFromDocument`). + (dom.window as unknown as Record).dataLayer = []; + (dom.window as unknown as Record).fbq = () => {}; + + const meta = await extractMetaFromDocument(asWindow(dom), { + url: URL, + html, + includeRaw: true, + }); + + const globalEntry = meta._raw?.find((e) => e.kind === 'window-global'); + expect(globalEntry).toBeDefined(); + // Throw (failing the test) if the narrowing ever fails, rather than letting the + // trailing `expect` calls silently skip via an `if` branch. + if (globalEntry === undefined || globalEntry.kind !== 'window-global') { + throw new Error('expected a window-global raw entry'); + } + expect(globalEntry.names).toContain('dataLayer'); + expect(globalEntry.names).toContain('fbq'); + }); + + it('forwards headers and statusCode to the tag-detection layer', async () => { + // We can't assert Wappalyzer's internal decisions without coupling to its + // signature table, but we can at least verify that supplying headers and + // statusCode does not throw and that the returned Meta is still well-formed.
+ const html = `H`; + const dom = mkDom(html); + const meta = await extractMetaFromDocument(asWindow(dom), { + url: URL, + html, + headers: { + 'content-type': 'text/html; charset=utf-8', + 'x-powered-by': 'Express', + }, + statusCode: 200, + }); + expect(meta.title).toBe('H'); + expect(Array.isArray(meta.tags.entries)).toBe(true); + }); + + it('records parseError for malformed inline JSON-LD', async () => { + const html = ` + + + JE + + + `; + const dom = mkDom(html); + const meta = await extractMetaFromDocument(asWindow(dom), { url: URL, html }); + + expect(meta.jsonLd).toHaveLength(1); + const first = meta.jsonLd[0]; + expect(first?.parsed).toBeUndefined(); + expect(typeof first?.parseError).toBe('string'); + }); +}); diff --git a/packages/@d-zero/beholder/src/extract-meta.ts b/packages/@d-zero/beholder/src/extract-meta.ts new file mode 100644 index 00000000..bccc8fa8 --- /dev/null +++ b/packages/@d-zero/beholder/src/extract-meta.ts @@ -0,0 +1,121 @@ +/** + * Public, Puppeteer-free entry point for extracting {@link Meta} from an + * already-parsed DOM (e.g. jsdom). + * + * WHY this exists alongside `Scraper.scrapeStart()` / `getMeta(page, …)`: + * callers who already have an HTML string (from `fetch`, a fixture, an + * archive) should not be forced to spin up Chromium just to read a few `` + * tags. This module reuses the same `collectHead → detectTags → classify` + * pipeline as the Puppeteer path — the `Meta` shape returned here is + * identical to what `Scraper` produces, so downstream consumers do not branch + * on the source. + * + * See {@link extractMetaFromDocument} for the usage example. + * @module + */ + +import type { Meta } from './types.js'; + +import { classify } from './meta/classify.js'; +import { collectHeadFromDocument, WINDOW_GLOBALS_TO_CHECK } from './meta/collect-head.js'; +import { detectTags } from './meta/tag-detection.js'; + +/** + * Inputs for {@link extractMetaFromDocument}. 
+ * + * `url`/`statusCode`/`headers` mirror the inputs to the underlying + * `simple-wappalyzer` driver. They are not consumed by the DOM-walk side of + * the pipeline. + * + * `html` is optional: when omitted, `document.documentElement.outerHTML` is + * read off the passed window — matching the fallback `getMeta(page, …)` does + * via `page.content()`. + */ +export type ExtractMetaContext = { + /** The fully resolved URL of the page (used by Wappalyzer + AMP fields). */ + readonly url: string; + /** + * Rendered HTML used for technology detection. Defaults to + * `window.document.documentElement.outerHTML` when omitted. + * + * WHY allow override: callers that fetched the raw HTML string from the + * network already have the *pre-script-execution* markup, which is what + * Wappalyzer's HTML patterns are tuned for. The serialized DOM from + * `outerHTML` reflects whatever scripts have already mutated; provide the + * raw string to get more stable detections. + */ + readonly html?: string; + /** HTTP status code, surfaced to the Wappalyzer driver. */ + readonly statusCode?: number; + /** + * Response headers; case is preserved by the caller, lowercased internally + * by `detectTags`. + */ + readonly headers?: Record; + /** + * When `true`, the returned `Meta` includes `_raw: RawHeadEntry[]` for + * debugging. Default `false` to keep the serialized payload small. + */ + readonly includeRaw?: boolean; +}; + +/** + * Extracts a `Meta` object from a DOM provided by the caller. + * + * Pipeline: + * + * 1. {@link collectHeadFromDocument} walks `window.document` and returns a + * serializable `RawHeadEntry[]`. + * 2. {@link detectTags} runs `simple-wappalyzer` over the HTML + headers to + * detect third-party technologies. + * 3. {@link classify} folds the two signals together into a typed `Meta`. + * + * Step (1) is synchronous and runs first; step (2) is awaited next. 
The two + * are independent in principle, but the current shape is sequential — keeping + * it that way avoids forcing the synchronous DOM walk into a microtask just to + * gain a few milliseconds of overlap with the Wappalyzer call. + * @param window - The window whose `document` will be walked. jsdom's + * `dom.window` works; pass any object satisfying the `Window` + * type. The function never mutates the document. + * @param context - URL / HTML / headers / status code context. See + * {@link ExtractMetaContext}. + * @returns The extracted `Meta` (always defined; empty fields stay empty). + * @example + * ```ts + * import { JSDOM } from 'jsdom'; + * import { extractMetaFromDocument } from '@d-zero/beholder'; + * + * const url = 'https://example.com/'; + * const html = await (await fetch(url)).text(); + * const dom = new JSDOM(html, { url }); + * + * // The `as unknown as Window` cast is needed because jsdom's `DOMWindow` is + * // not structurally identical to lib.dom's `Window` (a few rare globals + * // differ), but the runtime shape is compatible for this function's needs. + * const meta = await extractMetaFromDocument(dom.window as unknown as Window, { + * url, + * html, + * }); + * + * meta.title; // <title> + * meta.og?.image; // og:image[] + * meta.tags.entries; // Wappalyzer detections + extracted IDs + * ``` + */ +export async function extractMetaFromDocument( + window: Window, + context: ExtractMetaContext, +): Promise<Meta> { + const raw = collectHeadFromDocument(window, WINDOW_GLOBALS_TO_CHECK); + const html = context.html ?? window.document.documentElement.outerHTML; + const tags = await detectTags({ + url: context.url, + html, + ...(context.statusCode === undefined ? {} : { statusCode: context.statusCode }), + ...(context.headers === undefined ? {} : { headers: context.headers }), + }); + return classify(raw, { + tags, + ...(context.includeRaw ?
{ includeRaw: true } : {}), + }); +} diff --git a/packages/@d-zero/beholder/src/index.ts b/packages/@d-zero/beholder/src/index.ts index 1019dca4..fa55b14e 100644 --- a/packages/@d-zero/beholder/src/index.ts +++ b/packages/@d-zero/beholder/src/index.ts @@ -12,6 +12,8 @@ */ export { default as default } from './scraper.js'; export { isError } from './is-error.js'; +export { extractMetaFromDocument } from './extract-meta.js'; +export type { ExtractMetaContext } from './extract-meta.js'; export { detectCompress } from '@d-zero/shared/detect-compress'; export type { CompressType } from '@d-zero/shared/detect-compress'; export { detectCDN } from '@d-zero/shared/detect-cdn'; diff --git a/packages/@d-zero/beholder/src/meta/collect-head.ts b/packages/@d-zero/beholder/src/meta/collect-head.ts new file mode 100644 index 00000000..4b4e3e17 --- /dev/null +++ b/packages/@d-zero/beholder/src/meta/collect-head.ts @@ -0,0 +1,247 @@ +/** + * DOM-side raw `<head>` collector. + * + * `collectHeadFromDocument` walks a `Document` (Puppeteer page realm or jsdom realm + * alike) and produces a serializable {@link RawHeadEntry}[] that + * {@link ../meta/classify.ts | classify} can turn into a typed `Meta`. + * + * WHY this function is realm-agnostic: + * + * - The Puppeteer path stringifies this function via `Function.prototype.toString` + * and runs it as a `page.evaluate(string)` expression, so any closure over + * module-scope bindings would resolve to `undefined` in the browser realm. + * - The jsdom (Node) path calls it directly with the jsdom `Window`. Because + * `HTMLLinkElement` (etc.) in jsdom is a *different class instance* from the + * one in the page realm, `instanceof` only works when the constructor is read + * from the *passed* `window` rather than from bare globals. + * + * Together those constraints dictate that the function MUST: + * + * 1. Reference no module-level variables — only its own parameters and inner locals. + * 2. 
Take every HTML class constructor (`HTMLBaseElement`, …) from the passed + * `window` via destructuring instead of relying on ambient globals. + * 3. Stay in plain ES syntax (no TS-only constructs that need helper imports). + * @module + */ + +import type { RawHeadEntry } from './types.js'; + +/** + * Curated list of `window` globals whose presence indicates that a third-party + * tag library has been loaded on the page. Surfaced as a single + * `kind: 'window-global'` entry so that downstream consumers (e.g. tag-detection) + * can cross-reference the script/iframe signals. + * + * Kept here (rather than in `dom-evaluation.ts`) so the Puppeteer path and the + * jsdom path share one source of truth. + */ +export const WINDOW_GLOBALS_TO_CHECK: readonly string[] = [ + 'dataLayer', + 'gtag', + 'ga', + '_gaq', + 'fbq', + '_fbq', + 'clarity', + '_hjSettings', + '_hjid', + 'twq', + 'ttq', + '_linkedin_partner_id', + 'pintrk', + 'amplitude', + 'mixpanel', + 'analytics', + 'heap', + 'posthog', + 'plausible', + 'fathom', + '_paq', + 's_account', + 's', + 'ym', + 'UET', + 'optimizely', + '_hsq', + 'Sentry', + 'Intercom', + 'intercomSettings', + 'drift', + 'Tawk_API', + 'zE', + 'OneTrust', + 'Cookiebot', + 'Stripe', + 'grecaptcha', +]; + +/** + * Walks the given window's `Document` and returns a serializable list of raw + * head entries. + * + * Two realms are supported: + * + * - Browser realm (Puppeteer): the function source is `.toString()`'d and run + * inside the page via `page.evaluate(string)`. Inside the page, `window` + * resolves to the page's global object, so destructured class constructors + * match `instanceof` checks against elements returned from `querySelectorAll`. + * - Node realm (jsdom et al.): the caller passes `dom.window` directly. jsdom's + * HTML element prototypes are distinct from the host Node's bare globals, so + * reading the constructors off the passed `window` is what makes `instanceof` + * succeed. 
+ * + * The function MUST NOT close over any module-scope binding — all data it needs + * is reached through its two parameters. + * @param window - The window object whose `document` will be inspected. Provides + * both the DOM tree and the HTML element constructors used for + * `instanceof` narrowing. + * @param knownGlobals - Names of `window` properties that, when present, + * indicate a third-party tag library is loaded. Required + * (no default) so the Puppeteer-side string-eval path + * does not have to inline a default value list. + * @returns Serializable list of raw head entries for {@link ../meta/classify.ts | classify}. + */ +export function collectHeadFromDocument( + window: Window, + knownGlobals: readonly string[], +): RawHeadEntry[] { + const document = window.document; + // TypeScript's `Window` interface in lib.dom does not directly expose the + // HTML element constructors (`HTMLLinkElement`, `HTMLScriptElement`, …) + // even though every real window object — browser realm AND jsdom realm — + // carries them at runtime. Widening the type here lets us destructure them + // uniformly; the runtime values come straight from the passed window, so + // the cast is purely cosmetic for TS and erased at compile time. + const w = window as Window & { + HTMLBaseElement: typeof globalThis.HTMLBaseElement; + HTMLMetaElement: typeof globalThis.HTMLMetaElement; + HTMLLinkElement: typeof globalThis.HTMLLinkElement; + HTMLScriptElement: typeof globalThis.HTMLScriptElement; + HTMLIFrameElement: typeof globalThis.HTMLIFrameElement; + }; + const { + HTMLBaseElement, + HTMLMetaElement, + HTMLLinkElement, + HTMLScriptElement, + HTMLIFrameElement, + } = w; + + const entries: RawHeadEntry[] = []; + + const html = document.documentElement; + entries.push( + { + kind: 'html', + lang: html.lang || undefined, + dir: html.dir || undefined, + xmlns: html.getAttribute('xmlns') ?? undefined, + prefix: html.getAttribute('prefix') ?? 
undefined, + vocab: html.getAttribute('vocab') ?? undefined, + typeOf: html.getAttribute('typeof') ?? undefined, + itemscope: html.hasAttribute('itemscope') || undefined, + itemtype: html.getAttribute('itemtype') ?? undefined, + amp: html.hasAttribute('amp') || undefined, + lightning: html.hasAttribute('⚡') || undefined, + }, + { kind: 'title', content: document.title }, + ); + + for (const base of document.querySelectorAll('base')) { + if (!(base instanceof HTMLBaseElement)) continue; + entries.push({ + kind: 'base', + href: base.getAttribute('href') ?? undefined, + target: base.getAttribute('target') ?? undefined, + }); + } + + for (const meta of document.querySelectorAll('meta')) { + if (!(meta instanceof HTMLMetaElement)) continue; + const name = meta.getAttribute('name'); + const property = meta.getAttribute('property'); + const httpEquiv = meta.getAttribute('http-equiv'); + const itemprop = meta.getAttribute('itemprop'); + const charset = meta.getAttribute('charset'); + const content = meta.getAttribute('content'); + const media = meta.getAttribute('media'); + entries.push({ + kind: 'meta', + name: name ? name.toLowerCase() : undefined, + property: property ? property.toLowerCase() : undefined, + httpEquiv: httpEquiv ? httpEquiv.toLowerCase() : undefined, + itemprop: itemprop ?? undefined, + charset: charset ?? undefined, + content: content ?? undefined, + media: media ?? undefined, + }); + } + + for (const link of document.querySelectorAll('link[href]')) { + if (!(link instanceof HTMLLinkElement)) continue; + const relRaw = link.getAttribute('rel') ?? ''; + const rel = relRaw.toLowerCase().split(/\s+/u).filter(Boolean); + entries.push({ + kind: 'link', + rel, + href: link.getAttribute('href') ?? '', + type: link.getAttribute('type') ?? undefined, + media: link.getAttribute('media') ?? undefined, + sizes: link.getAttribute('sizes') ?? undefined, + title: link.getAttribute('title') ?? undefined, + hreflang: link.getAttribute('hreflang') ?? 
undefined, + as: link.getAttribute('as') ?? undefined, + crossorigin: link.getAttribute('crossorigin') ?? undefined, + color: link.getAttribute('color') ?? undefined, + blocking: link.getAttribute('blocking') ?? undefined, + imagesrcset: link.getAttribute('imagesrcset') ?? undefined, + }); + } + + const STRUCTURED_TYPES = new Set([ + 'application/ld+json', + 'speculationrules', + 'application/json+oembed', + 'application/xml+oembed', + ]); + for (const script of document.querySelectorAll('script[type]')) { + if (!(script instanceof HTMLScriptElement)) continue; + const scriptType = (script.getAttribute('type') ?? '').toLowerCase(); + if (!STRUCTURED_TYPES.has(scriptType)) continue; + const src = script.getAttribute('src') ?? undefined; + const text = script.textContent ?? ''; + const inHead = !!script.closest('head'); + const inNoscript = !!script.closest('noscript'); + const location = inHead ? 'head' : inNoscript ? 'noscript' : 'body'; + entries.push({ + kind: 'script', + scriptType, + content: text || undefined, + src, + location, + }); + } + + for (const iframe of document.querySelectorAll('iframe[src]')) { + if (!(iframe instanceof HTMLIFrameElement)) continue; + const src = iframe.getAttribute('src') ?? ''; + if (!src) continue; + const inHead = !!iframe.closest('head'); + const inNoscript = !!iframe.closest('noscript'); + const location = inHead ? 'head' : inNoscript ? 
'noscript' : 'body'; + entries.push({ kind: 'iframe', src, location }); + } + + const win = window as unknown as Record<string, unknown>; + const presentGlobals: string[] = []; + for (const name of knownGlobals) { + if (win[name] !== undefined) { + presentGlobals.push(name); + } + } + if (presentGlobals.length > 0) { + entries.push({ kind: 'window-global', names: presentGlobals }); + } + + return entries; +} diff --git a/yarn.lock b/yarn.lock index 993da986..0c58eeaf 100644 --- a/yarn.lock +++ b/yarn.lock @@ -5,6 +5,46 @@ __metadata: version: 8 cacheKey: 10c0 +"@asamuzakjp/css-color@npm:^5.1.11": + version: 5.1.11 + resolution: "@asamuzakjp/css-color@npm:5.1.11" + dependencies: + "@asamuzakjp/generational-cache": "npm:^1.0.1" + "@csstools/css-calc": "npm:^3.2.0" + "@csstools/css-color-parser": "npm:^4.1.0" + "@csstools/css-parser-algorithms": "npm:^4.0.0" + "@csstools/css-tokenizer": "npm:^4.0.0" + checksum: 10c0/32720bdff8daea6a8847aba6cdfae55baa3b4a2690b51d21db7f0382bbd183f3d9f2d5126df50afd889062635684b2819e47113629ee2e80c99389e75f48d060 + languageName: node + linkType: hard + +"@asamuzakjp/dom-selector@npm:^7.1.1": + version: 7.1.1 + resolution: "@asamuzakjp/dom-selector@npm:7.1.1" + dependencies: + "@asamuzakjp/generational-cache": "npm:^1.0.1" + "@asamuzakjp/nwsapi": "npm:^2.3.9" + bidi-js: "npm:^1.0.3" + css-tree: "npm:^3.2.1" + is-potential-custom-element-name: "npm:^1.0.1" + checksum: 10c0/8cec1c618781c94de5836a215bbe5aafb4d8b835b18c51faf8547f4574afa39f92def3951e40123860062467613dd825f1e1600ff32e8045cc099a91796dcfb8 + languageName: node + linkType: hard + +"@asamuzakjp/generational-cache@npm:^1.0.1": + version: 1.0.1 + resolution: "@asamuzakjp/generational-cache@npm:1.0.1" + checksum: 10c0/1de62de43764e13fca3b9a31b7ea9b1bf0780fe053d266e40378a19ff8c66b543e011e6a0df02d410cd59bf981126706f176cdbb938985165202c4a079fe1057 + languageName: node + linkType: hard + +"@asamuzakjp/nwsapi@npm:^2.3.9": + version: 2.3.9 + resolution: "@asamuzakjp/nwsapi@npm:2.3.9" + 
checksum: 10c0/869b81382e775499c96c45c6dbe0d0766a6da04bcf0abb79f5333535c4e19946851acaa43398f896e2ecc5a1de9cf3db7cf8c4b1afac1ee3d15e21584546d74d + languageName: node + linkType: hard + "@axe-core/puppeteer@npm:4.11.0": version: 4.11.0 resolution: "@axe-core/puppeteer@npm:4.11.0" @@ -78,6 +118,17 @@ __metadata: languageName: node linkType: hard +"@bramus/specificity@npm:^2.4.2": + version: 2.4.2 + resolution: "@bramus/specificity@npm:2.4.2" + dependencies: + css-tree: "npm:^3.0.0" + bin: + specificity: bin/cli.js + checksum: 10c0/c5f4e04e0bca0d2202598207a5eb0733c8109d12a68a329caa26373bec598d99db5bb785b8865fefa00fc01b08c6068138807ceb11a948fe15e904ed6cf4ba72 + languageName: node + linkType: hard + "@cacheable/memory@npm:^2.0.7": version: 2.0.7 resolution: "@cacheable/memory@npm:2.0.7" @@ -849,6 +900,64 @@ __metadata: languageName: node linkType: hard +"@csstools/color-helpers@npm:^6.0.2": + version: 6.0.2 + resolution: "@csstools/color-helpers@npm:6.0.2" + checksum: 10c0/4c66574563d7c960010c11e41c2673675baff07c427cca6e8dddffa5777de45770d13ff3efce1c0642798089ad55de52870d9d8141f78db3fa5bba012f2d3789 + languageName: node + linkType: hard + +"@csstools/css-calc@npm:^3.2.0, @csstools/css-calc@npm:^3.2.1": + version: 3.2.1 + resolution: "@csstools/css-calc@npm:3.2.1" + peerDependencies: + "@csstools/css-parser-algorithms": ^4.0.0 + "@csstools/css-tokenizer": ^4.0.0 + checksum: 10c0/0191c8d1cd4dffa0d3b6bfd1e78a721934b1d7a6c972966e4fdaa72208c6789e8ff443ee81764a32f1e6107825695b5524ef2b4dc1681b5b29230f2a1277e5df + languageName: node + linkType: hard + +"@csstools/css-color-parser@npm:^4.1.0": + version: 4.1.1 + resolution: "@csstools/css-color-parser@npm:4.1.1" + dependencies: + "@csstools/color-helpers": "npm:^6.0.2" + "@csstools/css-calc": "npm:^3.2.1" + peerDependencies: + "@csstools/css-parser-algorithms": ^4.0.0 + "@csstools/css-tokenizer": ^4.0.0 + checksum: 
10c0/427bd32f1a8917342a70a6fd97b93bb492aae7c8790e7782b5d6edc8c08064bb8aef0a86099f286db00288f9afea85eb92c46350e9057f5fea058e03a2a09203 + languageName: node + linkType: hard + +"@csstools/css-parser-algorithms@npm:^4.0.0": + version: 4.0.0 + resolution: "@csstools/css-parser-algorithms@npm:4.0.0" + peerDependencies: + "@csstools/css-tokenizer": ^4.0.0 + checksum: 10c0/94558c2428d6ef0ddef542e86e0a8376aa1263a12a59770abb13ba50d7b83086822c75433f32aa2e7fef00555e1cc88292f9ca5bce79aed232bb3fed73b1528d + languageName: node + linkType: hard + +"@csstools/css-syntax-patches-for-csstree@npm:^1.1.3": + version: 1.1.5 + resolution: "@csstools/css-syntax-patches-for-csstree@npm:1.1.5" + peerDependencies: + css-tree: ^3.2.1 + peerDependenciesMeta: + css-tree: + optional: true + checksum: 10c0/a31f0cfb74e2b5ce8a283c47969a202fc3b23c3ee05c6b6beab7f5c14d89c50b82533e446df74f7df0bf88bf23810ed59431353db26e00d5b013995c1ebf07a2 + languageName: node + linkType: hard + +"@csstools/css-tokenizer@npm:^4.0.0": + version: 4.0.0 + resolution: "@csstools/css-tokenizer@npm:4.0.0" + checksum: 10c0/669cf3d0f9c8e1ffdf8c9955ad8beba0c8cfe03197fe29a4fcbd9ee6f7a18856cfa42c62670021a75183d9ab37f5d14a866e6a9df753a6c07f59e36797a9ea9f + languageName: node + linkType: hard + "@d-zero/a11y-check-axe-scenario@npm:0.5.25, @d-zero/a11y-check-axe-scenario@workspace:packages/@d-zero/a11y-check-axe-scenario": version: 0.0.0-use.local resolution: "@d-zero/a11y-check-axe-scenario@workspace:packages/@d-zero/a11y-check-axe-scenario" @@ -971,7 +1080,9 @@ __metadata: "@d-zero/puppeteer-page-scan": "npm:4.5.1" "@d-zero/shared": "npm:0.22.0" "@types/debug": "npm:4.1.12" + "@types/jsdom": "npm:28.0.3" debug: "npm:4.4.3" + jsdom: "npm:29.1.1" puppeteer: "npm:24.37.5" simple-wappalyzer: "npm:1.1.99" languageName: unknown @@ -1647,6 +1758,18 @@ __metadata: languageName: node linkType: hard +"@exodus/bytes@npm:^1.11.0, @exodus/bytes@npm:^1.15.0, @exodus/bytes@npm:^1.6.0": + version: 1.15.1 + resolution: "@exodus/bytes@npm:1.15.1" + 
peerDependencies: + "@noble/hashes": ^1.8.0 || ^2.0.0 + peerDependenciesMeta: + "@noble/hashes": + optional: true + checksum: 10c0/333056a6953bbf875d9f3b86c32314de29458d842e5f56f6ef8034b18c2d9660184550093d1bae5de0064043d5e23f54cc03148798d9d29cf5167ac03f2e9f8c + languageName: node + linkType: hard + "@gar/promise-retry@npm:^1.0.0": version: 1.0.2 resolution: "@gar/promise-retry@npm:1.0.2" @@ -3808,6 +3931,18 @@ __metadata: languageName: node linkType: hard +"@types/jsdom@npm:28.0.3": + version: 28.0.3 + resolution: "@types/jsdom@npm:28.0.3" + dependencies: + "@types/node": "npm:*" + "@types/tough-cookie": "npm:*" + parse5: "npm:^8.0.0" + undici-types: "npm:^7.21.0" + checksum: 10c0/08b1cd61ee3e9610676be3c68a782a94667b86a5f73b8a262095d05f84c9e864fc11b25ae53450cd519a0abd46c202906a735bd61aa176257a981964bc5b1166 + languageName: node + linkType: hard + "@types/json-schema@npm:^7.0.15": version: 7.0.15 resolution: "@types/json-schema@npm:7.0.15" @@ -3967,6 +4102,13 @@ __metadata: languageName: node linkType: hard +"@types/tough-cookie@npm:*": + version: 4.0.5 + resolution: "@types/tough-cookie@npm:4.0.5" + checksum: 10c0/68c6921721a3dcb40451543db2174a145ef915bc8bcbe7ad4e59194a0238e776e782b896c7a59f4b93ac6acefca9161fccb31d1ce3b3445cb6faa467297fb473 + languageName: node + linkType: hard + "@types/unist@npm:^2, @types/unist@npm:^2.0.0, @types/unist@npm:^2.0.2": version: 2.0.11 resolution: "@types/unist@npm:2.0.11" @@ -5045,6 +5187,15 @@ __metadata: languageName: node linkType: hard +"bidi-js@npm:^1.0.3": + version: 1.0.3 + resolution: "bidi-js@npm:1.0.3" + dependencies: + require-from-string: "npm:^2.0.2" + checksum: 10c0/fdddea4aa4120a34285486f2267526cd9298b6e8b773ad25e765d4f104b6d7437ab4ba542e6939e3ac834a7570bcf121ee2cf6d3ae7cd7082c4b5bedc8f271e1 + languageName: node + linkType: hard + "bignumber.js@npm:^9.0.0": version: 9.3.1 resolution: "bignumber.js@npm:9.3.1" @@ -6334,6 +6485,16 @@ __metadata: languageName: node linkType: hard +"css-tree@npm:^3.0.0, 
css-tree@npm:^3.2.1": + version: 3.2.1 + resolution: "css-tree@npm:3.2.1" + dependencies: + mdn-data: "npm:2.27.1" + source-map-js: "npm:^1.2.1" + checksum: 10c0/1f65e9ccaa56112a4706d6f003dd43d777f0dbcf848e66fd320f823192533581f8dd58daa906cb80622658332d50284d6be13b87a6ab4556cbbfe9ef535bbf7e + languageName: node + linkType: hard + "cssesc@npm:^3.0.0": version: 3.0.0 resolution: "cssesc@npm:3.0.0" @@ -6406,6 +6567,16 @@ __metadata: languageName: node linkType: hard +"data-urls@npm:^7.0.0": + version: 7.0.0 + resolution: "data-urls@npm:7.0.0" + dependencies: + whatwg-mimetype: "npm:^5.0.0" + whatwg-url: "npm:^16.0.0" + checksum: 10c0/08d88ef50d8966a070ffdaa703e1e4b29f01bb2da364dfbc1612b1c2a4caa8045802c9532d81347b21781100132addb36a585071c8323b12cce97973961dee9f + languageName: node + linkType: hard + "dateformat@npm:^3.0.3": version: 3.0.3 resolution: "dateformat@npm:3.0.3" @@ -6449,6 +6620,13 @@ __metadata: languageName: node linkType: hard +"decimal.js@npm:^10.6.0": + version: 10.6.0 + resolution: "decimal.js@npm:10.6.0" + checksum: 10c0/07d69fbcc54167a340d2d97de95f546f9ff1f69d2b45a02fd7a5292412df3cd9eb7e23065e532a318f5474a2e1bccf8392fdf0443ef467f97f3bf8cb0477e5aa + languageName: node + linkType: hard + "decompress-response@npm:^6.0.0": version: 6.0.0 resolution: "decompress-response@npm:6.0.0" @@ -6827,6 +7005,13 @@ __metadata: languageName: node linkType: hard +"entities@npm:^8.0.0": + version: 8.0.0 + resolution: "entities@npm:8.0.0" + checksum: 10c0/938e631664c19451823344a351aeeafd74fae2d5fa51e4d5b6ff635afaefd4bacf0f609989888c04c42733f46ffdac15211608267ebb02488005891a4793e94d + languageName: node + linkType: hard + "env-paths@npm:^2.2.0, env-paths@npm:^2.2.1": version: 2.2.1 resolution: "env-paths@npm:2.2.1" @@ -8644,6 +8829,15 @@ __metadata: languageName: node linkType: hard +"html-encoding-sniffer@npm:^6.0.0": + version: 6.0.0 + resolution: "html-encoding-sniffer@npm:6.0.0" + dependencies: + "@exodus/bytes": "npm:^1.6.0" + checksum: 
10c0/66dc3f6f5539cc3beb814fcbfae7eacf4ec38cf824d6e1425b72039b51a40f4456bd8541ba66f4f4fe09cdf885ab5cd5bae6ec6339d6895a930b2fdb83c53025 + languageName: node + linkType: hard + "html-entities@npm:^2.6.0": version: 2.6.0 resolution: "html-entities@npm:2.6.0" @@ -9199,6 +9393,13 @@ __metadata: languageName: node linkType: hard +"is-potential-custom-element-name@npm:^1.0.1": + version: 1.0.1 + resolution: "is-potential-custom-element-name@npm:1.0.1" + checksum: 10c0/b73e2f22bc863b0939941d369486d308b43d7aef1f9439705e3582bfccaa4516406865e32c968a35f97a99396dac84e2624e67b0a16b0a15086a785e16ce7db9 + languageName: node + linkType: hard + "is-promise@npm:^4.0.0": version: 4.0.0 resolution: "is-promise@npm:4.0.0" @@ -9478,6 +9679,40 @@ __metadata: languageName: node linkType: hard +"jsdom@npm:29.1.1": + version: 29.1.1 + resolution: "jsdom@npm:29.1.1" + dependencies: + "@asamuzakjp/css-color": "npm:^5.1.11" + "@asamuzakjp/dom-selector": "npm:^7.1.1" + "@bramus/specificity": "npm:^2.4.2" + "@csstools/css-syntax-patches-for-csstree": "npm:^1.1.3" + "@exodus/bytes": "npm:^1.15.0" + css-tree: "npm:^3.2.1" + data-urls: "npm:^7.0.0" + decimal.js: "npm:^10.6.0" + html-encoding-sniffer: "npm:^6.0.0" + is-potential-custom-element-name: "npm:^1.0.1" + lru-cache: "npm:^11.3.5" + parse5: "npm:^8.0.1" + saxes: "npm:^6.0.0" + symbol-tree: "npm:^3.2.4" + tough-cookie: "npm:^6.0.1" + undici: "npm:^7.25.0" + w3c-xmlserializer: "npm:^5.0.0" + webidl-conversions: "npm:^8.0.1" + whatwg-mimetype: "npm:^5.0.0" + whatwg-url: "npm:^16.0.1" + xml-name-validator: "npm:^5.0.0" + peerDependencies: + canvas: ^3.0.0 + peerDependenciesMeta: + canvas: + optional: true + checksum: 10c0/20e2174b09d9d06393cb48e1392b7a1cb7191d6656a6f7b3b8fbf9853b4ab0ef60b4a42c2c55f71b55ca5da50ffa75bcdc6986210963182e7993c6f9cd4f499b + languageName: node + linkType: hard + "jsesc@npm:^3.1.0, jsesc@npm:~3.1.0": version: 3.1.0 resolution: "jsesc@npm:3.1.0" @@ -10131,6 +10366,13 @@ __metadata: languageName: node linkType: hard 
+"lru-cache@npm:^11.3.5": + version: 11.5.1 + resolution: "lru-cache@npm:11.5.1" + checksum: 10c0/7b341cea79a8efe9c6a6f20c8757a77eca5b25d7ff983ccf4e11e547b81f6787824baa1c84705251dff84ab4ffac85717ac354b9d02e465f86a9f8b166409979 + languageName: node + linkType: hard + "lru-cache@npm:^6.0.0": version: 6.0.0 resolution: "lru-cache@npm:6.0.0" @@ -10395,6 +10637,13 @@ __metadata: languageName: node linkType: hard +"mdn-data@npm:2.27.1": + version: 2.27.1 + resolution: "mdn-data@npm:2.27.1" + checksum: 10c0/eb8abf5d22e4d1e090346f5e81b67d23cef14c83940e445da5c44541ad874dc8fb9f6ca236e8258c3a489d9fb5884188a4d7d58773adb9089ac2c0b966796393 + languageName: node + linkType: hard + "media-typer@npm:^1.1.0": version: 1.1.0 resolution: "media-typer@npm:1.1.0" @@ -12000,6 +12249,15 @@ __metadata: languageName: node linkType: hard +"parse5@npm:^8.0.0, parse5@npm:^8.0.1": + version: 8.0.1 + resolution: "parse5@npm:8.0.1" + dependencies: + entities: "npm:^8.0.0" + checksum: 10c0/c3c1c5aab55f6e4be5245599790e56e64be7764a4a0edd7f98db4fe3bb380f63add752fa047dff0496446c25f4104f0c7c1967723de640bde92306a7bb67ed2f + languageName: node + linkType: hard + "parseurl@npm:^1.3.3": version: 1.3.3 resolution: "parseurl@npm:1.3.3" @@ -12469,7 +12727,7 @@ __metadata: languageName: node linkType: hard -"punycode@npm:^2.1.0": +"punycode@npm:^2.1.0, punycode@npm:^2.3.1": version: 2.3.1 resolution: "punycode@npm:2.3.1" checksum: 10c0/14f76a8206bc3464f794fb2e3d3cc665ae416c01893ad7a02b23766eb07159144ee612ad67af5e84fa4479ccfe67678c4feb126b0485651b302babf66f04f9e9 @@ -13378,6 +13636,15 @@ __metadata: languageName: node linkType: hard +"saxes@npm:^6.0.0": + version: 6.0.0 + resolution: "saxes@npm:6.0.0" + dependencies: + xmlchars: "npm:^2.2.0" + checksum: 10c0/3847b839f060ef3476eb8623d099aa502ad658f5c40fd60c105ebce86d244389b0d76fcae30f4d0c728d7705ceb2f7e9b34bb54717b6a7dbedaf5dad2d9a4b74 + languageName: node + linkType: hard + "scslre@npm:^0.3.0": version: 0.3.0 resolution: "scslre@npm:0.3.0" @@ -14142,6 +14409,13 
@@ __metadata: languageName: node linkType: hard +"symbol-tree@npm:^3.2.4": + version: 3.2.4 + resolution: "symbol-tree@npm:3.2.4" + checksum: 10c0/dfbe201ae09ac6053d163578778c53aa860a784147ecf95705de0cd23f42c851e1be7889241495e95c37cabb058edb1052f141387bef68f705afc8f9dd358509 + languageName: node + linkType: hard + "table@npm:^6.9.0": version: 6.9.0 resolution: "table@npm:6.9.0" @@ -14749,7 +15023,7 @@ __metadata: languageName: node linkType: hard -"tough-cookie@npm:~6.0.0": +"tough-cookie@npm:^6.0.1, tough-cookie@npm:~6.0.0": version: 6.0.1 resolution: "tough-cookie@npm:6.0.1" dependencies: @@ -14758,6 +15032,15 @@ __metadata: languageName: node linkType: hard +"tr46@npm:^6.0.0": + version: 6.0.0 + resolution: "tr46@npm:6.0.0" + dependencies: + punycode: "npm:^2.3.1" + checksum: 10c0/83130df2f649228aa91c17754b66248030a3af34911d713b5ea417066fa338aa4bc8668d06bd98aa21a2210f43fc0a3db8b9099e7747fb5830e40e39a6a1058e + languageName: node + linkType: hard + "tree-kill@npm:^1.2.2": version: 1.2.2 resolution: "tree-kill@npm:1.2.2" @@ -14992,6 +15275,13 @@ __metadata: languageName: node linkType: hard +"undici-types@npm:^7.21.0": + version: 7.27.2 + resolution: "undici-types@npm:7.27.2" + checksum: 10c0/d3efff570e7f348eea961706090221f61422fc202d4fb8d78fbea7060f0c6ad11f4ee7e796d0e222d6f3f4a6cc54ef22ced5e7fd4ac192b1738a2d30f45ff51b + languageName: node + linkType: hard + "undici-types@npm:~5.26.4": version: 5.26.5 resolution: "undici-types@npm:5.26.5" @@ -15013,6 +15303,13 @@ __metadata: languageName: node linkType: hard +"undici@npm:^7.25.0": + version: 7.27.2 + resolution: "undici@npm:7.27.2" + checksum: 10c0/714632147c80eb8eda8a52df51b481d346df5e035ccc1d87eb3bbcb8f92ec25d7cbbe81abdeae5db4e37a93e490c8d2fa2359ecdca4b2c5c6c513dcd2626ad47 + languageName: node + linkType: hard + "unicorn-magic@npm:^0.1.0": version: 0.1.0 resolution: "unicorn-magic@npm:0.1.0" @@ -15518,6 +15815,15 @@ __metadata: languageName: node linkType: hard +"w3c-xmlserializer@npm:^5.0.0": + version: 5.0.0 + 
resolution: "w3c-xmlserializer@npm:5.0.0" + dependencies: + xml-name-validator: "npm:^5.0.0" + checksum: 10c0/8712774c1aeb62dec22928bf1cdfd11426c2c9383a1a63f2bcae18db87ca574165a0fbe96b312b73652149167ac6c7f4cf5409f2eb101d9c805efe0e4bae798b + languageName: node + linkType: hard + "walk-up-path@npm:^4.0.0": version: 4.0.0 resolution: "walk-up-path@npm:4.0.0" @@ -15569,6 +15875,13 @@ __metadata: languageName: node linkType: hard +"webidl-conversions@npm:^8.0.1": + version: 8.0.1 + resolution: "webidl-conversions@npm:8.0.1" + checksum: 10c0/3f6f327ca5fa0c065ed8ed0ef3b72f33623376e68f958e9b7bd0df49fdb0b908139ac2338d19fb45bd0e05595bda96cb6d1622222a8b413daa38a17aacc4dd46 + languageName: node + linkType: hard + "whatwg-mimetype@npm:^3.0.0": version: 3.0.0 resolution: "whatwg-mimetype@npm:3.0.0" @@ -15576,6 +15889,24 @@ __metadata: languageName: node linkType: hard +"whatwg-mimetype@npm:^5.0.0": + version: 5.0.0 + resolution: "whatwg-mimetype@npm:5.0.0" + checksum: 10c0/eead164fe73a00dd82f817af6fc0bd22e9c273e1d55bf4bc6bdf2da7ad8127fca82ef00ea6a37892f5f5641f8e34128e09508f92126086baba126b9e0d57feb4 + languageName: node + linkType: hard + +"whatwg-url@npm:^16.0.0, whatwg-url@npm:^16.0.1": + version: 16.0.1 + resolution: "whatwg-url@npm:16.0.1" + dependencies: + "@exodus/bytes": "npm:^1.11.0" + tr46: "npm:^6.0.0" + webidl-conversions: "npm:^8.0.1" + checksum: 10c0/e75565566abf3a2cdbd9f06c965dbcccee6ec4e9f0d3728ad5e08ceb9944279848bcaa211d35a29cb6d2df1e467dd05cfb59fbddf8a0adcd7d0bce9ffb703fd2 + languageName: node + linkType: hard + "which@npm:^1.2.14": version: 1.3.1 resolution: "which@npm:1.3.1" @@ -15836,6 +16167,13 @@ __metadata: languageName: node linkType: hard +"xml-name-validator@npm:^5.0.0": + version: 5.0.0 + resolution: "xml-name-validator@npm:5.0.0" + checksum: 10c0/3fcf44e7b73fb18be917fdd4ccffff3639373c7cb83f8fc35df6001fecba7942f1dbead29d91ebb8315e2f2ff786b508f0c9dc0215b6353f9983c6b7d62cb1f5 + languageName: node + linkType: hard + "xml-parse-from-string@npm:^1.0.0": 
version: 1.0.1 resolution: "xml-parse-from-string@npm:1.0.1" @@ -15860,6 +16198,13 @@ __metadata: languageName: node linkType: hard +"xmlchars@npm:^2.2.0": + version: 2.2.0 + resolution: "xmlchars@npm:2.2.0" + checksum: 10c0/b64b535861a6f310c5d9bfa10834cf49127c71922c297da9d4d1b45eeaae40bf9b4363275876088fbe2667e5db028d2cd4f8ee72eed9bede840a67d57dab7593 + languageName: node + linkType: hard + "xtend@npm:^4.0.0, xtend@npm:^4.0.1, xtend@npm:~4.0.1": version: 4.0.2 resolution: "xtend@npm:4.0.2"