From 1b13d31301f3fcf0641dd50fd2cbe71e20ed5094 Mon Sep 17 00:00:00 2001 From: Tom Riddle Date: Fri, 1 May 2026 19:25:29 +0200 Subject: [PATCH 1/5] Add claude config --- CLAUDE.md | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..b873e862 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,81 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +Porcelain is a 3D game engine written in Swift 5, built on Apple's Metal 2. It is structured as a reusable Swift framework (`Engine.framework`) consumed by game/demo projects. + +## Commands + +### Build + +```bash +xcodebuild build -project Engine.xcodeproj -scheme Engine archive \ + -derivedDataPath /tmp/engine-build -configuration Release \ + -destination "generic/platform=macOS" +``` + +### Run All Tests + +```bash +xcodebuild -project Engine.xcodeproj -scheme EngineTests test \ + -destination "platform=macOS" +``` + +### Run a Single Test + +```bash +xcodebuild -project Engine.xcodeproj -scheme EngineTests test \ + -destination "platform=macOS" \ + -only-testing "EngineTests/TestClassName/testMethodName" +``` + +### Lint + +```bash +swiftlint # Swift lint (strict, 80+ opt-in rules) +yamllint . # YAML files +mdl . # Markdown files +``` + +## Architecture + +### Rendering Pipeline + +The engine uses a **deferred rendering pipeline** implemented in Metal. Shaders live in `Engine/Shaders/` and cover: +- PBR shading with normal mapping and translucency +- Shadow mapping for point, spot, and directional lights (PCF soft shadows) +- Post-processing: bloom, motion blur, film grain, vignette, distance fog +- SSAO, environment mapping, particle effects + +### Scene Graph + +Scenes are built from nodes in `Engine/Core/Organization/`. 
The scene graph supports skeletal animation, rigid body animation, and ray-traced bounds queries. `Engine/Core/Scene/` holds scene management, bounds computation, and ray intersection logic. + +### Pipeline from Asset to Frame + +1. **Import** (`Engine/Core/Import/`) — USDZ models and height-map meshes are loaded via Model I/O. +2. **Translation** (`Engine/Core/Translation/`) — Scene descriptions are validated and converted to render-ready representations with render masks. +3. **Rendering** (`Engine/Core/Rendering/`) — Each frame the `PNRenderingCoordinator` hands a `PNFrameSupply` to `PNPipeline`, which encodes the deferred pipeline; the scene description inside that supply is produced beforehand by the `PNTranscriber` from the scene graph. +4. **Buffers** (`Engine/Core/Buffers/`) — `DynamicBuffer` and `FlatTree` manage GPU-visible memory; `DataBuffer` wraps raw Metal buffers. + +### Engine Entry Point + +`PNEngine` / `PNIEngine` (in `Engine/Core/Engine/`) is the public interface integrators use to configure and tick the engine. A `RepeatableTaskQueue` (in `Engine/Core/Task/`) drives per-frame update tasks. + +### Key Dependencies (Git Submodules) + +- **DependencyGraph** — custom dependency injection used throughout the engine. +- **ZPack** — SIMD/Metal utility extensions (also exposed via `Engine/Core/Extensions/`). + +### Platform Abstraction + +`Engine/Core/UI/` provides platform-specific screen interaction helpers for both macOS (AppKit) and iOS/tvOS (UIKit), keeping the rest of the engine platform-agnostic. + +### Code Style Constraints (SwiftLint) + +- Line length warning at 160, error at 180. +- Function body warning at 150 lines, error at 200. +- Cyclomatic complexity max: 12. +- `// TODO:` comments and implicitly unwrapped optionals are disabled in lint; avoid introducing them.
From 7a9d0edde950ef21b0fac30d1a525ea00194f003 Mon Sep 17 00:00:00 2001 From: Tom Riddle Date: Fri, 1 May 2026 19:32:22 +0200 Subject: [PATCH 2/5] Add triple buffering --- .../PNIThreadedWorkloadManagerFactory.swift | 5 +- .../PNIThreadedWorkloadManager.swift | 53 ++++++++++--------- .../WorkloadManager/PNIWorkloadManager.swift | 4 +- .../PNIRenderingCoordinator.swift | 5 +- .../Coordination/PNRenderingCoordinator.swift | 2 +- Engine/Core/Rendering/Stages/PNPipeline.swift | 4 +- 6 files changed, 39 insertions(+), 34 deletions(-) diff --git a/Engine/Core/Engine/Workload/Factories/PNIThreadedWorkloadManagerFactory.swift b/Engine/Core/Engine/Workload/Factories/PNIThreadedWorkloadManagerFactory.swift index 42509440..3c2a366b 100644 --- a/Engine/Core/Engine/Workload/Factories/PNIThreadedWorkloadManagerFactory.swift +++ b/Engine/Core/Engine/Workload/Factories/PNIThreadedWorkloadManagerFactory.swift @@ -8,10 +8,11 @@ public struct PNIThreadedWorkloadManagerFactory: PNWorkloadManagerFactory { renderingCoordinator: PNRenderingCoordinator, renderMaskGenerator: PNRenderMaskGenerator) -> PNWorkloadManager? 
{ guard let bufferStoreA = bufferStoreFactory.new(), - let bufferStoreB = bufferStoreFactory.new() else { + let bufferStoreB = bufferStoreFactory.new(), + let bufferStoreC = bufferStoreFactory.new() else { return nil } - return PNIThreadedWorkloadManager(bufferStores: (bufferStoreA, bufferStoreB), + return PNIThreadedWorkloadManager(bufferStores: (bufferStoreA, bufferStoreB, bufferStoreC), renderingCoordinator: renderingCoordinator, renderMaskGenerator: renderMaskGenerator, transcriber: PNITranscriber.default) diff --git a/Engine/Core/Engine/Workload/WorkloadManager/PNIThreadedWorkloadManager.swift b/Engine/Core/Engine/Workload/WorkloadManager/PNIThreadedWorkloadManager.swift index 8941350c..1e6ab82c 100644 --- a/Engine/Core/Engine/Workload/WorkloadManager/PNIThreadedWorkloadManager.swift +++ b/Engine/Core/Engine/Workload/WorkloadManager/PNIThreadedWorkloadManager.swift @@ -12,55 +12,56 @@ public class PNIThreadedWorkloadManager: PNWorkloadManager { private let dispatchQueue = DispatchQueue.global() private let dispatchGroup = DispatchGroup() private let nodeUpdate = PNNodeUpdater() - private var frameSupplies: PNIBufferedValue + private var supplies: [PNFrameSupply] + private var writeIndex = 0 + private let semaphore = DispatchSemaphore(value: 3) private var previousFrameScene: PNSceneDescription? 
- public init(bufferStores: (PNBufferStore, PNBufferStore), + public init(bufferStores: (PNBufferStore, PNBufferStore, PNBufferStore), renderingCoordinator: PNRenderingCoordinator, renderMaskGenerator: PNRenderMaskGenerator, transcriber: PNTranscriber) { self.renderingCoordinator = renderingCoordinator self.transcriber = transcriber self.renderMaskGenerator = renderMaskGenerator - frameSupplies = PNIBufferedValue(PNFrameSupply(scene: PNSceneDescription(), - bufferStore: bufferStores.0, - mask: .empty), - PNFrameSupply(scene: PNSceneDescription(), - bufferStore: bufferStores.1, - mask: .empty)) + supplies = [ + PNFrameSupply(scene: PNSceneDescription(), bufferStore: bufferStores.0, mask: .empty), + PNFrameSupply(scene: PNSceneDescription(), bufferStore: bufferStores.1, mask: .empty), + PNFrameSupply(scene: PNSceneDescription(), bufferStore: bufferStores.2, mask: .empty) + ] } public func draw(sceneGraph: PNScene, taskQueue: PNRepeatableTaskQueue) { + semaphore.wait() + let slotIdx = writeIndex % 3 + let slot = supplies[slotIdx] dispatchGroup.enter() dispatchQueue.async { [unowned self] in let backgroundUpdateInterval = psignposter.beginInterval("Background update") taskQueue.execute() nodeUpdate.update(rootNode: sceneGraph.rootNode) let scene = transcriber.transcribe(scene: sceneGraph) - let inactive = frameSupplies.pullInactive if PNDefaults.shared.debug.boundingBoxes { let geometry = PNBoundingBoxCreator.vertices(boundingBoxes: scene.boundingBoxes) - inactive.bufferStore.boundingBoxes.upload(data: geometry) + slot.bufferStore.boundingBoxes.upload(data: geometry) } - inactive.bufferStore.matrixPalettes.upload(data: scene.palettes) - inactive.bufferStore.ambientLights.upload(data: scene.ambientLights) - inactive.bufferStore.omniLights.upload(data: scene.omniLights) - inactive.bufferStore.directionalLights.upload(data: scene.directionalLights) - inactive.bufferStore.spotLights.upload(data: scene.spotLights) - inactive.bufferStore.cameras.upload(data: 
scene.cameraUniforms) - inactive.bufferStore.modelCoordinateSystems.upload(data: scene.uniforms) + slot.bufferStore.matrixPalettes.upload(data: scene.palettes) + slot.bufferStore.ambientLights.upload(data: scene.ambientLights) + slot.bufferStore.omniLights.upload(data: scene.omniLights) + slot.bufferStore.directionalLights.upload(data: scene.directionalLights) + slot.bufferStore.spotLights.upload(data: scene.spotLights) + slot.bufferStore.cameras.upload(data: scene.cameraUniforms) + slot.bufferStore.modelCoordinateSystems.upload(data: scene.uniforms) let previous = previousFrameScene ?? scene - inactive.bufferStore.previousMatrixPalettes.upload(data: previous.palettes) - inactive.bufferStore.previousModelCoordinateSystems.upload(data: previous.uniforms) - let supply = PNFrameSupply(scene: scene, - bufferStore: inactive.bufferStore, - mask: renderMaskGenerator.generate(scene: scene)) - frameSupplies.push(supply) + slot.bufferStore.previousMatrixPalettes.upload(data: previous.palettes) + slot.bufferStore.previousModelCoordinateSystems.upload(data: previous.uniforms) + supplies[slotIdx] = PNFrameSupply(scene: scene, + bufferStore: slot.bufferStore, + mask: renderMaskGenerator.generate(scene: scene)) previousFrameScene = scene psignposter.endInterval("Background update", backgroundUpdateInterval) dispatchGroup.leave() } - renderingCoordinator.draw(frameSupply: frameSupplies.pull) dispatchGroup.wait() - frameSupplies.swap() + renderingCoordinator.draw(frameSupply: supplies[slotIdx], onComplete: { [semaphore] in semaphore.signal() }) + writeIndex += 1 } - } diff --git a/Engine/Core/Engine/Workload/WorkloadManager/PNIWorkloadManager.swift b/Engine/Core/Engine/Workload/WorkloadManager/PNIWorkloadManager.swift index fd9f32fd..050bef89 100644 --- a/Engine/Core/Engine/Workload/WorkloadManager/PNIWorkloadManager.swift +++ b/Engine/Core/Engine/Workload/WorkloadManager/PNIWorkloadManager.swift @@ -10,6 +10,7 @@ public class PNIWorkloadManager: PNWorkloadManager { private
let renderMaskGenerator: PNRenderMaskGenerator private var previousFrameScene: PNSceneDescription? private let nodeUpdate = PNNodeUpdater() + private let semaphore = DispatchSemaphore(value: 1) public init(bufferStore: PNBufferStore, renderingCoordinator: PNRenderingCoordinator, renderMaskGenerator: PNRenderMaskGenerator, @@ -20,6 +21,7 @@ public class PNIWorkloadManager: PNWorkloadManager { self.renderMaskGenerator = renderMaskGenerator } public func draw(sceneGraph: PNScene, taskQueue: PNRepeatableTaskQueue) { + semaphore.wait() taskQueue.execute() nodeUpdate.update(rootNode: sceneGraph.rootNode) let scene = transcriber.transcribe(scene: sceneGraph) @@ -41,6 +43,6 @@ public class PNIWorkloadManager: PNWorkloadManager { bufferStore: bufferStore, mask: renderMaskGenerator.generate(scene: scene)) previousFrameScene = scene - renderingCoordinator.draw(frameSupply: supply) + renderingCoordinator.draw(frameSupply: supply, onComplete: { [semaphore] in semaphore.signal() }) } } diff --git a/Engine/Core/Rendering/Coordination/PNIRenderingCoordinator.swift b/Engine/Core/Rendering/Coordination/PNIRenderingCoordinator.swift index 7a68c325..3421150c 100644 --- a/Engine/Core/Rendering/Coordination/PNIRenderingCoordinator.swift +++ b/Engine/Core/Rendering/Coordination/PNIRenderingCoordinator.swift @@ -22,12 +22,13 @@ struct PNIRenderingCoordinator: PNRenderingCoordinator { self.pipeline = pipeline self.commandQueue = commandQueue } - mutating func draw(frameSupply: PNFrameSupply) { + mutating func draw(frameSupply: PNFrameSupply, onComplete: @escaping () -> Void) { guard frameSupply.scene.activeCameraIdx != .nil else { + onComplete() return } let encodingInterval = psignposter.beginInterval("Frame encoding") - pipeline.draw(commandQueue: commandQueue, supply: frameSupply) + pipeline.draw(commandQueue: commandQueue, supply: frameSupply, onComplete: onComplete) psignposter.endInterval("Frame encoding", encodingInterval) } } diff --git
a/Engine/Core/Rendering/Coordination/PNRenderingCoordinator.swift b/Engine/Core/Rendering/Coordination/PNRenderingCoordinator.swift index d1b2f9b9..036e1ff3 100644 --- a/Engine/Core/Rendering/Coordination/PNRenderingCoordinator.swift +++ b/Engine/Core/Rendering/Coordination/PNRenderingCoordinator.swift @@ -4,5 +4,5 @@ /// Supervises and manages the rendering process. public protocol PNRenderingCoordinator { - mutating func draw(frameSupply: PNFrameSupply) + mutating func draw(frameSupply: PNFrameSupply, onComplete: @escaping () -> Void) } diff --git a/Engine/Core/Rendering/Stages/PNPipeline.swift b/Engine/Core/Rendering/Stages/PNPipeline.swift index 71c101a4..5d4a8584 100644 --- a/Engine/Core/Rendering/Stages/PNPipeline.swift +++ b/Engine/Core/Rendering/Stages/PNPipeline.swift @@ -132,7 +132,7 @@ class PNPipeline: PNStage { commandBuffer.present(drawable) } } - func draw(commandQueue: MTLCommandQueue, supply: PNFrameSupply) { + func draw(commandQueue: MTLCommandQueue, supply: PNFrameSupply, onComplete: @escaping () -> Void) { let wholeEncoding = psignposter.beginInterval("Whole encoding") var commandBuffers = [String: MTLCommandBuffer]() singlethreadVisitor.visit { node in @@ -157,8 +157,8 @@ class PNPipeline: PNStage { guard let commandBuffer = commandQueue.makeCommandBuffer() else { fatalError("Could not prepare command buffer for synchronization") } + commandBuffer.addCompletedHandler { _ in onComplete() } commandBuffer.commit() - commandBuffer.waitUntilCompleted() psignposter.endInterval("Whole encoding", wholeEncoding) } } From f8d666d1ed0f5767b3e8791c47b68f66846a8ce0 Mon Sep 17 00:00:00 2001 From: Tom Riddle Date: Fri, 1 May 2026 19:49:02 +0200 Subject: [PATCH 3/5] Optimize flat tree traversal --- Engine/Core/Buffers/PNFlatTree.swift | 5 ++- .../DataStructures/PNFlatTree+Tests.swift | 39 +++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/Engine/Core/Buffers/PNFlatTree.swift b/Engine/Core/Buffers/PNFlatTree.swift index
0b100a26..4fe9391b 100644 --- a/Engine/Core/Buffers/PNFlatTree.swift +++ b/Engine/Core/Buffers/PNFlatTree.swift @@ -15,6 +15,7 @@ public struct PNFlatTreeNode { /// A structure representing a tree backed by an array. public struct PNFlatTree { private var objects: [PNFlatTreeNode] + private var childrenMap: [PNIndex: [PNIndex]] = [:] public var count: Int { objects.count } @@ -32,7 +33,9 @@ public struct PNFlatTree { "Parent index if non-nil \(Int.nil) must refer to an exisitng node") assert(parentIdx >= Int.nil, "Parent index value cannot be lower than 0 or different than \(Int.nil)") + let newIdx = objects.count objects.append(PNFlatTreeNode(parentIdx: parentIdx, data: data)) + childrenMap[parentIdx, default: []].append(newIdx) } public subscript(index: PNIndex) -> PNFlatTreeNode { get { @@ -42,7 +45,7 @@ public struct PNFlatTree { } } public func children(of idx: PNIndex) -> [PNIndex] { - objects.indices.filter { objects[$0].parentIdx == idx } + childrenMap[idx] ?? [] } public func descendants(of idx: PNIndex) -> [PNIndex] { let nearChildren = children(of: idx) diff --git a/EngineTests/Engine/DataStructures/PNFlatTree+Tests.swift b/EngineTests/Engine/DataStructures/PNFlatTree+Tests.swift index 8f433058..4035af2d 100644 --- a/EngineTests/Engine/DataStructures/PNFlatTree+Tests.swift +++ b/EngineTests/Engine/DataStructures/PNFlatTree+Tests.swift @@ -85,4 +85,43 @@ class PNFlatTreeTests: XCTestCase { XCTAssertEqual(tree[0].data, 100) XCTAssertEqual(tree[0].parentIdx, .nil) } + func testMultipleRoots() throws { + var tree = PNFlatTree() + tree.add(parentIdx: .nil, data: 10) + tree.add(parentIdx: .nil, data: 20) + tree.add(parentIdx: .nil, data: 30) + XCTAssertEqual(tree.children(of: .nil), [0, 1, 2]) + } + func testLeafNodeHasNoChildren() throws { + var tree = PNFlatTree() + tree.add(parentIdx: .nil, data: 10) + tree.add(parentIdx: 0, data: 20) + XCTAssertEqual(tree.children(of: 1), []) + XCTAssertEqual(tree.descendants(of: 1), []) + } + func 
testDescendantsThreeLevelsDeep() throws { + var tree = PNFlatTree() + tree.add(parentIdx: .nil, data: 0) // 0 + tree.add(parentIdx: 0, data: 1) // 1 + tree.add(parentIdx: 0, data: 2) // 2 + tree.add(parentIdx: 1, data: 3) // 3 + tree.add(parentIdx: 1, data: 4) // 4 + tree.add(parentIdx: 2, data: 5) // 5 + // descendants of root: children before their own descendants (BFS-like) + XCTAssertEqual(tree.descendants(of: .nil), [0, 1, 2, 3, 4, 5]) + // subtree rooted at node 1 + XCTAssertEqual(tree.descendants(of: 1), [3, 4]) + // subtree rooted at node 2 + XCTAssertEqual(tree.descendants(of: 2), [5]) + } + func testDescendantsPerformance() { + var tree = PNFlatTree() + tree.add(parentIdx: .nil, data: 0) + for i in 0 ..< 999 { + tree.add(parentIdx: i, data: i + 1) + } + measure { + _ = tree.descendants(of: .nil) + } + } } From e21e0511a8ad5d73dca6f687c147eefdd201b458 Mon Sep 17 00:00:00 2001 From: Tom Riddle Date: Fri, 1 May 2026 20:10:22 +0200 Subject: [PATCH 4/5] Speed up ssao implementation --- Engine/Shaders/SSAO.metal | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/Engine/Shaders/SSAO.metal b/Engine/Shaders/SSAO.metal index 6249fe3d..8643dbf3 100644 --- a/Engine/Shaders/SSAO.metal +++ b/Engine/Shaders/SSAO.metal @@ -44,19 +44,24 @@ kernel void kernelSSAO(texture2d nm [[texture(kAttributeSsao float3 tangent = normalize(randomVector - normal * dot(randomVector, normal)); float3 bitangent = normalize(cross(normal, tangent)); float3x3 TBN = float3x3(tangent, bitangent, normal); + float fx = camera.projectionMatrix[0][0]; + float fy = camera.projectionMatrix[1][1]; + float cx = camera.projectionMatrix[2][0]; + float cy = camera.projectionMatrix[2][1]; + float2 clipW = float2(camera.projectionMatrix[2][3], camera.projectionMatrix[3][3]); + float2 ndcToTexScale = float2(resolutionMultiplier.x * 0.5, -resolutionMultiplier.y * 0.5); + float2 ndcToTexBias = float2(resolutionMultiplier.x * 0.5, resolutionMultiplier.y * 0.5); + float occlusionThreshold = worldPosition.z +
comparisonBias; half occlusion = 0.0; for(int i = 0; i < sampleCount; ++i) { - float3 neighbourWorldPosition = worldPosition + (TBN * samples[i]); - float4 neighbourClipPosition = camera.projectionMatrix * float4(neighbourWorldPosition, 1); - neighbourClipPosition /= neighbourClipPosition.w; - neighbourClipPosition = neighbourClipPosition * 0.5 + 0.5; - neighbourClipPosition.y = (1 - neighbourClipPosition.y); - neighbourClipPosition.xy *= resolutionMultiplier; - uint2 sampleXY = uint2(neighbourClipPosition.xy); + float3 samplePos = worldPosition + (TBN * samples[i]); + float2 sampleNDC = float2(fx * samplePos.x + cx * samplePos.z, + fy * samplePos.y + cy * samplePos.z) / (clipW.x * samplePos.z + clipW.y); + uint2 sampleXY = uint2(sampleNDC * ndcToTexScale + ndcToTexBias); float neighbourDepth = pr.read(sampleXY).z; float depthDiff = abs(worldPosition.z - neighbourDepth); float rangeCheck = smoothstep(0.0, 1.0, radius / max(depthDiff, 1e-5)); - occlusion += (neighbourDepth >= worldPosition.z + comparisonBias ? 1.0 : 0.0) * rangeCheck; + occlusion += (neighbourDepth >= occlusionThreshold ? 1.0 : 0.0) * rangeCheck; } half finalOcclusion = 1.0 - (occlusion / sampleCount); out.write(pow(finalOcclusion, power), positionXY); From 2314c444e0316f05c6d7fc1402973de5feef14a6 Mon Sep 17 00:00:00 2001 From: Tom Riddle Date: Fri, 1 May 2026 20:11:54 +0200 Subject: [PATCH 5/5] Poisson pcf --- Engine/Shaders/Common/Shadow.metal | 87 +++++++++++++++++++++--------- 1 file changed, 62 insertions(+), 25 deletions(-) diff --git a/Engine/Shaders/Common/Shadow.metal b/Engine/Shaders/Common/Shadow.metal index 1fa601a9..d730ceac 100644 --- a/Engine/Shaders/Common/Shadow.metal +++ b/Engine/Shaders/Common/Shadow.metal @@ -6,24 +6,58 @@ using namespace metal; +// 16-point Poisson disk — well-distributed 2D offsets for spot/directional PCF.
+constant float2 kPoissonDisk[16] = { + float2(-0.9420, -0.3991), float2( 0.9456, -0.7689), + float2(-0.0942, -0.9294), float2( 0.3450, 0.2939), + float2(-0.9159, 0.4577), float2(-0.8154, -0.8791), + float2(-0.3828, 0.2768), float2( 0.9748, 0.7565), + float2( 0.4432, -0.9751), float2( 0.5374, -0.4737), + float2(-0.2650, -0.4189), float2( 0.7920, 0.1909), + float2(-0.2419, 0.9971), float2(-0.8141, 0.9144), + float2( 0.1998, 0.7864), float2( 0.1438, -0.1410) +}; + +// 16-point Poisson sphere — well-distributed 3D offsets for point-light PCF. +constant float3 kPoissonSphere[16] = { + float3(-0.7499, -0.4811, 0.4152), float3( 0.4951, -0.7979, 0.0553), + float3(-0.1119, -0.3516, -0.8599), float3( 0.8192, 0.3498, -0.3551), + float3(-0.4228, 0.6617, 0.5956), float3( 0.1787, -0.9718, -0.1544), + float3(-0.9307, 0.1247, -0.3432), float3( 0.6124, 0.7895, 0.0421), + float3(-0.2041, 0.6812, -0.7027), float3( 0.7733, -0.2956, 0.5601), + float3(-0.5614, 0.1543, 0.8131), float3( 0.0875, 0.9912, -0.1001), + float3(-0.8447, -0.5293, -0.0758), float3( 0.3621, -0.1874, -0.9130), + float3( 0.0197, 0.4239, 0.9056), float3(-0.3001, -0.8849, 0.3552) +}; + float pcfDepth(metal::depth2d_array shadowMaps, uint layer, float2 sampleCoordinate, int2 samples, float countedDepth, float bias) { - constexpr sampler textureSampler(mag_filter::linear, min_filter::linear, mip_filter::linear); - float2 textureSize = float2(shadowMaps.get_width(), shadowMaps.get_height()); - float2 texelSize = float2(1.0f) / float2(textureSize); - float result = 0.0f; - for (auto i = -samples.x; i <= samples.x; ++i) { - for (auto j = -samples.y; j <= samples.y; ++j) { - float2 coordinate = sampleCoordinate + float2(i, j) * texelSize; - float depth = shadowMaps.sample(textureSampler, coordinate, layer); - result += countedDepth - bias > depth ?
1.0 : 0.0f; - } + constexpr sampler textureSampler(mag_filter::linear, min_filter::linear); + float2 texelSize = float2(1.0f) / float2(shadowMaps.get_width(), shadowMaps.get_height()); + float2 filterRadius = float2(samples); + float threshold = countedDepth - bias; + + // Sentinel check: taps 1, 5, 7, 13 lie near the rim of the disk, one per quadrant (~90° apart). + // If all agree the pixel is uniformly lit or shadowed, skip the remaining 12 taps. + float s1 = threshold > shadowMaps.sample(textureSampler, sampleCoordinate + kPoissonDisk[ 1] * filterRadius * texelSize, layer) ? 1.0f : 0.0f; + float s5 = threshold > shadowMaps.sample(textureSampler, sampleCoordinate + kPoissonDisk[ 5] * filterRadius * texelSize, layer) ? 1.0f : 0.0f; + float s7 = threshold > shadowMaps.sample(textureSampler, sampleCoordinate + kPoissonDisk[ 7] * filterRadius * texelSize, layer) ? 1.0f : 0.0f; + float s13 = threshold > shadowMaps.sample(textureSampler, sampleCoordinate + kPoissonDisk[13] * filterRadius * texelSize, layer) ? 1.0f : 0.0f; + if (s1 == s5 && s5 == s7 && s7 == s13) + return s1; + + // Shadow edge: run all 16 taps, reusing the 4 sentinel results. + float result = s1 + s5 + s7 + s13; + for (int i = 0; i < 16; ++i) { + if (i == 1 || i == 5 || i == 7 || i == 13) continue; + float2 coord = sampleCoordinate + kPoissonDisk[i] * filterRadius * texelSize; + result += threshold > shadowMaps.sample(textureSampler, coord, layer) ?
1.0f : 0.0f; } - return result / float((samples.x * 2 + 1) * (samples.y * 2 + 1)); + return result / 16.0f; } float pcfDepth(metal::depthcube_array shadowMaps, @@ -33,19 +67,22 @@ float pcfDepth(metal::depthcube_array shadowMaps, float countedDepth, float bias, float offset) { - constexpr sampler sampler(mag_filter::linear, min_filter::linear, mip_filter::linear); - float shadow = 0.0f; - float3 stepSize = (2.0 * offset) / float3(samples); - - for (int xi = 0; xi < samples.x; ++xi) { - for (int yi = 0; yi < samples.y; ++yi) { - for (int zi = 0; zi < samples.z; ++zi) { - float3 offsetCoord = float3(xi, yi, zi) * stepSize - offset; - float depth = shadowMaps.sample(sampler, sampleCoordinate + offsetCoord, layer); - shadow += countedDepth - bias > depth ? 1.0f : 0.0f; - } - } + constexpr sampler s(mag_filter::linear, min_filter::linear); + float threshold = countedDepth - bias; + + // Sentinel check: taps 3, 6, 14, 15 are mutually at least ~87° apart, so no side of the sphere is left unprobed. + float s3 = threshold > shadowMaps.sample(s, sampleCoordinate + kPoissonSphere[ 3] * offset, layer) ? 1.0f : 0.0f; + float s6 = threshold > shadowMaps.sample(s, sampleCoordinate + kPoissonSphere[ 6] * offset, layer) ? 1.0f : 0.0f; + float s14 = threshold > shadowMaps.sample(s, sampleCoordinate + kPoissonSphere[14] * offset, layer) ? 1.0f : 0.0f; + float s15 = threshold > shadowMaps.sample(s, sampleCoordinate + kPoissonSphere[15] * offset, layer) ? 1.0f : 0.0f; + if (s3 == s6 && s6 == s14 && s14 == s15) + return s3; + + // Shadow edge: run all 16 taps, reusing the 4 sentinel results. + float result = s3 + s6 + s14 + s15; + for (int i = 0; i < 16; ++i) { + if (i == 3 || i == 6 || i == 14 || i == 15) continue; + result += threshold > shadowMaps.sample(s, sampleCoordinate + kPoissonSphere[i] * offset, layer) ? 1.0f : 0.0f; } - - return clamp(shadow / float(samples.x * samples.y * samples.z), 0.0f, 1.0f); + return clamp(result / 16.0f, 0.0f, 1.0f); }