I’m engaged on a "glowing impact" put up processor. Principally, it has 4 passes:
-
render chosen entities (that I wish to apply glow impact) to a "masks texture"
-
blur the masks texture horizontally
-
blur the masks texture vertically
-
composite the double-blurred masks with the unique texture
It labored nicely on my massive display telephones (e.g. iPhone 15 Professional):

Nevertheless, it exhibits a grid of magenta sq. artifacts on my iPhone 13 mini:

My put up processor code appears to be like like this:
public func processFrame(context: ARView.PostProcessContext) -> Bool {
guard let commandBuffer = commandQueue.makeCommandBuffer() else {
return false
}
// Go 1: Render chosen entities to masks texture
let entityPassDescriptor: MTLRenderPassDescriptor = {
let descriptor = MTLRenderPassDescriptor()
descriptor.colorAttachments[0].texture = entityMaskTexture
descriptor.colorAttachments[0].loadAction = .clear
descriptor.colorAttachments[0].storeAction = .retailer
descriptor.colorAttachments[0].clearColor = MTLClearColor(crimson: 0, inexperienced: 0, blue: 0, alpha: 1)
return descriptor
}()
if let encoder = commandBuffer.makeRenderCommandEncoder(descriptor: entityPassDescriptor) {
encoder.setRenderPipelineState(entityPipeline)
encoder.setCullMode(.again)
encoder.setFrontFacing(.counterClockwise)
renderEntitiesToMask(encoder: encoder, context: context)
encoder.endEncoding()
} else {
return false
}
// Go 2: Blur horizontal
let texelSize = SIMD2<Float>(1.0 / Float(width), 1.0 / Float(peak))
let blurHPassDescriptor: MTLRenderPassDescriptor = {
let descriptor = MTLRenderPassDescriptor()
descriptor.colorAttachments[0].texture = blurTexture1
descriptor.colorAttachments[0].loadAction = .clear
descriptor.colorAttachments[0].storeAction = .retailer
return descriptor
}()
if let encoder = commandBuffer.makeRenderCommandEncoder(descriptor: blurHPassDescriptor) {
encoder.setRenderPipelineState(blurHPipeline)
encoder.setFragmentTexture(entityMaskTexture, index: 0)
var blurParams = SIMD3<Float>(texelSize.x, texelSize.y, blurRadius)
encoder.setFragmentBytes(&blurParams, size: MemoryLayout<SIMD3<Float>>.measurement, index: 0)
encoder.drawPrimitives(kind: .triangle, vertexStart: 0, vertexCount: 6)
encoder.endEncoding()
} else {
return false
}
// Go 3: Blur vertical
let blurVPassDescriptor: MTLRenderPassDescriptor = {
let descriptor = MTLRenderPassDescriptor()
descriptor.colorAttachments[0].texture = blurTexture2
descriptor.colorAttachments[0].loadAction = .clear
descriptor.colorAttachments[0].storeAction = .retailer
return descriptor
}()
if let encoder = commandBuffer.makeRenderCommandEncoder(descriptor: blurVPassDescriptor) {
encoder.setRenderPipelineState(blurVPipeline)
encoder.setFragmentTexture(blurTexture1, index: 0)
var blurParams = SIMD3<Float>(texelSize.x, texelSize.y, blurRadius)
encoder.setFragmentBytes(&blurParams, size: MemoryLayout<SIMD3<Float>>.measurement, index: 0)
encoder.drawPrimitives(kind: .triangle, vertexStart: 0, vertexCount: 6)
encoder.endEncoding()
} else {
return false
}
// Go 4: Composite with glow shade
let targetPixelFormat = context.targetColorTexture.pixelFormat
let compositePipeline = getOrCreateCompositePipeline(pixelFormat: targetPixelFormat)
let compositePassDescriptor: MTLRenderPassDescriptor = {
let descriptor = MTLRenderPassDescriptor()
descriptor.colorAttachments[0].texture = context.targetColorTexture
descriptor.colorAttachments[0].loadAction = .load
descriptor.colorAttachments[0].storeAction = .retailer
return descriptor
}()
if let encoder = commandBuffer.makeRenderCommandEncoder(descriptor: compositePassDescriptor) {
encoder.setRenderPipelineState(compositePipeline)
encoder.setFragmentTexture(context.sourceColorTexture, index: 0)
encoder.setFragmentTexture(blurTexture2, index: 1) // Blurred masks
encoder.setFragmentTexture(entityMaskTexture, index: 2) // Authentic unblurred entity masks
encoder.setFragmentBytes(&glowColorVector, size: MemoryLayout<SIMD4<Float>>.measurement, index: 0)
encoder.drawPrimitives(kind: .triangle, vertexStart: 0, vertexCount: 6)
encoder.endEncoding()
} else {
return false
}
// need to manually commit, because it's our customized commandBuffer
commandBuffer.commit()
return true
}
Because it’s associated to display measurement, I suspected it was due to race situation the place RealityKit’s put up processor making an attempt to entry the ultimate texture that is both not prepared, or already be modified by the subsequent body . So I added this one line of code on the finish:
// this fixes the problem
commandBuffer.commit()
commandBuffer.waitUntilCompleted() // <- added this
return true
And it solves the issue. Nevertheless, this clearly blocks the CPU whereas ready for the GPU, which is not excellent (Not the top of the world because it’s render thread slightly than fundamental thread, however nonetheless i might desire to not block until when it is completely obligatory).
So I attempted utilizing semaphore and commandBuffer’s completion handler:
// this does not work, presumably as a result of race between 2 command buffers
frameSemaphore.wait()
guard let commandBuffer = commandQueue.makeCommandBuffer() else {
frameSemaphore.sign()
return false
}
...
commandBuffer.addCompletedHandler { _ in
self.frameSemaphore.sign()
}
commandBuffer.commit()
Surprisingly, this ends in the identical artifact. So I believe there is a race between my customized commandBuffer and RealityKit’s inside commandBuffer (aka context.commandBuffer).
Nevertheless, once I use context.commandBuffer slightly than my very own customized command buffer, I received a crash:
Execution of the command buffer was aborted as a result of an error throughout execution. Prompted GPU Timeout Error (00000002:kIOGPUCommandBufferCallbackErrorTimeout)
assertion failure: '0' (execute:line 79) Consumer enqueued/dedicated customized put up processing command buffer.
With a lot of back-and-forth with ChatGPT, it appears that evidently we will not use context.commandBuffer, which solely works for single-pass results. For multi-pass pipeline, now we have to make use of our personal command buffer to regulate execution order and guarantee every cross completes earlier than the subsequent begins (I’m new to RealityKit/Steel so I’m not positive whether or not that is true).
So I attempted utilizing my customized commandBuffer for the primary 3 passes, and use context.commandBuffer for the 4th cross, and it labored magically with out utilizing waitUntilCompleted.
// this additionally works, however feels hacky with 2 buffers
guard let intermediateBuffer = commandQueue.makeCommandBuffer() else {
return false
}
// use intermediateBuffer for the primary 3 passes
// ...
// then use finalBuffer for the 4th cross
let finalBuffer = context.commandBuffer
Whereas this works, it feels a bit hacky, and error inclined since I’ve to recollect the proper command buffer to make use of to keep away from race situation. Additionally it is laborious for me to be concern that this prevented race situation utterly (it might be simply this explicit iPhone 13 mini and this explicit code setup, very laborious to inform). I’m wondering if there’s a greater answer?
If you’re focused on reproducing it, beneath is my metallic file (I can add the entire undertaking too should you like, however simply any strawman concept can be welcome)
#embrace <metal_stdlib>
utilizing namespace metallic;
struct GlowVertexOut {
float4 place [[position]];
float2 uv;
};
// Vertex shader for full-screen quad
vertex GlowVertexOut glow_vertex_shader(uint vertexID [[vertex_id]]) {
GlowVertexOut out;
// Generate full-screen quad
const float2 positions[6] = {
float2(-1.0, -1.0),
float2( 1.0, -1.0),
float2(-1.0, 1.0),
float2(-1.0, 1.0),
float2( 1.0, -1.0),
float2( 1.0, 1.0)
};
float2 pos = positions[vertexID];
out.place = float4(pos, 0.0, 1.0);
out.uv = pos * 0.5 + 0.5;
out.uv.y = 1.0 - out.uv.y; // Flip Y
return out;
}
// MARK: - Entity Masks Rendering
struct GlowEntityVertexOut {
float4 place [[position]];
};
// Vertex shader for rendering entities to masks
// Takes vertex positions from buffer (extracted from MeshResource.contents)
vertex GlowEntityVertexOut glow_entity_mask_vertex(uint vertexID [[vertex_id]],
fixed float4x4& mvpMatrix [[buffer(0)]],
fixed float3* vertices [[buffer(1)]]) {
GlowEntityVertexOut out;
float3 place = vertices[vertexID];
out.place = mvpMatrix * float4(place, 1.0);
return out;
}
// Fragment shader for entity masks - outputs white for chosen entities
fragment float glow_entity_mask_fragment() {
return 1.0; // White = chosen entity
}
// MARK: - Gaussian Blur
// Shared Gaussian blur weights (sum = 1.0 for correct normalization)
// {0.2270270270, 0.1945945946, 0.1216216216, 0.0540540541, 0.0162162162};
// Nevertheless, the gaussian blur weights does not look good (too clear).
fixed float kGaussianWeights[5] = {0.8, 0.7, 0.5, 0.2, 0.0162162162};
// Base offsets multiplied by blur radius
fixed float kBaseOffsets[5] = {0.0, 1.0, 2.0, 3.0, 4.0};
// Gaussian blur horizontal cross
// Takes single-channel masks (r8) and outputs blurred masks
fragment float4 glow_blur_horizontal(GlowVertexOut in [[stage_in]],
texture2d<float> inputTexture [[texture(0)]],
fixed float3& blurParams [[buffer(0)]]) {
constexpr sampler blurSampler(mag_filter::linear, min_filter::linear, handle::clamp_to_edge);
float2 texelSize = blurParams.xy;
float blurRadius = blurParams.z;
// Pattern middle pixel
float consequence = inputTexture.pattern(blurSampler, in.uv).r * kGaussianWeights[0];
// Pattern neighboring pixels (each instructions)
for (int i = 1; i < 5; i++) {
float2 offset = float2(kBaseOffsets[i] * blurRadius * texelSize.x, 0.0);
consequence += inputTexture.pattern(blurSampler, in.uv + offset).r * kGaussianWeights[i];
consequence += inputTexture.pattern(blurSampler, in.uv - offset).r * kGaussianWeights[i];
}
// Return blurred consequence (solely crimson channel used)
return float4(consequence, 0.0, 0.0, 1.0);
}
// Gaussian blur vertical cross
fragment float4 glow_blur_vertical(GlowVertexOut in [[stage_in]],
texture2d<float> inputTexture [[texture(0)]],
fixed float3& blurParams [[buffer(0)]]) {
constexpr sampler blurSampler(mag_filter::linear, min_filter::linear, handle::clamp_to_edge);
float2 texelSize = blurParams.xy;
float blurRadius = blurParams.z;
// Pattern middle pixel
float consequence = inputTexture.pattern(blurSampler, in.uv).r * kGaussianWeights[0];
// Pattern neighboring pixels (each instructions)
for (int i = 1; i < 5; i++) {
float2 offset = float2(0.0, kBaseOffsets[i] * blurRadius * texelSize.y);
consequence += inputTexture.pattern(blurSampler, in.uv + offset).r * kGaussianWeights[i];
consequence += inputTexture.pattern(blurSampler, in.uv - offset).r * kGaussianWeights[i];
}
// Return blurred consequence (solely crimson channel used)
return float4(consequence, 0.0, 0.0, 1.0);
}
// Remaining composite shader
// Blends the blurred entity masks with the unique scene
fragment float4 glow_composite(GlowVertexOut in [[stage_in]],
texture2d<float> colorTexture [[texture(0)]],
texture2d<float> blurredMaskTexture [[texture(1)]],
texture2d<float> entityMaskTexture [[texture(2)]],
fixed float4& glowColor [[buffer(0)]]) {
constexpr sampler textureSampler(mag_filter::linear, min_filter::linear);
constexpr sampler sharpSampler(mag_filter::nearest, min_filter::nearest);
float4 originalColor = colorTexture.pattern(textureSampler, in.uv);
float4 blurredMask = blurredMaskTexture.pattern(textureSampler, in.uv);
float entityMask = entityMaskTexture.pattern(sharpSampler, in.uv).r;
float blurredAlpha = blurredMask.r;
// If entityMask == 1, we're ON the article itself, do not apply glow
if (entityMask == 1) {
return originalColor;
}
// Apply glow solely round edges, utilizing alpha to regulate depth
float glowAmount = blurredAlpha * glowColor.a;
// If there is a border, mix glow shade
if (glowAmount > 0.001) {
// Mix glow shade over authentic shade primarily based on glowAmount
float3 finalRGB = combine(originalColor.rgb, glowColor.rgb, glowAmount);
// Hold alpha no less than as excessive as authentic (do not make opaque areas clear)
float finalAlpha = max(originalColor.a, glowAmount);
return float4(finalRGB, finalAlpha);
}
return originalColor;
}

