Processes ARKit's ARFrame.capturedImage CVPixelBuffer into an sRGB image, following Apple's documentation.
import CoreImage
import CoreVideo
import ImageIO
import Metal

/// Processes ARKit's ARFrame.capturedImage CVPixelBuffer into an sRGB image, following Apple's documentation.
///
/// ARKit captures pixel buffers in a full-range planar YCbCr (also known as YUV) format according to the ITU R. 601-4 standard.
/// (You can verify this by checking the kCVImageBufferYCbCrMatrixKey pixel buffer attachment.)
/// Unlike some uses of that standard, ARKit captures full-range color space values, not video-range values.
/// To correctly render these images on a device display, you'll need to access the luma and chroma planes of the pixel buffer and
/// convert full-range YCbCr values to an sRGB (or ITU R. 709) format according to the ITU-T T.871 specification.
/// <https://developer.apple.com/documentation/arkit/arframe/2867984-capturedimage>
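///
/// Spelled out, the ITU-T T.871 full-range conversion (with Y, Cb, and Cr normalized to [0, 1]) is:
///
///     R = Y + 1.402 * (Cr - 0.5)
///     G = Y - 0.3441 * (Cb - 0.5) - 0.7141 * (Cr - 0.5)
///     B = Y + 1.772 * (Cb - 0.5)
///
/// The Metal compute kernel below encodes exactly these equations as a single 4x4 matrix multiply.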
public final class MTLARFrameProcessor {
    public let ciContext: CIContext
    public let colorSpace: CGColorSpace

    let device: MTLDevice
    let queue: MTLCommandQueue
    let library: MTLLibrary
    let pipelineStateCompute: MTLComputePipelineState
    let textureCache: CVMetalTextureCache

    var colorRGBTexture: MTLTexture!
    var colorYContent: MTLTexture!
    var colorCbCrContent: MTLTexture!

    public init() throws {
        colorSpace = try eval(CGColorSpace(name: CGColorSpace.sRGB), orThrow: .failedToCreateColorSpace(CGColorSpace.sRGB))
        let device = try eval(MTLCreateSystemDefaultDevice(), orThrow: .failedToCreateMTLDevice)
        self.device = device
        self.ciContext = CIContext(mtlDevice: device)
        self.queue = try eval(device.makeCommandQueue(), orThrow: .failedToCreateMTLCommandQueue)
        let library = try device.makeLibrary(source: Self.shader, options: nil)
        self.library = library
        let convertYUV2RGBFunc = try eval(library.makeFunction(name: "convertYCbCrToRGBA"), orThrow: .failedToMakeMTLFunction("convertYCbCrToRGBA"))
        pipelineStateCompute = try device.makeComputePipelineState(function: convertYUV2RGBFunc)
        self.textureCache = try eval({
            var textureCache: CVMetalTextureCache?
            CVMetalTextureCacheCreate(nil, nil, device, nil, &textureCache)
            return textureCache
        }, orThrow: .failedToCreateCVMetalTextureCache)
    }

    public func convertToCIImage(capturedImage pixelBuffer: CVPixelBuffer, orientation: CGImagePropertyOrientation) throws -> CIImage {
        let width = CVPixelBufferGetWidth(pixelBuffer)
        let height = CVPixelBufferGetHeight(pixelBuffer)

        colorRGBTexture = try Self.createTexture(
            metalDevice: device,
            width: width,
            height: height,
            usage: [.shaderRead, .shaderWrite],
            pixelFormat: .rgba32Float
        )
        colorYContent = try pixelBuffer.texture(withFormat: .r8Unorm, planeIndex: 0, addToCache: textureCache)
        colorCbCrContent = try pixelBuffer.texture(withFormat: .rg8Unorm, planeIndex: 1, addToCache: textureCache)

        let cmdBuffer = try eval(self.queue.makeCommandBuffer(), orThrow: .failedToMakeMTLCommandBuffer)
        let computeEncoder = try eval(cmdBuffer.makeComputeCommandEncoder(), orThrow: .failedToMakeComputeCommandEncoder)
        // Convert YCbCr to RGB so downstream consumers get a standard RGBA texture.
        computeEncoder.setComputePipelineState(pipelineStateCompute)
        computeEncoder.setTexture(colorYContent, index: 0)
        computeEncoder.setTexture(colorCbCrContent, index: 1)
        computeEncoder.setTexture(colorRGBTexture, index: 2)
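        // Size each threadgroup to the pipeline's execution width, then
        // dispatch enough groups (rounding up) to cover every pixel of the
        // output texture even when its dimensions are not exact multiples.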
        let threadgroupSize = MTLSizeMake(
            pipelineStateCompute.threadExecutionWidth,
            pipelineStateCompute.maxTotalThreadsPerThreadgroup / pipelineStateCompute.threadExecutionWidth,
            1
        )
        let threadgroupCount = MTLSize(
            width: Int(ceil(Float(colorRGBTexture.width) / Float(threadgroupSize.width))),
            height: Int(ceil(Float(colorRGBTexture.height) / Float(threadgroupSize.height))),
            depth: 1
        )
        computeEncoder.dispatchThreadgroups(threadgroupCount, threadsPerThreadgroup: threadgroupSize)
        computeEncoder.endEncoding()
        cmdBuffer.commit()
        cmdBuffer.waitUntilCompleted()

        let ciImageOptions: [CIImageOption: Any] = [
            .colorSpace: colorSpace,
            .applyOrientationProperty: true
        ]
        var ciImage = try eval(CIImage(mtlTexture: self.colorRGBTexture, options: ciImageOptions), orThrow: .failedToCreateCIImageFromMTLTexture)
        ciImage = ciImage.oriented(orientation)
        return ciImage
    }

    public func convertToJPEG(capturedImage pixelBuffer: CVPixelBuffer, orientation: CGImagePropertyOrientation, compressionLevel: Float = 1.0) throws -> Data {
        let ciImage = try convertToCIImage(capturedImage: pixelBuffer, orientation: orientation)
        let options: [CIImageRepresentationOption: Any] = [
            CIImageRepresentationOption(rawValue: kCGImageDestinationLossyCompressionQuality as String): compressionLevel
        ]
        return try eval(self.ciContext.jpegRepresentation(of: ciImage, colorSpace: self.colorSpace, options: options), orThrow: .failedToCreateJPEGRepresentation)
    }

    public func convertToPNG(capturedImage pixelBuffer: CVPixelBuffer, orientation: CGImagePropertyOrientation) throws -> Data {
        let ciImage = try convertToCIImage(capturedImage: pixelBuffer, orientation: orientation)
        return try eval(self.ciContext.pngRepresentation(of: ciImage, format: .RGBA8, colorSpace: self.colorSpace), orThrow: .failedToCreatePNGRepresentation)
    }
}

// MARK: - errors

extension MTLARFrameProcessor {
    enum Error: Swift.Error {
        case failedToCreateMTLDevice
        case failedToCreateMTLCommandQueue
        case failedToMakeMTLFunction(String)
        case failedToMakeMTLTexture(MTLTextureDescriptor)
        case failedToCreateMTLTextureFromCVPixelBuffer
        case failedToCreateCVMetalTextureCache
        case failedToMakeMTLCommandBuffer
        case failedToMakeComputeCommandEncoder
        case failedToCreateCIImageFromMTLTexture
        case failedToCreateCGImageFromCIImage
        case failedToCreateColorSpace(CFString)
        case failedToCreateJPEGRepresentation
        case failedToCreatePNGRepresentation
    }
}

// MARK: - shader

extension MTLARFrameProcessor {
    /// Direct copy of the shader from the Apple sample projects:
    /// <https://developer.apple.com/documentation/arkit/arkit_in_ios/environmental_analysis/displaying_a_point_cloud_using_scene_depth>
    /// <https://developer.apple.com/documentation/arkit/arkit_in_ios/displaying_an_ar_experience_with_metal>
    static let shader: String = """
    #include <metal_stdlib>
    using namespace metal;

    // Convert the Y and CbCr textures into a single RGBA texture.
    kernel void convertYCbCrToRGBA(texture2d<float, access::read> colorYtexture [[texture(0)]],
                                   texture2d<float, access::read> colorCbCrtexture [[texture(1)]],
                                   texture2d<float, access::write> colorRGBTexture [[texture(2)]],
                                   uint2 gid [[thread_position_in_grid]])
    {
        float y = colorYtexture.read(gid).r;
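        // The CbCr plane is stored at half the luma resolution (4:2:0
        // subsampling), so a 2x2 block of luma pixels shares one chroma sample.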
        float2 uv = colorCbCrtexture.read(gid / 2).rg;
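        // Full-range YCbCr -> RGB per ITU-T T.871, written as a column-major
        // 4x4 matrix; the last column removes the 0.5 chroma bias
        // (for example, -0.7010 = -1.402 * 0.5).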
        const float4x4 ycbcrToRGBTransform = float4x4(
            float4(+1.0000f, +1.0000f, +1.0000f, +0.0000f),
            float4(+0.0000f, -0.3441f, +1.7720f, +0.0000f),
            float4(+1.4020f, -0.7141f, +0.0000f, +0.0000f),
            float4(-0.7010f, +0.5291f, -0.8860f, +1.0000f)
        );

        // Combine the sampled Y and CbCr values into the YCbCr color at the
        // given texture coordinate.
        float4 ycbcr = float4(y, uv.x, uv.y, 1.0f);

        // Write out the converted RGB color.
        float4 colorSample = ycbcrToRGBTransform * ycbcr;
        colorRGBTexture.write(colorSample, uint2(gid.xy));
    }
    """
}

// MARK: - texture helper

extension MTLARFrameProcessor {
    // Create an empty texture.
    static func createTexture(metalDevice: MTLDevice, width: Int, height: Int, usage: MTLTextureUsage, pixelFormat: MTLPixelFormat) throws -> MTLTexture {
        let descriptor = MTLTextureDescriptor()
        descriptor.pixelFormat = pixelFormat
        descriptor.width = width
        descriptor.height = height
        descriptor.usage = usage
        let resTexture = try eval(metalDevice.makeTexture(descriptor: descriptor), orThrow: .failedToMakeMTLTexture(descriptor))
        return resTexture
    }
}

// Enable `CVPixelBuffer` to output an `MTLTexture`.
extension CVPixelBuffer {
    func texture(withFormat pixelFormat: MTLPixelFormat, planeIndex: Int, addToCache cache: CVMetalTextureCache) throws -> MTLTexture {
        assert(CVPixelBufferGetIOSurface(self) != nil, "CVPixelBuffer must be backed by an IOSurface")
        let width = CVPixelBufferGetWidthOfPlane(self, planeIndex)
        let height = CVPixelBufferGetHeightOfPlane(self, planeIndex)

        var cvtexture: CVMetalTexture?
        let status = CVMetalTextureCacheCreateTextureFromImage(nil, cache, self, nil, pixelFormat, width, height, planeIndex, &cvtexture)
        guard
            status == kCVReturnSuccess,
            let cvtexture,
            let texture = CVMetalTextureGetTexture(cvtexture)
        else {
            throw MTLARFrameProcessor.Error.failedToCreateMTLTextureFromCVPixelBuffer
        }
        return texture
    }
}

// MARK: - eval helper
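/// Unwraps the optional produced by `block`, or throws the given
/// `MTLARFrameProcessor.Error` when it is `nil`, keeping call sites to a
/// single line.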
private func eval<R>(_ block: @autoclosure @escaping () -> R?, orThrow error: MTLARFrameProcessor.Error) throws -> R {
    try evaluate(block, orThrow: error)
}

private func eval<R>(_ block: @escaping () -> R?, orThrow error: MTLARFrameProcessor.Error) throws -> R {
    try evaluate(block, orThrow: error)
}

private func evaluate<R, E: Swift.Error>(_ block: @escaping () -> R?, orThrow error: E) throws -> R {
    guard let result: R = block() else {
        throw error
    }
    return result
}
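
// MARK: - usage example (sketch)

// A minimal sketch of how the processor might be driven from an ARKit
// session. The `FrameCapturer` class, the fixed `.right` orientation
// (a common choice for portrait devices), and the 0.8 compression level
// are illustrative assumptions, not part of the original gist.
// (`import ARKit` would normally sit with the other imports at the top
// of the file.)
import ARKit

final class FrameCapturer: NSObject, ARSessionDelegate {
    // Force-trying the initializer keeps the sketch short; production
    // code should propagate the error instead.
    let processor = try! MTLARFrameProcessor()

    func session(_ session: ARSession, didUpdate frame: ARFrame) {
        do {
            // The camera sensor delivers landscape images; derive the
            // orientation from the current interface orientation in real code.
            let jpeg = try processor.convertToJPEG(
                capturedImage: frame.capturedImage,
                orientation: .right,
                compressionLevel: 0.8
            )
            // Persist or transmit `jpeg` here.
            _ = jpeg
        } catch {
            print("Frame conversion failed: \(error)")
        }
    }
}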