Created
April 14, 2025 02:27
-
-
Save zeux/e733b701b15cd684b2581dbd858f94ad to your computer and use it in GitHub Desktop.
Code to build Metal BVH and compute total compacted size, to accompany https://zeux.io/2025/03/31/measuring-acceleration-structures/
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// To build:
// clang++ metalbvh.mm src/scene.cpp extern/meshoptimizer/src/*.cpp src/extern.cpp -I extern/glm -I extern/fast_obj -I extern/cgltf -I extern/meshoptimizer/src -framework Metal -framework Foundation -framework QuartzCore -lobjc -O2 -o metalbvh
#include "common.h" | |
#include "scene.h" | |
#import <Metal/Metal.h> | |
#import <MetalKit/MetalKit.h> | |
#import <Foundation/Foundation.h> | |
// Commits a command buffer, blocks until the GPU is done with it, and reports failure.
// Returns true only when the buffer reached MTLCommandBufferStatusCompleted; otherwise
// prints an "Error: ..." line tagged with `label` and returns false.
static bool commitAndWait(id<MTLCommandBuffer> commandBuffer, const char* label)
{
    [commandBuffer commit];
    [commandBuffer waitUntilCompleted];

    if ([commandBuffer status] == MTLCommandBufferStatusCompleted)
        return true;

    NSError* error = [commandBuffer error];
    printf("Error: %s command buffer failed: %s\n", label,
        error ? [[error localizedDescription] UTF8String] : "unknown error");
    return false;
}

// Loads a scene, builds one Metal primitive acceleration structure (BLAS) per mesh
// using the first LOD of each mesh, queries the compacted size of every BLAS, and
// prints the allocated total, the compacted total and the bytes-per-triangle ratio.
//
// argv[1] is the scene file path. When the FAST environment variable is set to a
// non-zero integer, that flag is forwarded to loadScene as its last argument.
//
// Returns 0 on success and 1 on a usage error, a scene load failure, an empty scene,
// or any Metal resource/command failure.
int main(int argc, char** argv)
{
    if (argc < 2)
    {
        printf("Usage: %s <scene_file>\n", argv[0]);
        return 1;
    }

    // Gives descriptors and command buffers obtained from convenience constructors a
    // pool to be released from, regardless of whether the file is built with ARC.
    @autoreleasepool
    {
        Geometry geometry;
        std::vector<Material> materials;
        std::vector<MeshDraw> draws;
        std::vector<Animation> animations;
        std::vector<std::string> texturePaths;

        // material index 0 is always dummy
        materials.resize(1);
        materials[0].diffuseFactor = vec4(1);

        Camera camera;
        camera.position = { 0.0f, 0.0f, 0.0f };
        camera.orientation = { 0.0f, 0.0f, 0.0f, 1.0f };
        camera.fovY = glm::radians(70.f);
        camera.znear = 0.1f;

        vec3 sunDirection = normalize(vec3(1.0f, 1.0f, 1.0f));

        const char* fastEnv = getenv("FAST");
        bool fastMode = fastEnv && atoi(fastEnv);

        if (!loadScene(geometry, materials, draws, texturePaths, animations, camera, sunDirection, argv[1], false, fastMode))
        {
            printf("Error: scene %s failed to load\n", argv[1]);
            return 1;
        }

        const size_t meshCount = geometry.meshes.size();

        // Zero-length Metal buffers cannot be created and the report below divides by
        // the totals, so an empty scene is rejected up front.
        if (meshCount == 0)
        {
            printf("Error: scene %s contains no meshes\n", argv[1]);
            return 1;
        }

        // Create Metal device and command queue
        id<MTLDevice> device = MTLCreateSystemDefaultDevice();
        if (!device)
        {
            printf("Error: failed to create Metal device\n");
            return 1;
        }

        if (![device supportsRaytracing])
        {
            printf("Error: Metal device does not support raytracing\n");
            return 1;
        }

        id<MTLCommandQueue> commandQueue = [device newCommandQueue];
        if (!commandQueue)
        {
            printf("Error: failed to create Metal command queue\n");
            return 1;
        }

        // Upload the scene-wide vertex and index buffers
        MTLResourceOptions options = MTLResourceStorageModeShared;

        id<MTLBuffer> vertexBuffer = [device newBufferWithBytes:geometry.vertices.data()
                                                         length:geometry.vertices.size() * sizeof(Vertex)
                                                        options:options];
        if (!vertexBuffer)
        {
            printf("Error: failed to create vertex buffer\n");
            return 1;
        }

        id<MTLBuffer> indexBuffer = [device newBufferWithBytes:geometry.indices.data()
                                                        length:geometry.indices.size() * sizeof(uint32_t)
                                                       options:options];
        if (!indexBuffer)
        {
            printf("Error: failed to create index buffer\n");
            return 1;
        }

        // Pass 1: describe every mesh, accumulate size estimates, and find the largest
        // scratch requirement so that one scratch buffer can serve every build.
        std::vector<MTLPrimitiveAccelerationStructureDescriptor*> accelDescriptors;
        std::vector<MTLAccelerationStructureSizes> accelSizes;
        accelDescriptors.reserve(meshCount);
        accelSizes.reserve(meshCount);

        size_t totalBlasSize = 0;
        size_t totalTris = 0;
        size_t maxScratchBufferSize = 0;

        for (size_t i = 0; i < meshCount; ++i)
        {
            const Mesh& mesh = geometry.meshes[i];
            const MeshLod& lod = mesh.lods[0]; // Use first LOD

            MTLAccelerationStructureTriangleGeometryDescriptor* geometryDescriptor =
                [MTLAccelerationStructureTriangleGeometryDescriptor descriptor];

            geometryDescriptor.vertexBuffer = vertexBuffer;
            geometryDescriptor.vertexStride = sizeof(Vertex);
            geometryDescriptor.vertexFormat = MTLAttributeFormatHalf3;

            // vertexBufferOffset is a single property: it has to carry both the mesh's
            // base vertex and the position field's offset inside Vertex. Assigning the
            // two separately would make the second assignment drop the first.
            geometryDescriptor.vertexBufferOffset = mesh.vertexOffset * sizeof(Vertex) + offsetof(Vertex, vx);

            geometryDescriptor.indexBuffer = indexBuffer;
            geometryDescriptor.indexBufferOffset = lod.indexOffset * sizeof(uint32_t);
            geometryDescriptor.indexType = MTLIndexTypeUInt32;
            geometryDescriptor.triangleCount = lod.indexCount / 3;

            totalTris += geometryDescriptor.triangleCount;

            MTLPrimitiveAccelerationStructureDescriptor* accelDescriptor =
                [MTLPrimitiveAccelerationStructureDescriptor descriptor];
            accelDescriptor.geometryDescriptors = @[geometryDescriptor];

            MTLAccelerationStructureSizes sizes = [device accelerationStructureSizesWithDescriptor:accelDescriptor];

            totalBlasSize += sizes.accelerationStructureSize;
            maxScratchBufferSize = std::max<size_t>(maxScratchBufferSize, sizes.buildScratchBufferSize);

            accelDescriptors.push_back(accelDescriptor);
            accelSizes.push_back(sizes);
        }

        // Allocate single scratch buffer for all BLASes
        id<MTLBuffer> scratchBuffer = [device newBufferWithLength:maxScratchBufferSize
                                                          options:MTLResourceStorageModePrivate];
        if (!scratchBuffer)
        {
            printf("Error: failed to create scratch buffer\n");
            return 1;
        }

        // Pass 2: build all acceleration structures in a single command buffer
        std::vector<id<MTLAccelerationStructure>> primitiveAccelerationStructures(meshCount);

        id<MTLCommandBuffer> buildCommandBuffer = [commandQueue commandBuffer];
        [buildCommandBuffer setLabel:@"BuildAllAccelerationStructures"];

        id<MTLAccelerationStructureCommandEncoder> buildEncoder =
            [buildCommandBuffer accelerationStructureCommandEncoder];

        for (size_t i = 0; i < meshCount; ++i)
        {
            id<MTLAccelerationStructure> accelStructure =
                [device newAccelerationStructureWithSize:accelSizes[i].accelerationStructureSize];
            if (!accelStructure)
            {
                // The encoder must be ended before it is abandoned.
                [buildEncoder endEncoding];
                printf("Error: failed to create acceleration structure %zu\n", i);
                return 1;
            }

            [accelStructure setLabel:[NSString stringWithFormat:@"PrimitiveAS_%zu", i]];

            [buildEncoder buildAccelerationStructure:accelStructure
                                          descriptor:accelDescriptors[i]
                                       scratchBuffer:scratchBuffer
                                 scratchBufferOffset:0];

            primitiveAccelerationStructures[i] = accelStructure;
        }

        [buildEncoder endEncoding];

        if (!commitAndWait(buildCommandBuffer, "BLAS build"))
            return 1;

        // Pass 3: query the compacted size of every BLAS in one batch. Each size is
        // written as a 32-bit value into its own slot of a shared (CPU-readable) buffer.
        id<MTLBuffer> compactedSizesBuffer = [device newBufferWithLength:meshCount * sizeof(uint32_t)
                                                                 options:MTLResourceStorageModeShared];
        if (!compactedSizesBuffer)
        {
            printf("Error: failed to create compacted sizes buffer\n");
            return 1;
        }

        id<MTLCommandBuffer> compactionCommandBuffer = [commandQueue commandBuffer];
        [compactionCommandBuffer setLabel:@"CompactBLAS"];

        id<MTLAccelerationStructureCommandEncoder> compactionEncoder =
            [compactionCommandBuffer accelerationStructureCommandEncoder];

        for (size_t i = 0; i < meshCount; ++i)
        {
            [compactionEncoder writeCompactedAccelerationStructureSize:primitiveAccelerationStructures[i]
                                                              toBuffer:compactedSizesBuffer
                                                                offset:i * sizeof(uint32_t)];
        }

        [compactionEncoder endEncoding];

        if (!commitAndWait(compactionCommandBuffer, "BLAS compaction query"))
            return 1;

        // Sum the per-mesh compacted sizes read back from the shared buffer
        const uint32_t* compactedSizes = static_cast<const uint32_t*>([compactedSizesBuffer contents]);

        size_t compactedBlasSize = 0;
        for (size_t i = 0; i < meshCount; ++i)
            compactedBlasSize += compactedSizes[i];

        // Guard the ratios so degenerate scenes print 0 instead of inf/nan
        double compactedPercent = totalBlasSize ? (compactedBlasSize * 100.0) / totalBlasSize : 0.0;
        double bytesPerTriangle = totalTris ? double(compactedBlasSize) / double(totalTris) : 0.0;

        printf("%.2f MB of BLAS memory allocated\n", totalBlasSize / 1e6);
        printf("%.2f MB of BLAS memory after compaction (%.1f%% of original)\n",
            compactedBlasSize / 1e6, compactedPercent);
        printf("%zu total triangles, %.1f bytes/triangle (compacted)\n", totalTris, bytesPerTriangle);
    }

    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment