Skip to content

Instantly share code, notes, and snippets.

@zeux
Created April 14, 2025 02:27
Show Gist options
  • Save zeux/e733b701b15cd684b2581dbd858f94ad to your computer and use it in GitHub Desktop.
Save zeux/e733b701b15cd684b2581dbd858f94ad to your computer and use it in GitHub Desktop.
Code to build Metal BVH and compute total compacted size, to accompany https://zeux.io/2025/03/31/measuring-acceleration-structures/
// To build:
// clang++ metalbvh.mm src/scene.cpp extern/meshoptimizer/src/*.cpp src/extern.cpp -I extern/glm -I extern/fast_obj -I extern/cgltf -I extern/meshoptimizer/src -framework Metal -framework Foundation -framework QuartzCore -lobjc -O2 -o metalbvh
#include "common.h"
#include "scene.h"
#import <Metal/Metal.h>
#import <MetalKit/MetalKit.h>
#import <Foundation/Foundation.h>
// Entry point: loads the scene named by argv[1], builds one Metal primitive
// acceleration structure (BLAS) per mesh using LOD 0, then queries the compacted
// size of every BLAS and prints allocated vs. compacted totals.
//
// Environment: FAST=<non-zero integer> is forwarded to loadScene as its last argument.
// Returns 0 on success, 1 on usage error, scene load failure, or any Metal failure.
int main(int argc, char** argv)
{
	if (argc < 2)
	{
		printf("Usage: %s <scene_file>\n", argv[0]);
		return 1;
	}

	Geometry geometry;
	std::vector<Material> materials;
	std::vector<MeshDraw> draws;
	std::vector<Animation> animations;
	std::vector<std::string> texturePaths;

	// material index 0 is always dummy
	materials.resize(1);
	materials[0].diffuseFactor = vec4(1);

	Camera camera;
	camera.position = { 0.0f, 0.0f, 0.0f };
	camera.orientation = { 0.0f, 0.0f, 0.0f, 1.0f };
	camera.fovY = glm::radians(70.f);
	camera.znear = 0.1f;

	vec3 sunDirection = normalize(vec3(1.0f, 1.0f, 1.0f));

	const char* fastEnv = getenv("FAST");
	bool fastMode = fastEnv && atoi(fastEnv);

	if (!loadScene(geometry, materials, draws, texturePaths, animations, camera, sunDirection, argv[1], false, fastMode))
	{
		printf("Error: scene %s failed to load\n", argv[1]);
		return 1;
	}

	const size_t meshCount = geometry.meshes.size();

	// Without meshes there is nothing to build, and the zero-length buffer
	// allocations and 0/0 statistics below would be meaningless.
	if (meshCount == 0)
	{
		printf("Error: scene %s contains no meshes\n", argv[1]);
		return 1;
	}

	// Create Metal device; checks are explicit so they survive NDEBUG builds
	id<MTLDevice> device = MTLCreateSystemDefaultDevice();
	if (!device)
	{
		printf("Error: failed to create Metal device\n");
		return 1;
	}

	if (![device supportsRaytracing])
	{
		printf("Error: Metal device does not support ray tracing\n");
		return 1;
	}

	// Create command queue
	id<MTLCommandQueue> commandQueue = [device newCommandQueue];
	if (!commandQueue)
	{
		printf("Error: failed to create Metal command queue\n");
		return 1;
	}

	// Create geometry buffers
	MTLResourceOptions options = MTLResourceStorageModeShared;

	// Upload vertex buffer
	id<MTLBuffer> vertexBuffer = [device newBufferWithBytes:geometry.vertices.data()
	                                                 length:geometry.vertices.size() * sizeof(Vertex)
	                                                options:options];
	if (!vertexBuffer)
	{
		printf("Error: failed to create vertex buffer\n");
		return 1;
	}

	// Upload index buffer
	id<MTLBuffer> indexBuffer = [device newBufferWithBytes:geometry.indices.data()
	                                                length:geometry.indices.size() * sizeof(uint32_t)
	                                               options:options];
	if (!indexBuffer)
	{
		printf("Error: failed to create index buffer\n");
		return 1;
	}

	// One primitive acceleration structure per mesh
	std::vector<id<MTLAccelerationStructure>> primitiveAccelerationStructures(meshCount);

	size_t totalBlasSize = 0;
	size_t totalTris = 0;

	// Descriptors and size estimates gathered in the first pass, consumed by the build pass
	std::vector<MTLPrimitiveAccelerationStructureDescriptor*> accelDescriptors;
	std::vector<MTLAccelerationStructureSizes> accelSizes;
	accelDescriptors.reserve(meshCount);
	accelSizes.reserve(meshCount);

	// Largest scratch requirement across all meshes; one buffer of this size is shared by every build
	size_t maxScratchBufferSize = 0;

	for (size_t i = 0; i < meshCount; ++i)
	{
		const Mesh& mesh = geometry.meshes[i];
		const MeshLod& lod = mesh.lods[0]; // Use first LOD

		// Create geometry descriptor
		MTLAccelerationStructureTriangleGeometryDescriptor* geometryDescriptor =
		    [MTLAccelerationStructureTriangleGeometryDescriptor descriptor];

		geometryDescriptor.vertexBuffer = vertexBuffer;
		geometryDescriptor.vertexStride = sizeof(Vertex);
		geometryDescriptor.vertexFormat = MTLAttributeFormatHalf3;

		// The offset must combine the mesh's base vertex with the position of the
		// position attribute inside Vertex. Assigning these in two separate statements
		// makes the second overwrite the first and drops the per-mesh base.
		geometryDescriptor.vertexBufferOffset = size_t(mesh.vertexOffset) * sizeof(Vertex) + offsetof(Vertex, vx);

		// Set up indices
		geometryDescriptor.indexBuffer = indexBuffer;
		geometryDescriptor.indexBufferOffset = lod.indexOffset * sizeof(uint32_t);
		geometryDescriptor.indexType = MTLIndexTypeUInt32;
		geometryDescriptor.triangleCount = lod.indexCount / 3;

		totalTris += geometryDescriptor.triangleCount;

		// Create primitive acceleration structure descriptor
		MTLPrimitiveAccelerationStructureDescriptor* accelDescriptor =
		    [MTLPrimitiveAccelerationStructureDescriptor descriptor];
		accelDescriptor.geometryDescriptors = @[geometryDescriptor];

		// Ask the device for size requirements of this acceleration structure
		MTLAccelerationStructureSizes sizes = [device accelerationStructureSizesWithDescriptor:accelDescriptor];

		totalBlasSize += sizes.accelerationStructureSize;
		maxScratchBufferSize = std::max(maxScratchBufferSize, size_t(sizes.buildScratchBufferSize));

		// Store for the build pass
		accelDescriptors.push_back(accelDescriptor);
		accelSizes.push_back(sizes);
	}

	// Allocate single scratch buffer for all BLASes
	id<MTLBuffer> scratchBuffer = [device newBufferWithLength:maxScratchBufferSize
	                                                  options:MTLResourceStorageModePrivate];
	if (!scratchBuffer)
	{
		printf("Error: failed to create scratch buffer\n");
		return 1;
	}

	// Create a single command buffer for building all acceleration structures
	id<MTLCommandBuffer> commandBuffer = [commandQueue commandBuffer];
	[commandBuffer setLabel:@"BuildAllAccelerationStructures"];

	id<MTLAccelerationStructureCommandEncoder> commandEncoder =
	    [commandBuffer accelerationStructureCommandEncoder];

	for (size_t i = 0; i < meshCount; ++i)
	{
		// Allocate storage for the acceleration structure using the estimated size
		id<MTLAccelerationStructure> accelStructure =
		    [device newAccelerationStructureWithSize:accelSizes[i].accelerationStructureSize];
		if (!accelStructure)
		{
			printf("Error: failed to allocate acceleration structure %zu\n", i);
			return 1;
		}

		[accelStructure setLabel:[NSString stringWithFormat:@"PrimitiveAS_%zu", i]];

		// Encode the build; every build reuses the same scratch buffer at offset 0
		[commandEncoder buildAccelerationStructure:accelStructure
		                                descriptor:accelDescriptors[i]
		                             scratchBuffer:scratchBuffer
		                       scratchBufferOffset:0];

		primitiveAccelerationStructures[i] = accelStructure;
	}

	// End encoding, submit and wait for all builds
	[commandEncoder endEncoding];
	[commandBuffer commit];
	[commandBuffer waitUntilCompleted];

	// A failed build would make the size queries below meaningless
	if (NSError* buildError = [commandBuffer error])
	{
		printf("Error: acceleration structure build failed: %s\n", [[buildError localizedDescription] UTF8String]);
		return 1;
	}

	// Query compacted sizes; no compaction copy is performed, only the sizes are measured
	id<MTLCommandBuffer> compactionCommandBuffer = [commandQueue commandBuffer];
	[compactionCommandBuffer setLabel:@"CompactBLAS"];

	id<MTLAccelerationStructureCommandEncoder> compactionEncoder =
	    [compactionCommandBuffer accelerationStructureCommandEncoder];

	// Single CPU-visible buffer with one uint32_t slot per mesh
	id<MTLBuffer> compactedSizesBuffer = [device newBufferWithLength:meshCount * sizeof(uint32_t)
	                                                         options:MTLResourceStorageModeShared];
	if (!compactedSizesBuffer)
	{
		printf("Error: failed to create compacted sizes buffer\n");
		return 1;
	}

	for (size_t i = 0; i < meshCount; ++i)
	{
		// Write each compacted size to the slot belonging to mesh i
		[compactionEncoder writeCompactedAccelerationStructureSize:primitiveAccelerationStructures[i]
		                                                  toBuffer:compactedSizesBuffer
		                                                    offset:i * sizeof(uint32_t)];
	}

	// End encoding and execute the command to get all sizes at once
	[compactionEncoder endEncoding];
	[compactionCommandBuffer commit];
	[compactionCommandBuffer waitUntilCompleted];

	if (NSError* queryError = [compactionCommandBuffer error])
	{
		printf("Error: compacted size query failed: %s\n", [[queryError localizedDescription] UTF8String]);
		return 1;
	}

	// Sum the per-mesh sizes written by the GPU
	const uint32_t* compactedSizes = static_cast<const uint32_t*>([compactedSizesBuffer contents]);

	size_t compactedBlasSize = 0;
	for (size_t i = 0; i < meshCount; ++i)
		compactedBlasSize += compactedSizes[i];

	// Guard the ratios so degenerate scenes print 0 instead of nan/inf
	double compactedPercent = totalBlasSize ? (compactedBlasSize * 100.0) / totalBlasSize : 0.0;
	double bytesPerTriangle = totalTris ? double(compactedBlasSize) / double(totalTris) : 0.0;

	printf("%.2f MB of BLAS memory allocated\n", totalBlasSize / 1e6);
	printf("%.2f MB of BLAS memory after compaction (%.1f%% of original)\n",
	    compactedBlasSize / 1e6, compactedPercent);
	// %zu avoids truncating the triangle count to int
	printf("%zu total triangles, %.1f bytes/triangle (compacted)\n", totalTris, bytesPerTriangle);

	return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment