Last active
March 11, 2021 15:04
-
-
Save lukaszmargielewski/0a3b16d4661dd7d7e00d to your computer and use it in GitHub Desktop.
iOS Metal client side code for compute pipeline
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Companion code for the Stack Overflow question:
// http://stackoverflow.com/questions/30445801/ios8-metal-compute-pipeline-slower-than-cpu
// I. CODE INVOKED ONCE, AT THE INIT STAGE:
//
// Creates the two Metal buffers used by the search kernel and precomputes the
// one-dimensional dispatch geometry (threadgroup size and threadgroup count).

// Character table: created with the copying buffer constructor.
_mtlCharTable = [_mtlDevice newBufferWithBytes:_charTable->pointer
                                        length:_charTable->bytesTotal
                                       options:0];

// Search masks: wrapped in place by the no-copy constructor; a nil deallocator
// means Metal does not free the memory when the buffer goes away.
// NOTE(review): the no-copy constructor expects page-aligned memory and a
// page-multiple length — confirm how _searchIndexes.mask is allocated.
_mtlSearchMasks = [_mtlDevice newBufferWithBytesNoCopy:_searchIndexes.mask
                                                length:_searchIndexes.bytesTotalMask
                                               options:0
                                           deallocator:nil];

// Threadgroup width = the pipeline's maximum threads per threadgroup.
// Author's observation on iPhone 6: maxTotalThreadsPerThreadgroup: 512, threadExecutionWidth: 32
uint groupWidth = [_mtlComputePipelineState maxTotalThreadsPerThreadgroup]; // a "/ 4.0" variant was tried here
m_ThreadgroupSize = MTLSizeMake(groupWidth, 1, 1);

// Threadgroup count = rowCount divided by the group width, rounded up
// (integer ceil), so the grid spans at least rowCount threads.
const NSUInteger widthPerGroup = m_ThreadgroupSize.width;
NSUInteger groupCount = (_charTable->rowCount + widthPerGroup - 1) / widthPerGroup;
m_ThreadgroupCount = MTLSizeMake(groupCount, 1, 1);
/////////////////////////////////////////////////////////////////// | |
// II. SEARCH CODE (Invoked every time search phrase changes):
//
// Encodes one compute pass that hands the kernel the character table (index 0),
// charsPerRow (1), rowCount (2), the lowercased phrase bytes (3), the phrase
// byte count (4) and the search-mask buffer (5), commits it, and reports the
// result to completionBlock on the main queue once the GPU has finished.

// Throttle in-flight searches. The semaphore is held in a local so the
// completion handler below signals exactly the semaphore that was waited on
// (the handler previously signalled a different identifier, which would never
// release the slot taken here).
dispatch_semaphore_t inflightSemaphore = m_InflightSemaphore;
dispatch_semaphore_wait(inflightSemaphore, DISPATCH_TIME_FOREVER);

// Timing starts after the wait, so time spent blocked on the semaphore is not counted.
uint64_t ts = mach_absolute_time();

// Keep the lowercased string alive in a local: the UTF8String pointer is only
// valid while its owning string is, and it must survive until setBytes: copies it.
NSString *loweredPhrase = searchPhrase.lowercaseString;
const char *sPhrase = [loweredPhrase UTF8String];

// Byte count of exactly the buffer passed to the kernel. NSString's -length
// counts UTF-16 code units of the original string, which differs from the
// UTF-8 byte count for non-ASCII text (and lowercasing may change the length).
// (Identifier spellings are kept: the elided consolidation code may use them.)
const uint sPhraseLenght = (uint)[loweredPhrase lengthOfBytesUsingEncoding:NSUTF8StringEncoding];
const uint charsPerRow = (uint)_charTable->charsPerRow;
const uint rowCount = (uint)_charTable->rowCount;
// NOTE(review): an empty or nil searchPhrase yields a zero-length setBytes:
// at index 3 — confirm callers never pass one, or short-circuit before encoding.

[_mtlCommandQueue insertDebugCaptureBoundary];

id<MTLCommandBuffer> commandBuffer = [_mtlCommandQueue commandBuffer];
id<MTLComputeCommandEncoder> commandEncoder = [commandBuffer computeCommandEncoder];

[commandEncoder setComputePipelineState:_mtlComputePipelineState];

// Long-lived buffers created at init time.
[commandEncoder setBuffer:_mtlCharTable offset:0 atIndex:0];
[commandEncoder setBuffer:_mtlSearchMasks offset:0 atIndex:5];

// Small per-search arguments, passed by value.
[commandEncoder setBytes:&charsPerRow length:sizeof(uint) atIndex:1];
[commandEncoder setBytes:&rowCount length:sizeof(uint) atIndex:2];
[commandEncoder setBytes:sPhrase length:sPhraseLenght * sizeof(char) atIndex:3];
[commandEncoder setBytes:&sPhraseLenght length:sizeof(uint) atIndex:4];

[commandEncoder dispatchThreadgroups:m_ThreadgroupCount
               threadsPerThreadgroup:m_ThreadgroupSize];
[commandEncoder endEncoding];

// CPU-side encoding cost only (the GPU has not run yet).
uint64_t duratione = mach_absolute_time() - ts;

[commandBuffer addCompletedHandler:^(id <MTLCommandBuffer> cmdb)
{
    // Release the in-flight slot taken at the top of this section.
    // NOTE(review): this happens before consolidation; if consolidation reads
    // _mtlSearchMasks, confirm a following search cannot overwrite it meanwhile.
    dispatch_semaphore_signal(inflightSemaphore);
    uint64_t duration = mach_absolute_time() - ts;
    // consolidate:
    // (elided in the original snippet; presumably produces `si` and `searchCount`)
    ...
    NSLog(@"status: %lu duration: %f sec (encoding: %f sec) | search count: %i", (unsigned long)cmdb.status, cpuTicksToMiliseconds(duration) / 1000.0f, cpuTicksToMiliseconds(duratione) / 1000.0f, searchCount);
    dispatch_async(dispatch_get_main_queue(), ^{
        // Invoking a nil block crashes, so guard the callback.
        if (completionBlock) {
            completionBlock(si, YES);
        }
    });
}];

// Commit the command buffer
[commandBuffer commit];

// Close the capture boundary on the same queue it was opened on.
[_mtlCommandQueue insertDebugCaptureBoundary];
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
thx for sharing this - that did help me ... even if 4 years after the post ;-)