-
-
Save sheredom/523f02bbad2ae397d7ed255f3f3b5a7f to your computer and use it in GitHub Desktop.
| // This is free and unencumbered software released into the public domain. | |
| // | |
| // Anyone is free to copy, modify, publish, use, compile, sell, or | |
| // distribute this software, either in source code form or as a compiled | |
| // binary, for any purpose, commercial or non-commercial, and by any | |
| // means. | |
| // | |
| // In jurisdictions that recognize copyright laws, the author or authors | |
| // of this software dedicate any and all copyright interest in the | |
| // software to the public domain. We make this dedication for the benefit | |
| // of the public at large and to the detriment of our heirs and | |
| // successors. We intend this dedication to be an overt act of | |
| // relinquishment in perpetuity of all present and future rights to this | |
| // software under copyright law. | |
| // | |
| // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
| // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
| // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |
| // IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
| // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
| // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
| // OTHER DEALINGS IN THE SOFTWARE. | |
| // | |
| // For more information, please refer to <http://unlicense.org/> | |
| #include "vulkan.h" | |
| #include <stdio.h> | |
| #include <stdlib.h> | |
| #define BAIL_ON_BAD_RESULT(result) \ | |
| if (VK_SUCCESS != (result)) { fprintf(stderr, "Failure at %u %s\n", __LINE__, __FILE__); exit(-1); } | |
| // Picks the queue family best suited to transfer work on `physicalDevice`. | |
| // | |
| // Preference order (within a tier the lowest family index wins): | |
| //   1. a family with the transfer bit but neither graphics nor compute, | |
| //   2. a family with the compute bit but not graphics (compute implies transfer), | |
| //   3. any family with the graphics, compute or transfer bit. | |
| // The sparse binding bit plays no part in the decision. | |
| // | |
| // On success the chosen index is written to `*queueFamilyIndex` and VK_SUCCESS is | |
| // returned. Returns VK_ERROR_INITIALIZATION_FAILED when no family qualifies and | |
| // VK_ERROR_OUT_OF_HOST_MEMORY when the temporary property array cannot be | |
| // allocated; `*queueFamilyIndex` is left untouched in both failure cases. | |
| VkResult vkGetBestTransferQueueNPH(VkPhysicalDevice physicalDevice, uint32_t* queueFamilyIndex) { | |
|   uint32_t queueFamilyPropertiesCount = 0; | |
|   vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyPropertiesCount, 0); | |
|   if (0 == queueFamilyPropertiesCount) { | |
|     return VK_ERROR_INITIALIZATION_FAILED; | |
|   } | |
|   // heap allocation instead of the non-portable, unchecked _alloca | |
|   VkQueueFamilyProperties* const queueFamilyProperties = (VkQueueFamilyProperties*)malloc( | |
|     sizeof(VkQueueFamilyProperties) * queueFamilyPropertiesCount); | |
|   if (0 == queueFamilyProperties) { | |
|     return VK_ERROR_OUT_OF_HOST_MEMORY; | |
|   } | |
|   vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyPropertiesCount, queueFamilyProperties); | |
|   // the family count itself is never a valid index, so it doubles as "not found" | |
|   const uint32_t notFound = queueFamilyPropertiesCount; | |
|   uint32_t transferOnly = notFound; | |
|   uint32_t computeNoGraphics = notFound; | |
|   uint32_t anyUsable = notFound; | |
|   // single pass that remembers the first family seen for each preference tier | |
|   for (uint32_t i = 0; i < queueFamilyPropertiesCount; i++) { | |
|     const VkQueueFlags flags = queueFamilyProperties[i].queueFlags; | |
|     const int hasGraphics = 0 != (VK_QUEUE_GRAPHICS_BIT & flags); | |
|     const int hasCompute = 0 != (VK_QUEUE_COMPUTE_BIT & flags); | |
|     const int hasTransfer = 0 != (VK_QUEUE_TRANSFER_BIT & flags); | |
|     if (notFound == transferOnly && hasTransfer && !hasGraphics && !hasCompute) { | |
|       transferOnly = i; | |
|     } | |
|     if (notFound == computeNoGraphics && hasCompute && !hasGraphics) { | |
|       computeNoGraphics = i; | |
|     } | |
|     if (notFound == anyUsable && (hasGraphics || hasCompute || hasTransfer)) { | |
|       anyUsable = i; | |
|     } | |
|   } | |
|   free(queueFamilyProperties); | |
|   uint32_t best = anyUsable; | |
|   if (notFound != transferOnly) { | |
|     best = transferOnly; | |
|   } else if (notFound != computeNoGraphics) { | |
|     best = computeNoGraphics; | |
|   } | |
|   if (notFound == best) { | |
|     return VK_ERROR_INITIALIZATION_FAILED; | |
|   } | |
|   *queueFamilyIndex = best; | |
|   return VK_SUCCESS; | |
| } | |
| // Picks the queue family best suited to compute work on `physicalDevice`. | |
| // | |
| // Preference order (within a tier the lowest family index wins): | |
| //   1. a family with the compute bit but not the graphics bit, | |
| //   2. any family with the compute bit. | |
| // The transfer and sparse binding bits play no part in the decision. | |
| // | |
| // On success the chosen index is written to `*queueFamilyIndex` and VK_SUCCESS is | |
| // returned. Returns VK_ERROR_INITIALIZATION_FAILED when no family supports compute | |
| // and VK_ERROR_OUT_OF_HOST_MEMORY when the temporary property array cannot be | |
| // allocated; `*queueFamilyIndex` is left untouched in both failure cases. | |
| VkResult vkGetBestComputeQueueNPH(VkPhysicalDevice physicalDevice, uint32_t* queueFamilyIndex) { | |
|   uint32_t queueFamilyPropertiesCount = 0; | |
|   vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyPropertiesCount, 0); | |
|   if (0 == queueFamilyPropertiesCount) { | |
|     return VK_ERROR_INITIALIZATION_FAILED; | |
|   } | |
|   // heap allocation instead of the non-portable, unchecked _alloca | |
|   VkQueueFamilyProperties* const queueFamilyProperties = (VkQueueFamilyProperties*)malloc( | |
|     sizeof(VkQueueFamilyProperties) * queueFamilyPropertiesCount); | |
|   if (0 == queueFamilyProperties) { | |
|     return VK_ERROR_OUT_OF_HOST_MEMORY; | |
|   } | |
|   vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyPropertiesCount, queueFamilyProperties); | |
|   // the family count itself is never a valid index, so it doubles as "not found" | |
|   const uint32_t notFound = queueFamilyPropertiesCount; | |
|   uint32_t computeNoGraphics = notFound; | |
|   uint32_t anyCompute = notFound; | |
|   // single pass that remembers the first family seen for each preference tier | |
|   for (uint32_t i = 0; i < queueFamilyPropertiesCount; i++) { | |
|     const VkQueueFlags flags = queueFamilyProperties[i].queueFlags; | |
|     if (0 == (VK_QUEUE_COMPUTE_BIT & flags)) { | |
|       continue; | |
|     } | |
|     if (notFound == anyCompute) { | |
|       anyCompute = i; | |
|     } | |
|     if (notFound == computeNoGraphics && 0 == (VK_QUEUE_GRAPHICS_BIT & flags)) { | |
|       computeNoGraphics = i; | |
|     } | |
|   } | |
|   free(queueFamilyProperties); | |
|   const uint32_t best = (notFound != computeNoGraphics) ? computeNoGraphics : anyCompute; | |
|   if (notFound == best) { | |
|     return VK_ERROR_INITIALIZATION_FAILED; | |
|   } | |
|   *queueFamilyIndex = best; | |
|   return VK_SUCCESS; | |
| } | |
| // Runs a minimal Vulkan compute job on every physical device in the system. | |
| // | |
| // For each device it: creates a logical device with one compute-capable queue, | |
| // allocates one host-visible, host-coherent memory block holding two storage | |
| // buffers, fills the block with random integers, builds a hand-assembled SPIR-V | |
| // compute shader that copies in[i] to out[i], dispatches one invocation per | |
| // element, and finally checks that the output half equals the input half. | |
| // Any Vulkan failure or data mismatch terminates the process through | |
| // BAIL_ON_BAD_RESULT. Returns 0 once every device has been verified. | |
| int main(int argc, const char * const argv[]) { | |
|   (void)argc; | |
|   (void)argv; | |
|   const VkApplicationInfo applicationInfo = { | |
|     VK_STRUCTURE_TYPE_APPLICATION_INFO, | |
|     0, | |
|     "VKComputeSample", | |
|     0, | |
|     "", | |
|     0, | |
|     VK_MAKE_VERSION(1, 0, 9) | |
|   }; | |
|   const VkInstanceCreateInfo instanceCreateInfo = { | |
|     VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, | |
|     0, | |
|     0, | |
|     &applicationInfo, | |
|     0, | |
|     0, | |
|     0, | |
|     0 | |
|   }; | |
|   VkInstance instance; | |
|   BAIL_ON_BAD_RESULT(vkCreateInstance(&instanceCreateInfo, 0, &instance)); | |
|   // standard two-call enumeration: query the count, then fetch the handles | |
|   uint32_t physicalDeviceCount = 0; | |
|   BAIL_ON_BAD_RESULT(vkEnumeratePhysicalDevices(instance, &physicalDeviceCount, 0)); | |
|   VkPhysicalDevice* const physicalDevices = (VkPhysicalDevice*)malloc( | |
|     sizeof(VkPhysicalDevice) * physicalDeviceCount); | |
|   // malloc(0) may legitimately return a null pointer, so only a non-empty request can fail | |
|   BAIL_ON_BAD_RESULT((0 == physicalDevices && 0 != physicalDeviceCount) ? VK_ERROR_OUT_OF_HOST_MEMORY : VK_SUCCESS); | |
|   BAIL_ON_BAD_RESULT(vkEnumeratePhysicalDevices(instance, &physicalDeviceCount, physicalDevices)); | |
|   for (uint32_t i = 0; i < physicalDeviceCount; i++) { | |
|     uint32_t queueFamilyIndex = 0; | |
|     BAIL_ON_BAD_RESULT(vkGetBestComputeQueueNPH(physicalDevices[i], &queueFamilyIndex)); | |
|     const float queuePriority = 1.0f; | |
|     const VkDeviceQueueCreateInfo deviceQueueCreateInfo = { | |
|       VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, | |
|       0, | |
|       0, | |
|       queueFamilyIndex, | |
|       1, | |
|       &queuePriority | |
|     }; | |
|     const VkDeviceCreateInfo deviceCreateInfo = { | |
|       VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, | |
|       0, | |
|       0, | |
|       1, | |
|       &deviceQueueCreateInfo, | |
|       0, | |
|       0, | |
|       0, | |
|       0, | |
|       0 | |
|     }; | |
|     VkDevice device; | |
|     BAIL_ON_BAD_RESULT(vkCreateDevice(physicalDevices[i], &deviceCreateInfo, 0, &device)); | |
|     VkPhysicalDeviceMemoryProperties properties; | |
|     vkGetPhysicalDeviceMemoryProperties(physicalDevices[i], &properties); | |
|     const int32_t bufferLength = 16384; | |
|     const uint32_t bufferSize = sizeof(int32_t) * bufferLength; | |
|     // we are going to need two buffers from this one memory | |
|     const VkDeviceSize memorySize = bufferSize * 2; | |
|     // set memoryTypeIndex to an invalid entry in the properties.memoryTypes array | |
|     uint32_t memoryTypeIndex = VK_MAX_MEMORY_TYPES; | |
|     // NOTE(review): the chosen type is not checked against the buffers' | |
|     // vkGetBufferMemoryRequirements().memoryTypeBits - confirm on target hardware | |
|     for (uint32_t k = 0; k < properties.memoryTypeCount; k++) { | |
|       if ((VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT & properties.memoryTypes[k].propertyFlags) && | |
|         (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT & properties.memoryTypes[k].propertyFlags) && | |
|         (memorySize < properties.memoryHeaps[properties.memoryTypes[k].heapIndex].size)) { | |
|         memoryTypeIndex = k; | |
|         break; | |
|       } | |
|     } | |
|     BAIL_ON_BAD_RESULT(memoryTypeIndex == VK_MAX_MEMORY_TYPES ? VK_ERROR_OUT_OF_HOST_MEMORY : VK_SUCCESS); | |
|     const VkMemoryAllocateInfo memoryAllocateInfo = { | |
|       VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, | |
|       0, | |
|       memorySize, | |
|       memoryTypeIndex | |
|     }; | |
|     VkDeviceMemory memory; | |
|     BAIL_ON_BAD_RESULT(vkAllocateMemory(device, &memoryAllocateInfo, 0, &memory)); | |
|     int32_t *payload; | |
|     BAIL_ON_BAD_RESULT(vkMapMemory(device, memory, 0, memorySize, 0, (void **)&payload)); | |
|     // fill the whole allocation, element 0 included, with random data | |
|     for (uint32_t k = 0; k < memorySize / sizeof(int32_t); k++) { | |
|       payload[k] = rand(); | |
|     } | |
|     vkUnmapMemory(device, memory); | |
|     const VkBufferCreateInfo bufferCreateInfo = { | |
|       VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | |
|       0, | |
|       0, | |
|       bufferSize, | |
|       VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, | |
|       VK_SHARING_MODE_EXCLUSIVE, | |
|       1, | |
|       &queueFamilyIndex | |
|     }; | |
|     // the input buffer occupies the first half of the allocation, the output buffer the second | |
|     VkBuffer in_buffer; | |
|     BAIL_ON_BAD_RESULT(vkCreateBuffer(device, &bufferCreateInfo, 0, &in_buffer)); | |
|     BAIL_ON_BAD_RESULT(vkBindBufferMemory(device, in_buffer, memory, 0)); | |
|     VkBuffer out_buffer; | |
|     BAIL_ON_BAD_RESULT(vkCreateBuffer(device, &bufferCreateInfo, 0, &out_buffer)); | |
|     BAIL_ON_BAD_RESULT(vkBindBufferMemory(device, out_buffer, memory, bufferSize)); | |
|     // result ids used by the hand-written SPIR-V module; BOUND is one past the last id | |
|     enum { | |
|       RESERVED_ID = 0, | |
|       FUNC_ID, | |
|       IN_ID, | |
|       OUT_ID, | |
|       GLOBAL_INVOCATION_ID, | |
|       VOID_TYPE_ID, | |
|       FUNC_TYPE_ID, | |
|       INT_TYPE_ID, | |
|       INT_ARRAY_TYPE_ID, | |
|       STRUCT_ID, | |
|       POINTER_TYPE_ID, | |
|       ELEMENT_POINTER_TYPE_ID, | |
|       INT_VECTOR_TYPE_ID, | |
|       INT_VECTOR_POINTER_TYPE_ID, | |
|       INT_POINTER_TYPE_ID, | |
|       CONSTANT_ZERO_ID, | |
|       CONSTANT_ARRAY_LENGTH_ID, | |
|       LABEL_ID, | |
|       IN_ELEMENT_ID, | |
|       OUT_ELEMENT_ID, | |
|       GLOBAL_INVOCATION_X_ID, | |
|       GLOBAL_INVOCATION_X_PTR_ID, | |
|       TEMP_LOADED_ID, | |
|       BOUND | |
|     }; | |
|     // numeric SPIR-V storage classes, decorations, built-ins and opcodes used below | |
|     enum { | |
|       INPUT = 1, | |
|       UNIFORM = 2, | |
|       BUFFER_BLOCK = 3, | |
|       ARRAY_STRIDE = 6, | |
|       BUILTIN = 11, | |
|       BINDING = 33, | |
|       OFFSET = 35, | |
|       DESCRIPTOR_SET = 34, | |
|       GLOBAL_INVOCATION = 28, | |
|       OP_TYPE_VOID = 19, | |
|       OP_TYPE_FUNCTION = 33, | |
|       OP_TYPE_INT = 21, | |
|       OP_TYPE_VECTOR = 23, | |
|       OP_TYPE_ARRAY = 28, | |
|       OP_TYPE_STRUCT = 30, | |
|       OP_TYPE_POINTER = 32, | |
|       OP_VARIABLE = 59, | |
|       OP_DECORATE = 71, | |
|       OP_MEMBER_DECORATE = 72, | |
|       OP_FUNCTION = 54, | |
|       OP_LABEL = 248, | |
|       OP_ACCESS_CHAIN = 65, | |
|       OP_CONSTANT = 43, | |
|       OP_LOAD = 61, | |
|       OP_STORE = 62, | |
|       OP_RETURN = 253, | |
|       OP_FUNCTION_END = 56, | |
|       OP_CAPABILITY = 17, | |
|       OP_MEMORY_MODEL = 14, | |
|       OP_ENTRY_POINT = 15, | |
|       OP_EXECUTION_MODE = 16, | |
|       OP_COMPOSITE_EXTRACT = 81, | |
|     }; | |
|     // each instruction starts with (word count << 16) | opcode; | |
|     // the words are unsigned because VkShaderModuleCreateInfo::pCode is const uint32_t* | |
|     const uint32_t shader[] = { | |
|       // first is the SPIR-V header | |
|       0x07230203, // magic header ID | |
|       0x00010000, // version 1.0.0 | |
|       0, // generator (optional) | |
|       BOUND, // bound | |
|       0, // schema | |
|       // OpCapability Shader | |
|       (2 << 16) | OP_CAPABILITY, 1, | |
|       // OpMemoryModel Logical Simple | |
|       // NOTE(review): Vulkan's SPIR-V environment appears to expect the GLSL450 | |
|       // memory model (second operand 1) rather than Simple - confirm against the spec | |
|       (3 << 16) | OP_MEMORY_MODEL, 0, 0, | |
|       // OpEntryPoint GLCompute %FUNC_ID "f" %GLOBAL_INVOCATION_ID | |
|       // the Input variable the entry point reads has to be listed in its interface | |
|       (5 << 16) | OP_ENTRY_POINT, 5, FUNC_ID, 0x00000066, GLOBAL_INVOCATION_ID, | |
|       // OpExecutionMode %FUNC_ID LocalSize 1 1 1 | |
|       (6 << 16) | OP_EXECUTION_MODE, FUNC_ID, 17, 1, 1, 1, | |
|       // next declare decorations | |
|       (3 << 16) | OP_DECORATE, STRUCT_ID, BUFFER_BLOCK, | |
|       (4 << 16) | OP_DECORATE, GLOBAL_INVOCATION_ID, BUILTIN, GLOBAL_INVOCATION, | |
|       (4 << 16) | OP_DECORATE, IN_ID, DESCRIPTOR_SET, 0, | |
|       (4 << 16) | OP_DECORATE, IN_ID, BINDING, 0, | |
|       (4 << 16) | OP_DECORATE, OUT_ID, DESCRIPTOR_SET, 0, | |
|       (4 << 16) | OP_DECORATE, OUT_ID, BINDING, 1, | |
|       (4 << 16) | OP_DECORATE, INT_ARRAY_TYPE_ID, ARRAY_STRIDE, 4, | |
|       (5 << 16) | OP_MEMBER_DECORATE, STRUCT_ID, 0, OFFSET, 0, | |
|       // next declare types | |
|       (2 << 16) | OP_TYPE_VOID, VOID_TYPE_ID, | |
|       (3 << 16) | OP_TYPE_FUNCTION, FUNC_TYPE_ID, VOID_TYPE_ID, | |
|       (4 << 16) | OP_TYPE_INT, INT_TYPE_ID, 32, 1, | |
|       (4 << 16) | OP_CONSTANT, INT_TYPE_ID, CONSTANT_ARRAY_LENGTH_ID, bufferLength, | |
|       (4 << 16) | OP_TYPE_ARRAY, INT_ARRAY_TYPE_ID, INT_TYPE_ID, CONSTANT_ARRAY_LENGTH_ID, | |
|       (3 << 16) | OP_TYPE_STRUCT, STRUCT_ID, INT_ARRAY_TYPE_ID, | |
|       (4 << 16) | OP_TYPE_POINTER, POINTER_TYPE_ID, UNIFORM, STRUCT_ID, | |
|       (4 << 16) | OP_TYPE_POINTER, ELEMENT_POINTER_TYPE_ID, UNIFORM, INT_TYPE_ID, | |
|       (4 << 16) | OP_TYPE_VECTOR, INT_VECTOR_TYPE_ID, INT_TYPE_ID, 3, | |
|       (4 << 16) | OP_TYPE_POINTER, INT_VECTOR_POINTER_TYPE_ID, INPUT, INT_VECTOR_TYPE_ID, | |
|       (4 << 16) | OP_TYPE_POINTER, INT_POINTER_TYPE_ID, INPUT, INT_TYPE_ID, | |
|       // then declare constants | |
|       (4 << 16) | OP_CONSTANT, INT_TYPE_ID, CONSTANT_ZERO_ID, 0, | |
|       // then declare variables | |
|       (4 << 16) | OP_VARIABLE, POINTER_TYPE_ID, IN_ID, UNIFORM, | |
|       (4 << 16) | OP_VARIABLE, POINTER_TYPE_ID, OUT_ID, UNIFORM, | |
|       (4 << 16) | OP_VARIABLE, INT_VECTOR_POINTER_TYPE_ID, GLOBAL_INVOCATION_ID, INPUT, | |
|       // then declare function | |
|       (5 << 16) | OP_FUNCTION, VOID_TYPE_ID, FUNC_ID, 0, FUNC_TYPE_ID, | |
|       (2 << 16) | OP_LABEL, LABEL_ID, | |
|       (5 << 16) | OP_ACCESS_CHAIN, INT_POINTER_TYPE_ID, GLOBAL_INVOCATION_X_PTR_ID, GLOBAL_INVOCATION_ID, CONSTANT_ZERO_ID, | |
|       (4 << 16) | OP_LOAD, INT_TYPE_ID, GLOBAL_INVOCATION_X_ID, GLOBAL_INVOCATION_X_PTR_ID, | |
|       (6 << 16) | OP_ACCESS_CHAIN, ELEMENT_POINTER_TYPE_ID, IN_ELEMENT_ID, IN_ID, CONSTANT_ZERO_ID, GLOBAL_INVOCATION_X_ID, | |
|       (4 << 16) | OP_LOAD, INT_TYPE_ID, TEMP_LOADED_ID, IN_ELEMENT_ID, | |
|       (6 << 16) | OP_ACCESS_CHAIN, ELEMENT_POINTER_TYPE_ID, OUT_ELEMENT_ID, OUT_ID, CONSTANT_ZERO_ID, GLOBAL_INVOCATION_X_ID, | |
|       (3 << 16) | OP_STORE, OUT_ELEMENT_ID, TEMP_LOADED_ID, | |
|       (1 << 16) | OP_RETURN, | |
|       (1 << 16) | OP_FUNCTION_END, | |
|     }; | |
|     VkShaderModuleCreateInfo shaderModuleCreateInfo = { | |
|       VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, | |
|       0, | |
|       0, | |
|       sizeof(shader), | |
|       shader | |
|     }; | |
|     VkShaderModule shader_module; | |
|     BAIL_ON_BAD_RESULT(vkCreateShaderModule(device, &shaderModuleCreateInfo, 0, &shader_module)); | |
|     // binding 0 is the input buffer, binding 1 the output buffer | |
|     VkDescriptorSetLayoutBinding descriptorSetLayoutBindings[2] = { | |
|       { | |
|         0, | |
|         VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | |
|         1, | |
|         VK_SHADER_STAGE_COMPUTE_BIT, | |
|         0 | |
|       }, | |
|       { | |
|         1, | |
|         VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | |
|         1, | |
|         VK_SHADER_STAGE_COMPUTE_BIT, | |
|         0 | |
|       } | |
|     }; | |
|     VkDescriptorSetLayoutCreateInfo descriptorSetLayoutCreateInfo = { | |
|       VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | |
|       0, | |
|       0, | |
|       2, | |
|       descriptorSetLayoutBindings | |
|     }; | |
|     VkDescriptorSetLayout descriptorSetLayout; | |
|     BAIL_ON_BAD_RESULT(vkCreateDescriptorSetLayout(device, &descriptorSetLayoutCreateInfo, 0, &descriptorSetLayout)); | |
|     VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = { | |
|       VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, | |
|       0, | |
|       0, | |
|       1, | |
|       &descriptorSetLayout, | |
|       0, | |
|       0 | |
|     }; | |
|     VkPipelineLayout pipelineLayout; | |
|     BAIL_ON_BAD_RESULT(vkCreatePipelineLayout(device, &pipelineLayoutCreateInfo, 0, &pipelineLayout)); | |
|     VkComputePipelineCreateInfo computePipelineCreateInfo = { | |
|       VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, | |
|       0, | |
|       0, | |
|       { | |
|         VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | |
|         0, | |
|         0, | |
|         VK_SHADER_STAGE_COMPUTE_BIT, | |
|         shader_module, | |
|         "f", | |
|         0 | |
|       }, | |
|       pipelineLayout, | |
|       0, | |
|       0 | |
|     }; | |
|     VkPipeline pipeline; | |
|     BAIL_ON_BAD_RESULT(vkCreateComputePipelines(device, 0, 1, &computePipelineCreateInfo, 0, &pipeline)); | |
|     VkCommandPoolCreateInfo commandPoolCreateInfo = { | |
|       VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, | |
|       0, | |
|       0, | |
|       queueFamilyIndex | |
|     }; | |
|     VkDescriptorPoolSize descriptorPoolSize = { | |
|       VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | |
|       2 | |
|     }; | |
|     VkDescriptorPoolCreateInfo descriptorPoolCreateInfo = { | |
|       VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, | |
|       0, | |
|       0, | |
|       1, | |
|       1, | |
|       &descriptorPoolSize | |
|     }; | |
|     VkDescriptorPool descriptorPool; | |
|     BAIL_ON_BAD_RESULT(vkCreateDescriptorPool(device, &descriptorPoolCreateInfo, 0, &descriptorPool)); | |
|     VkDescriptorSetAllocateInfo descriptorSetAllocateInfo = { | |
|       VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, | |
|       0, | |
|       descriptorPool, | |
|       1, | |
|       &descriptorSetLayout | |
|     }; | |
|     VkDescriptorSet descriptorSet; | |
|     BAIL_ON_BAD_RESULT(vkAllocateDescriptorSets(device, &descriptorSetAllocateInfo, &descriptorSet)); | |
|     VkDescriptorBufferInfo in_descriptorBufferInfo = { | |
|       in_buffer, | |
|       0, | |
|       VK_WHOLE_SIZE | |
|     }; | |
|     VkDescriptorBufferInfo out_descriptorBufferInfo = { | |
|       out_buffer, | |
|       0, | |
|       VK_WHOLE_SIZE | |
|     }; | |
|     VkWriteDescriptorSet writeDescriptorSet[2] = { | |
|       { | |
|         VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, | |
|         0, | |
|         descriptorSet, | |
|         0, | |
|         0, | |
|         1, | |
|         VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | |
|         0, | |
|         &in_descriptorBufferInfo, | |
|         0 | |
|       }, | |
|       { | |
|         VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, | |
|         0, | |
|         descriptorSet, | |
|         1, | |
|         0, | |
|         1, | |
|         VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | |
|         0, | |
|         &out_descriptorBufferInfo, | |
|         0 | |
|       } | |
|     }; | |
|     vkUpdateDescriptorSets(device, 2, writeDescriptorSet, 0, 0); | |
|     VkCommandPool commandPool; | |
|     BAIL_ON_BAD_RESULT(vkCreateCommandPool(device, &commandPoolCreateInfo, 0, &commandPool)); | |
|     VkCommandBufferAllocateInfo commandBufferAllocateInfo = { | |
|       VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, | |
|       0, | |
|       commandPool, | |
|       VK_COMMAND_BUFFER_LEVEL_PRIMARY, | |
|       1 | |
|     }; | |
|     VkCommandBuffer commandBuffer; | |
|     BAIL_ON_BAD_RESULT(vkAllocateCommandBuffers(device, &commandBufferAllocateInfo, &commandBuffer)); | |
|     VkCommandBufferBeginInfo commandBufferBeginInfo = { | |
|       VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, | |
|       0, | |
|       VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, | |
|       0 | |
|     }; | |
|     BAIL_ON_BAD_RESULT(vkBeginCommandBuffer(commandBuffer, &commandBufferBeginInfo)); | |
|     vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); | |
|     vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, | |
|       pipelineLayout, 0, 1, &descriptorSet, 0, 0); | |
|     // the local size is 1x1x1, so one workgroup is dispatched per buffer element | |
|     vkCmdDispatch(commandBuffer, (uint32_t)bufferLength, 1, 1); | |
|     BAIL_ON_BAD_RESULT(vkEndCommandBuffer(commandBuffer)); | |
|     VkQueue queue; | |
|     vkGetDeviceQueue(device, queueFamilyIndex, 0, &queue); | |
|     VkSubmitInfo submitInfo = { | |
|       VK_STRUCTURE_TYPE_SUBMIT_INFO, | |
|       0, | |
|       0, | |
|       0, | |
|       0, | |
|       1, | |
|       &commandBuffer, | |
|       0, | |
|       0 | |
|     }; | |
|     BAIL_ON_BAD_RESULT(vkQueueSubmit(queue, 1, &submitInfo, 0)); | |
|     BAIL_ON_BAD_RESULT(vkQueueWaitIdle(queue)); | |
|     // map again and check that the second half (output) mirrors the first half (input) | |
|     BAIL_ON_BAD_RESULT(vkMapMemory(device, memory, 0, memorySize, 0, (void **)&payload)); | |
|     for (uint32_t k = 0, e = bufferSize / sizeof(int32_t); k < e; k++) { | |
|       BAIL_ON_BAD_RESULT(payload[k + e] == payload[k] ? VK_SUCCESS : VK_ERROR_OUT_OF_HOST_MEMORY); | |
|     } | |
|     vkUnmapMemory(device, memory); | |
|     // the queue is idle, so everything created for this device can be released; | |
|     // destroying the pools also frees the command buffer and descriptor set | |
|     vkDestroyCommandPool(device, commandPool, 0); | |
|     vkDestroyDescriptorPool(device, descriptorPool, 0); | |
|     vkDestroyPipeline(device, pipeline, 0); | |
|     vkDestroyPipelineLayout(device, pipelineLayout, 0); | |
|     vkDestroyDescriptorSetLayout(device, descriptorSetLayout, 0); | |
|     vkDestroyShaderModule(device, shader_module, 0); | |
|     vkDestroyBuffer(device, out_buffer, 0); | |
|     vkDestroyBuffer(device, in_buffer, 0); | |
|     vkFreeMemory(device, memory, 0); | |
|     vkDestroyDevice(device, 0); | |
|   } | |
|   free(physicalDevices); | |
|   vkDestroyInstance(instance, 0); | |
|   return 0; | |
| } | |
UNASSIGNED-CoreValidation-Shader-InconsistentSpirv(ERROR / SPEC): msgNum: 0 - SPIR-V module not valid: Interface variable id <4> is used by entry point 'f' id <1>, but is not listed as an interface
%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3int Input
the module validates fine for me after changing line 325 to:
(4 << 16) | OP_ENTRY_POINT, 5, FUNC_ID, 0x66, GLOBAL_INVOCATION_ID
UNASSIGNED-CoreValidation-Shader-InconsistentSpirv(ERROR / SPEC): msgNum: 0 - SPIR-V module not valid: Interface variable id <4> is used by entry point 'f' id <1>, but is not listed as an interface
%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3int Input
The module validates fine for me after changing line 325 to:
(4 << 16) | OP_ENTRY_POINT, 5, FUNC_ID, 0x66, GLOBAL_INVOCATION_ID
It works for me with (5 << 16) | OP_ENTRY_POINT, 5, FUNC_ID, 0x66, GLOBAL_INVOCATION_ID.
Seemingly (x << 16) means "the current instruction consists of x uint32_t values."
I measured times with the following code:
auto tm0 = std::chrono::high_resolution_clock::now();
BAIL_ON_BAD_RESULT(vkQueueSubmit(queue, 1, &submitInfo, 0));
BAIL_ON_BAD_RESULT(vkQueueWaitIdle(queue));
auto tm1 = std::chrono::high_resolution_clock::now();
BAIL_ON_BAD_RESULT(vkMapMemory(device, memory, 0, memorySize, 0, (void **)&payload));
auto tm2 = std::chrono::high_resolution_clock::now();
for (uint32_t k = 0, e = bufferSize / sizeof(int32_t); k < e; k++) {
BAIL_ON_BAD_RESULT(payload[k + e] == payload[k] ? VK_SUCCESS : VK_ERROR_OUT_OF_HOST_MEMORY);
}
auto tm3 = std::chrono::high_resolution_clock::now();
using milliseconds = std::chrono::duration<double, std::milli>;
milliseconds tmProcess = tm1-tm0;
milliseconds tmMap = tm2-tm1;
milliseconds tmRead = tm3-tm2;
printf("Times (ms):\n process: %f\n map : %f\n read : %f",
tmProcess.count(), tmMap.count(), tmRead.count());
NVidia Geforce RTX 2080 super, Windows 10, Core I9-10980HK.
Times (ms):
process: 0.315900
map : 0.000300
read : 3.107100
Reading back the memory takes most of the time because a memory without VK_MEMORY_PROPERTY_HOST_CACHED_BIT was being selected. If I change memory selection code like this:
for (uint32_t k = 0; k < properties.memoryTypeCount; k++) {
if ((VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT & properties.memoryTypes[k].propertyFlags) &&
(VK_MEMORY_PROPERTY_HOST_COHERENT_BIT & properties.memoryTypes[k].propertyFlags) &&
(VK_MEMORY_PROPERTY_HOST_CACHED_BIT & properties.memoryTypes[k].propertyFlags) &&
(memorySize < properties.memoryHeaps[properties.memoryTypes[k].heapIndex].size)) {
memoryTypeIndex = k;
break;
}
}
then I get
Times (ms):
process: 0.327000
map : 0.000500
read : 0.010500
Thank you to everyone who participates because it enriches the development of NPH.
VkQueueFamilyProperties* const queueFamilyProperties = (VkQueueFamilyProperties*)_alloca(
sizeof(VkQueueFamilyProperties) * queueFamilyPropertiesCount);
Visual Studio Community advises me to replace _alloca with _malloca here.
This is great. I think learning Vulkan through the compute side first makes a lot more sense, rather than going through the very long graphics setup.