Skip to content

Instantly share code, notes, and snippets.

@Pikachuxxxx
Created August 6, 2025 06:26
Show Gist options
  • Save Pikachuxxxx/1cd0f1bf80057fee928461d8642262e7 to your computer and use it in GitHub Desktop.
Save Pikachuxxxx/1cd0f1bf80057fee928461d8642262e7 to your computer and use it in GitHub Desktop.
Vulkan test app for experimenting with timestamp queries
#include <cassert>
#include <chrono>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

#include <vulkan/vulkan.h>
// Macros for GPU timing with Vulkan timestamp queries.
// A timed region occupies two consecutive queries in the pool:
// queryIndex holds the start timestamp and queryIndex + 1 the end timestamp.
//
// VK_TIME_START records, into cmdBuffer, a reset of BOTH queries of the pair
// followed by the start timestamp. A timestamp may only be written to a query
// that has been reset, so the end query has to be reset here as well.
// `device` is not used by this macro.
// The body is wrapped in do/while(0) so the macro behaves as one statement.
#define VK_TIME_START(device, cmdBuffer, queryPool, queryIndex) \
    do { \
        vkCmdResetQueryPool((cmdBuffer), (queryPool), (queryIndex), 2); \
        vkCmdWriteTimestamp((cmdBuffer), VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, (queryPool), (queryIndex)); \
    } while (0)
// VK_TIME_END records, into cmdBuffer, the end timestamp of a timed region.
// The value is written to query (queryIndex + 1) at the bottom-of-pipe stage.
// `device` is not used by this macro.
// The body is wrapped in do/while(0) and queryIndex is parenthesized so the
// macro is safe inside unbraced if/else and with expression arguments.
#define VK_TIME_END(device, cmdBuffer, queryPool, queryIndex) \
    do { \
        vkCmdWriteTimestamp((cmdBuffer), VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, (queryPool), (queryIndex) + 1); \
    } while (0)
// VK_TIME_GET_RESULT reads the two 64-bit timestamps stored at queryIndex and
// queryIndex + 1, waiting until both are available (VK_QUERY_RESULT_WAIT_BIT),
// and stores the elapsed time in milliseconds into `result`.
//   timestampPeriod - nanoseconds per timestamp tick
//   result          - assignable lvalue; set to -1.0 if the query read fails
// The tick delta is converted to double before scaling; multiplying the
// uint64_t delta by a float would round it to 24 bits of mantissa.
// Internal locals carry a trailing underscore to avoid capturing caller names.
#define VK_TIME_GET_RESULT(device, queryPool, queryIndex, timestampPeriod, result) \
    do { \
        uint64_t vkTimeStamps_[2] = {0, 0}; \
        const VkResult vkTimeStatus_ = vkGetQueryPoolResults((device), (queryPool), (queryIndex), 2, \
            sizeof(vkTimeStamps_), vkTimeStamps_, sizeof(uint64_t), \
            VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); \
        if (vkTimeStatus_ == VK_SUCCESS) { \
            /* ticks * ns-per-tick / 1e6 = milliseconds */ \
            (result) = static_cast<double>(vkTimeStamps_[1] - vkTimeStamps_[0]) * \
                       static_cast<double>(timestampPeriod) / 1000000.0; \
        } else { \
            (result) = -1.0; \
        } \
    } while (0)
// Error checking macro.
// Evaluates `call` exactly once and throws std::runtime_error carrying the
// numeric VkResult code when the call does not return VK_SUCCESS.
// The internal local has a trailing-underscore name so an argument that
// mentions a caller variable called `result` is not shadowed by it.
#define VK_CHECK(call) \
    do { \
        const VkResult vkCheckStatus_ = (call); \
        if (vkCheckStatus_ != VK_SUCCESS) { \
            throw std::runtime_error("Vulkan call failed: " + \
                                     std::to_string(static_cast<int>(vkCheckStatus_))); \
        } \
    } while (0)
class VulkanTestApp {
private:
VkInstance instance;
VkPhysicalDevice physicalDevice;
VkDevice device;
VkQueue queue;
VkCommandPool commandPool;
VkCommandBuffer commandBuffer;
VkQueryPool queryPool;
VkBuffer buffer;
VkDeviceMemory bufferMemory;
VkBuffer stagingBuffer;
VkDeviceMemory stagingBufferMemory;
uint32_t queueFamilyIndex;
float timestampPeriod;
public:
VulkanTestApp() {
initVulkan();
createBuffer();
createQueryPool();
recordCommands();
executeAndTime();
cleanup();
}
private:
void initVulkan() {
// Create instance
VkApplicationInfo appInfo{};
appInfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
appInfo.pApplicationName = "VK Test App";
appInfo.applicationVersion = VK_MAKE_VERSION(1, 0, 0);
appInfo.pEngineName = "No Engine";
appInfo.engineVersion = VK_MAKE_VERSION(1, 0, 0);
appInfo.apiVersion = VK_API_VERSION_1_0;
VkInstanceCreateInfo createInfo{};
createInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
createInfo.pApplicationInfo = &appInfo;
VK_CHECK(vkCreateInstance(&createInfo, nullptr, &instance));
// Pick physical device
uint32_t deviceCount = 0;
vkEnumeratePhysicalDevices(instance, &deviceCount, nullptr);
if (deviceCount == 0) {
throw std::runtime_error("Failed to find GPUs with Vulkan support!");
}
std::vector<VkPhysicalDevice> devices(deviceCount);
vkEnumeratePhysicalDevices(instance, &deviceCount, devices.data());
physicalDevice = devices[0]; // Just pick the first one
// Get timestamp period for timing calculations
VkPhysicalDeviceProperties properties;
vkGetPhysicalDeviceProperties(physicalDevice, &properties);
timestampPeriod = properties.limits.timestampPeriod;
// Find queue family
uint32_t queueFamilyCount = 0;
vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyCount, nullptr);
std::vector<VkQueueFamilyProperties> queueFamilies(queueFamilyCount);
vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyCount, queueFamilies.data());
queueFamilyIndex = UINT32_MAX;
for (uint32_t i = 0; i < queueFamilyCount; i++) {
if (queueFamilies[i].queueFlags & VK_QUEUE_COMPUTE_BIT) {
queueFamilyIndex = i;
break;
}
}
if (queueFamilyIndex == UINT32_MAX) {
throw std::runtime_error("Failed to find suitable queue family!");
}
// Create logical device
VkDeviceQueueCreateInfo queueCreateInfo{};
queueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
queueCreateInfo.queueFamilyIndex = queueFamilyIndex;
queueCreateInfo.queueCount = 1;
float queuePriority = 1.0f;
queueCreateInfo.pQueuePriorities = &queuePriority;
VkDeviceCreateInfo deviceCreateInfo{};
deviceCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
deviceCreateInfo.pQueueCreateInfos = &queueCreateInfo;
deviceCreateInfo.queueCreateInfoCount = 1;
VK_CHECK(vkCreateDevice(physicalDevice, &deviceCreateInfo, nullptr, &device));
// Get queue
vkGetDeviceQueue(device, queueFamilyIndex, 0, &queue);
// Create command pool
VkCommandPoolCreateInfo poolInfo{};
poolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
poolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
poolInfo.queueFamilyIndex = queueFamilyIndex;
VK_CHECK(vkCreateCommandPool(device, &poolInfo, nullptr, &commandPool));
// Allocate command buffer
VkCommandBufferAllocateInfo allocInfo{};
allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
allocInfo.commandPool = commandPool;
allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
allocInfo.commandBufferCount = 1;
VK_CHECK(vkAllocateCommandBuffers(device, &allocInfo, &commandBuffer));
}
uint32_t findMemoryType(uint32_t typeFilter, VkMemoryPropertyFlags properties) {
VkPhysicalDeviceMemoryProperties memProperties;
vkGetPhysicalDeviceMemoryProperties(physicalDevice, &memProperties);
for (uint32_t i = 0; i < memProperties.memoryTypeCount; i++) {
if ((typeFilter & (1 << i)) && (memProperties.memoryTypes[i].propertyFlags & properties) == properties) {
return i;
}
}
throw std::runtime_error("Failed to find suitable memory type!");
}
void createBuffer() {
VkDeviceSize bufferSize = sizeof(float) * 1024 * 1024; // 1M floats
// Create staging buffer (host visible)
VkBufferCreateInfo stagingBufferInfo{};
stagingBufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
stagingBufferInfo.size = bufferSize;
stagingBufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
stagingBufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
VK_CHECK(vkCreateBuffer(device, &stagingBufferInfo, nullptr, &stagingBuffer));
VkMemoryRequirements stagingMemRequirements;
vkGetBufferMemoryRequirements(device, stagingBuffer, &stagingMemRequirements);
VkMemoryAllocateInfo stagingAllocInfo{};
stagingAllocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
stagingAllocInfo.allocationSize = stagingMemRequirements.size;
stagingAllocInfo.memoryTypeIndex = findMemoryType(stagingMemRequirements.memoryTypeBits,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
VK_CHECK(vkAllocateMemory(device, &stagingAllocInfo, nullptr, &stagingBufferMemory));
VK_CHECK(vkBindBufferMemory(device, stagingBuffer, stagingBufferMemory, 0));
// Fill staging buffer with test data
void* data;
VK_CHECK(vkMapMemory(device, stagingBufferMemory, 0, bufferSize, 0, &data));
float* floatData = static_cast<float*>(data);
for (int i = 0; i < 1024 * 1024; i++) {
floatData[i] = static_cast<float>(i);
}
vkUnmapMemory(device, stagingBufferMemory);
// Create device local buffer
VkBufferCreateInfo bufferInfo{};
bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
bufferInfo.size = bufferSize;
bufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
bufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
VK_CHECK(vkCreateBuffer(device, &bufferInfo, nullptr, &buffer));
VkMemoryRequirements memRequirements;
vkGetBufferMemoryRequirements(device, buffer, &memRequirements);
VkMemoryAllocateInfo allocInfo{};
allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
allocInfo.allocationSize = memRequirements.size;
allocInfo.memoryTypeIndex = findMemoryType(memRequirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
VK_CHECK(vkAllocateMemory(device, &allocInfo, nullptr, &bufferMemory));
VK_CHECK(vkBindBufferMemory(device, buffer, bufferMemory, 0));
}
void createQueryPool() {
VkQueryPoolCreateInfo queryPoolInfo{};
queryPoolInfo.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
queryPoolInfo.queryType = VK_QUERY_TYPE_TIMESTAMP;
queryPoolInfo.queryCount = 4; // 2 for each timing operation (start/end)
VK_CHECK(vkCreateQueryPool(device, &queryPoolInfo, nullptr, &queryPool));
}
void recordCommands() {
VkCommandBufferBeginInfo beginInfo{};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
VK_CHECK(vkBeginCommandBuffer(commandBuffer, &beginInfo));
// Time the buffer copy operation
VK_TIME_START(device, commandBuffer, queryPool, 0);
// Copy from staging buffer to device buffer
VkBufferCopy copyRegion{};
copyRegion.size = sizeof(float) * 1024 * 1024;
vkCmdCopyBuffer(commandBuffer, stagingBuffer, buffer, 1, &copyRegion);
VK_TIME_END(device, commandBuffer, queryPool, 0);
// Add a pipeline barrier to ensure copy is complete before next operation
VkMemoryBarrier barrier{};
barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
vkCmdPipelineBarrier(commandBuffer,
VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT,
0, 1, &barrier, 0, nullptr, 0, nullptr);
// Time another operation (copy back)
VK_TIME_START(device, commandBuffer, queryPool, 2);
vkCmdCopyBuffer(commandBuffer, buffer, stagingBuffer, 1, &copyRegion);
VK_TIME_END(device, commandBuffer, queryPool, 2);
VK_CHECK(vkEndCommandBuffer(commandBuffer));
}
void executeAndTime() {
auto cpuStart = std::chrono::high_resolution_clock::now();
// Submit command buffer
VkSubmitInfo submitInfo{};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = &commandBuffer;
VK_CHECK(vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE));
VK_CHECK(vkQueueWaitIdle(queue));
auto cpuEnd = std::chrono::high_resolution_clock::now();
auto cpuDuration = std::chrono::duration_cast<std::chrono::microseconds>(cpuEnd - cpuStart);
// Get GPU timing results using macros
double gpuTime1, gpuTime2;
VK_TIME_GET_RESULT(device, queryPool, 0, timestampPeriod, gpuTime1);
VK_TIME_GET_RESULT(device, queryPool, 2, timestampPeriod, gpuTime2);
// Print results
std::cout << "=== Vulkan Test App Results ===" << std::endl;
std::cout << "CPU Time (total): " << cpuDuration.count() / 1000.0 << " ms" << std::endl;
std::cout << "GPU Time (copy to device): " << gpuTime1 << " ms" << std::endl;
std::cout << "GPU Time (copy from device): " << gpuTime2 << " ms" << std::endl;
std::cout << "GPU Time (total): " << (gpuTime1 + gpuTime2) << " ms" << std::endl;
std::cout << "Timestamp period: " << timestampPeriod << " ns" << std::endl;
std::cout << "Buffer size: " << (sizeof(float) * 1024 * 1024) / (1024 * 1024) << " MB" << std::endl;
}
void cleanup() {
vkDestroyQueryPool(device, queryPool, nullptr);
vkDestroyBuffer(device, buffer, nullptr);
vkFreeMemory(device, bufferMemory, nullptr);
vkDestroyBuffer(device, stagingBuffer, nullptr);
vkFreeMemory(device, stagingBufferMemory, nullptr);
vkDestroyCommandPool(device, commandPool, nullptr);
vkDestroyDevice(device, nullptr);
vkDestroyInstance(instance, nullptr);
}
};
// Program entry point: runs the headless Vulkan timing test once.
// Returns EXIT_SUCCESS when the test completes, or EXIT_FAILURE after
// printing the message of any std::exception raised by the test.
int main() {
    int exitCode = EXIT_SUCCESS;
    try {
        std::cout << "Starting Vulkan Test App (Headless)..." << std::endl;
        // Construction performs the entire test, including teardown.
        VulkanTestApp app;
        std::cout << "Test completed successfully!" << std::endl;
    } catch (const std::exception& failure) {
        std::cerr << "Error: " << failure.what() << std::endl;
        exitCode = EXIT_FAILURE;
    }
    return exitCode;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment