#include "gpu.h" #include #define VK_NO_PROTOTYPES #include #ifdef _WIN32 #define WIN32_LEAN_AND_MEAN #include #else #include #endif // Objects struct gpu_buffer { VkBuffer handle; uint32_t memory; uint32_t offset; }; struct gpu_stream { VkCommandBuffer commands; }; size_t gpu_sizeof_buffer() { return sizeof(gpu_buffer); } // Internals typedef struct { VkDeviceMemory handle; void* pointer; uint32_t refs; } gpu_memory; typedef enum { GPU_MEMORY_BUFFER_GPU, GPU_MEMORY_BUFFER_CPU_WRITE, GPU_MEMORY_BUFFER_CPU_READ, GPU_MEMORY_COUNT } gpu_memory_type; typedef struct { gpu_memory* block; uint32_t cursor; uint16_t memoryType; uint16_t memoryFlags; } gpu_allocator; typedef struct { void* handle; VkObjectType type; uint32_t tick; } gpu_victim; typedef struct { uint32_t head; uint32_t tail; gpu_victim data[256]; } gpu_morgue; typedef struct { gpu_memory* memory; VkBuffer buffer; uint32_t cursor; uint32_t size; char* pointer; } gpu_scratchpad; typedef struct { VkCommandPool pool; gpu_stream streams[64]; VkSemaphore semaphores[2]; VkFence fence; } gpu_tick; // State static struct { void* library; gpu_config config; VkInstance instance; VkPhysicalDevice adapter; VkDevice device; VkQueue queue; uint32_t queueFamilyIndex; VkDebugUtilsMessengerEXT messenger; gpu_allocator allocators[GPU_MEMORY_COUNT]; uint8_t allocatorLookup[GPU_MEMORY_COUNT]; gpu_scratchpad scratchpad[2]; gpu_memory memory[256]; uint32_t streamCount; uint32_t tick[2]; gpu_tick ticks[4]; gpu_morgue morgue; } state; // Helpers enum { CPU, GPU }; #define MIN(a, b) (a < b ? a : b) #define MAX(a, b) (a > b ? a : b) #define COUNTOF(x) (sizeof(x) / sizeof(x[0])) #define ALIGN(p, n) (((uintptr_t) (p) + (n - 1)) & ~(n - 1)) #define VK(f, s) if (!vcheck(f, s)) #define CHECK(c, s) if (!check(c, s)) #define TICK_MASK (COUNTOF(state.ticks) - 1) #define MORGUE_MASK (COUNTOF(state.morgue.data) - 1) static gpu_memory* gpu_allocate(gpu_memory_type type, VkMemoryRequirements info, VkDeviceSize* offset); static void gpu_release(gpu_memory* memory); static void condemn(void* handle, VkObjectType type); static void expunge(void); static VkBool32 relay(VkDebugUtilsMessageSeverityFlagBitsEXT severity, VkDebugUtilsMessageTypeFlagsEXT flags, const VkDebugUtilsMessengerCallbackDataEXT* data, void* userdata); static void nickname(void* object, VkObjectType type, const char* name); static bool vcheck(VkResult result, const char* message); static bool check(bool condition, const char* message); // Loader // Functions that don't require an instance #define GPU_FOREACH_ANONYMOUS(X)\ X(vkCreateInstance) // Functions that require an instance but don't require a device #define GPU_FOREACH_INSTANCE(X)\ X(vkDestroyInstance)\ X(vkCreateDebugUtilsMessengerEXT)\ X(vkDestroyDebugUtilsMessengerEXT)\ X(vkDestroySurfaceKHR)\ X(vkEnumeratePhysicalDevices)\ X(vkGetPhysicalDeviceProperties2)\ X(vkGetPhysicalDeviceFeatures2)\ X(vkGetPhysicalDeviceMemoryProperties)\ X(vkGetPhysicalDeviceFormatProperties)\ X(vkGetPhysicalDeviceQueueFamilyProperties)\ X(vkGetPhysicalDeviceSurfaceSupportKHR)\ X(vkGetPhysicalDeviceSurfaceCapabilitiesKHR)\ X(vkGetPhysicalDeviceSurfaceFormatsKHR)\ X(vkCreateDevice)\ X(vkDestroyDevice)\ X(vkGetDeviceQueue)\ X(vkGetDeviceProcAddr) // Functions that require a device #define GPU_FOREACH_DEVICE(X)\ X(vkSetDebugUtilsObjectNameEXT)\ X(vkCmdBeginDebugUtilsLabelEXT)\ X(vkCmdEndDebugUtilsLabelEXT)\ X(vkDeviceWaitIdle)\ X(vkQueueSubmit)\ X(vkQueuePresentKHR)\ X(vkCreateSwapchainKHR)\ X(vkDestroySwapchainKHR)\ X(vkGetSwapchainImagesKHR)\ X(vkAcquireNextImageKHR)\ 
// Loader

// Functions that don't require an instance
#define GPU_FOREACH_ANONYMOUS(X)\
  X(vkCreateInstance)

// Functions that require an instance but don't require a device
#define GPU_FOREACH_INSTANCE(X)\
  X(vkDestroyInstance)\
  X(vkCreateDebugUtilsMessengerEXT)\
  X(vkDestroyDebugUtilsMessengerEXT)\
  X(vkDestroySurfaceKHR)\
  X(vkEnumeratePhysicalDevices)\
  X(vkGetPhysicalDeviceProperties2)\
  X(vkGetPhysicalDeviceFeatures2)\
  X(vkGetPhysicalDeviceMemoryProperties)\
  X(vkGetPhysicalDeviceFormatProperties)\
  X(vkGetPhysicalDeviceQueueFamilyProperties)\
  X(vkGetPhysicalDeviceSurfaceSupportKHR)\
  X(vkGetPhysicalDeviceSurfaceCapabilitiesKHR)\
  X(vkGetPhysicalDeviceSurfaceFormatsKHR)\
  X(vkCreateDevice)\
  X(vkDestroyDevice)\
  X(vkGetDeviceQueue)\
  X(vkGetDeviceProcAddr)

// Functions that require a device
#define GPU_FOREACH_DEVICE(X)\
  X(vkSetDebugUtilsObjectNameEXT)\
  X(vkCmdBeginDebugUtilsLabelEXT)\
  X(vkCmdEndDebugUtilsLabelEXT)\
  X(vkDeviceWaitIdle)\
  X(vkQueueSubmit)\
  X(vkQueuePresentKHR)\
  X(vkCreateSwapchainKHR)\
  X(vkDestroySwapchainKHR)\
  X(vkGetSwapchainImagesKHR)\
  X(vkAcquireNextImageKHR)\
  X(vkCreateCommandPool)\
  X(vkDestroyCommandPool)\
  X(vkResetCommandPool)\
  X(vkAllocateCommandBuffers)\
  X(vkBeginCommandBuffer)\
  X(vkEndCommandBuffer)\
  X(vkCreateFence)\
  X(vkDestroyFence)\
  X(vkResetFences)\
  X(vkGetFenceStatus)\
  X(vkWaitForFences)\
  X(vkCreateSemaphore)\
  X(vkDestroySemaphore)\
  X(vkCmdPipelineBarrier)\
  X(vkCreateQueryPool)\
  X(vkDestroyQueryPool)\
  X(vkCmdResetQueryPool)\
  X(vkCmdWriteTimestamp)\
  X(vkCmdCopyQueryPoolResults)\
  X(vkCreateBuffer)\
  X(vkDestroyBuffer)\
  X(vkGetBufferMemoryRequirements)\
  X(vkBindBufferMemory)\
  X(vkCreateImage)\
  X(vkDestroyImage)\
  X(vkGetImageMemoryRequirements)\
  X(vkBindImageMemory)\
  X(vkCmdCopyBuffer)\
  X(vkCmdCopyImage)\
  X(vkCmdBlitImage)\
  X(vkCmdCopyBufferToImage)\
  X(vkCmdCopyImageToBuffer)\
  X(vkCmdFillBuffer)\
  X(vkCmdClearColorImage)\
  X(vkAllocateMemory)\
  X(vkFreeMemory)\
  X(vkMapMemory)\
  X(vkCreateSampler)\
  X(vkDestroySampler)\
  X(vkCreateRenderPass)\
  X(vkDestroyRenderPass)\
  X(vkCmdBeginRenderPass)\
  X(vkCmdEndRenderPass)\
  X(vkCreateImageView)\
  X(vkDestroyImageView)\
  X(vkCreateFramebuffer)\
  X(vkDestroyFramebuffer)\
  X(vkCreateShaderModule)\
  X(vkDestroyShaderModule)\
  X(vkCreateDescriptorSetLayout)\
  X(vkDestroyDescriptorSetLayout)\
  X(vkCreatePipelineLayout)\
  X(vkDestroyPipelineLayout)\
  X(vkCreateDescriptorPool)\
  X(vkDestroyDescriptorPool)\
  X(vkAllocateDescriptorSets)\
  X(vkResetDescriptorPool)\
  X(vkUpdateDescriptorSets)\
  X(vkCreatePipelineCache)\
  X(vkDestroyPipelineCache)\
  X(vkCreateGraphicsPipelines)\
  X(vkCreateComputePipelines)\
  X(vkDestroyPipeline)\
  X(vkCmdSetViewport)\
  X(vkCmdSetScissor)\
  X(vkCmdPushConstants)\
  X(vkCmdBindPipeline)\
  X(vkCmdBindDescriptorSets)\
  X(vkCmdBindVertexBuffers)\
  X(vkCmdBindIndexBuffer)\
  X(vkCmdDraw)\
  X(vkCmdDrawIndexed)\
  X(vkCmdDrawIndirect)\
  X(vkCmdDrawIndexedIndirect)\
  X(vkCmdDispatch)\
  X(vkCmdDispatchIndirect)

// Used to load/declare Vulkan functions without lots of clutter
#define GPU_LOAD_ANONYMOUS(fn) fn = (PFN_##fn) vkGetInstanceProcAddr(NULL, #fn);
#define GPU_LOAD_INSTANCE(fn) fn = (PFN_##fn) vkGetInstanceProcAddr(state.instance, #fn);
#define GPU_LOAD_DEVICE(fn) fn = (PFN_##fn) vkGetDeviceProcAddr(state.device, #fn);
#define GPU_DECLARE(fn) static PFN_##fn fn;

// Declare function pointers
GPU_FOREACH_ANONYMOUS(GPU_DECLARE)
GPU_FOREACH_INSTANCE(GPU_DECLARE)
GPU_FOREACH_DEVICE(GPU_DECLARE)
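// Example (illustrative): each X-macro list expands one line per entry, so a
// single list drives both the declarations and the loading code.
// GPU_FOREACH_ANONYMOUS(GPU_DECLARE) expands to:
//
//   static PFN_vkCreateInstance vkCreateInstance;
//
// and, inside gpu_init, GPU_FOREACH_ANONYMOUS(GPU_LOAD_ANONYMOUS) expands to:
//
//   vkCreateInstance = (PFN_vkCreateInstance) vkGetInstanceProcAddr(NULL, "vkCreateInstance");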
// Buffer

bool gpu_buffer_init(gpu_buffer* buffer, gpu_buffer_info* info) {
  if (info->handle) {
    buffer->handle = (VkBuffer) info->handle;
    buffer->memory = ~0u; // Imported buffers own no memory block (same sentinel as gpu_map)
    buffer->offset = 0;
    return true;
  }

  VkBufferCreateInfo createInfo = {
    .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
    .size = info->size,
    .usage =
      VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
      VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
      VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
      VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
      VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT |
      VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
      VK_BUFFER_USAGE_TRANSFER_DST_BIT
  };

  VK(vkCreateBuffer(state.device, &createInfo, NULL, &buffer->handle), "Could not create buffer") return false;
  nickname(buffer->handle, VK_OBJECT_TYPE_BUFFER, info->label);

  VkDeviceSize offset;
  VkMemoryRequirements requirements;
  vkGetBufferMemoryRequirements(state.device, buffer->handle, &requirements);
  gpu_memory* memory = gpu_allocate(GPU_MEMORY_BUFFER_GPU, requirements, &offset);

  if (!memory) { // gpu_allocate returns NULL when it runs out of blocks
    vkDestroyBuffer(state.device, buffer->handle, NULL);
    return false;
  }

  VK(vkBindBufferMemory(state.device, buffer->handle, memory->handle, offset), "Could not bind buffer memory") {
    vkDestroyBuffer(state.device, buffer->handle, NULL);
    gpu_release(memory);
    return false;
  }

  if (info->pointer) {
    *info->pointer = memory->pointer ? (char*) memory->pointer + offset : NULL;
  }

  buffer->memory = memory - state.memory;
  buffer->offset = 0; // The bind already accounts for the suballocation offset
  return true;
}

void gpu_buffer_destroy(gpu_buffer* buffer) {
  condemn(buffer->handle, VK_OBJECT_TYPE_BUFFER);
  if (buffer->memory != ~0u) gpu_release(&state.memory[buffer->memory]);
}

void* gpu_map(gpu_buffer* buffer, uint32_t size, uint32_t align, gpu_map_mode mode) {
  gpu_scratchpad* pool = &state.scratchpad[mode];
  uint32_t zone = state.tick[CPU] & TICK_MASK;
  uint32_t cursor = ALIGN(pool->cursor, align);
  bool oversized = size > (1 << 26);

  // "Big" buffers don't pollute the scratchpad (heuristic)
  if (oversized || cursor + size > pool->size) {
    uint32_t bufferSize;

    if (oversized) {
      bufferSize = size;
    } else {
      while (pool->size < size) {
        pool->size = pool->size ? (pool->size << 1) : (1 << 22);
      }
      bufferSize = pool->size * COUNTOF(state.ticks);
    }

    VkBufferCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
      .size = bufferSize,
      .usage = mode == GPU_MAP_WRITE ?
        (VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT) :
        VK_BUFFER_USAGE_TRANSFER_DST_BIT
    };

    VkBuffer handle;
    VK(vkCreateBuffer(state.device, &info, NULL, &handle), "Could not create scratch buffer") return NULL;
    nickname(handle, VK_OBJECT_TYPE_BUFFER, "Scratchpad");

    VkDeviceSize offset;
    VkMemoryRequirements requirements;
    vkGetBufferMemoryRequirements(state.device, handle, &requirements);
    gpu_memory* memory = gpu_allocate(GPU_MEMORY_BUFFER_CPU_WRITE + mode, requirements, &offset);

    if (!memory) {
      vkDestroyBuffer(state.device, handle, NULL);
      return NULL;
    }

    VK(vkBindBufferMemory(state.device, handle, memory->handle, offset), "Could not bind scratchpad memory") {
      vkDestroyBuffer(state.device, handle, NULL);
      gpu_release(memory);
      return NULL;
    }

    if (oversized) {
      // Releasing/condemning immediately is safe: the morgue defers the actual
      // destroy until the GPU has finished the current tick
      gpu_release(memory);
      condemn(handle, VK_OBJECT_TYPE_BUFFER);
      buffer->handle = handle;
      buffer->memory = ~0u;
      buffer->offset = 0;
      return memory->pointer;
    } else {
      gpu_release(pool->memory);
      condemn(pool->buffer, VK_OBJECT_TYPE_BUFFER);
      pool->memory = memory;
      pool->buffer = handle;
      pool->cursor = cursor = 0;
      pool->pointer = pool->memory->pointer;
    }
  }

  pool->cursor = cursor + size;
  buffer->handle = pool->buffer;
  buffer->memory = ~0u;
  buffer->offset = pool->size * zone + cursor;
  return pool->pointer + pool->size * zone + cursor;
}

// Stream

gpu_stream* gpu_stream_begin(const char* label) {
  gpu_tick* tick = &state.ticks[state.tick[CPU] & TICK_MASK];
  CHECK(state.streamCount < COUNTOF(tick->streams), "Too many streams") return NULL;
  gpu_stream* stream = &tick->streams[state.streamCount];
  nickname(stream->commands, VK_OBJECT_TYPE_COMMAND_BUFFER, label);

  VkCommandBufferBeginInfo beginfo = {
    .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
    .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
  };

  VK(vkBeginCommandBuffer(stream->commands, &beginfo), "Failed to begin stream") return NULL;
  state.streamCount++;
  return stream;
}

void gpu_stream_end(gpu_stream* stream) {
  VK(vkEndCommandBuffer(stream->commands), "Failed to end stream") return;
}

void gpu_copy_buffers(gpu_stream* stream, gpu_buffer* src, gpu_buffer* dst, uint32_t srcOffset, uint32_t dstOffset, uint32_t size) {
  vkCmdCopyBuffer(stream->commands, src->handle, dst->handle, 1, &(VkBufferCopy) {
    .srcOffset = src->offset + srcOffset,
    .dstOffset = dst->offset + dstOffset,
    .size = size
  });
}

void gpu_clear_buffer(gpu_stream* stream, gpu_buffer* buffer, uint32_t offset, uint32_t size) {
  // buffer->offset must be included so clears of scratchpad suballocations hit the right range
  vkCmdFillBuffer(stream->commands, buffer->handle, buffer->offset + offset, size, 0);
}
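// Example (illustrative caller-side sketch, inside a gpu_begin/gpu_submit
// tick; the data and labels are hypothetical): stage vertex data with gpu_map,
// then record a copy into a device-local buffer.
//
//   gpu_buffer staging, vertices;
//   gpu_buffer_init(&vertices, &(gpu_buffer_info) { .size = 3 * sizeof(float), .label = "vertices" });
//   float* data = gpu_map(&staging, 3 * sizeof(float), 4, GPU_MAP_WRITE);
//   data[0] = 0.f; data[1] = 1.f; data[2] = 2.f;
//   gpu_stream* stream = gpu_stream_begin("upload");
//   gpu_copy_buffers(stream, &staging, &vertices, 0, 0, 3 * sizeof(float));
//   gpu_stream_end(stream);
//   gpu_submit(&stream, 1);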
// Entry

bool gpu_init(gpu_config* config) {
  state.config = *config;

  // Load
#ifdef _WIN32
  state.library = LoadLibraryA("vulkan-1.dll");
  CHECK(state.library, "Failed to load vulkan library") return gpu_destroy(), false;
  PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr = (PFN_vkGetInstanceProcAddr) GetProcAddress(state.library, "vkGetInstanceProcAddr");
#elif __APPLE__
  state.library = dlopen("libvulkan.1.dylib", RTLD_NOW | RTLD_LOCAL);
  CHECK(state.library, "Failed to load vulkan library") return gpu_destroy(), false;
  PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr = (PFN_vkGetInstanceProcAddr) dlsym(state.library, "vkGetInstanceProcAddr");
#else
  state.library = dlopen("libvulkan.so", RTLD_NOW | RTLD_LOCAL);
  CHECK(state.library, "Failed to load vulkan library") return gpu_destroy(), false;
  PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr = (PFN_vkGetInstanceProcAddr) dlsym(state.library, "vkGetInstanceProcAddr");
#endif
  GPU_FOREACH_ANONYMOUS(GPU_LOAD_ANONYMOUS);
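  // Note (illustrative): loading is bootstrapped in three stages. Only
  // vkGetInstanceProcAddr is pulled out of the shared library directly;
  // everything else is resolved through it, or through vkGetDeviceProcAddr
  // once a device exists (device-level pointers skip dispatch overhead):
  //
  //   vkGetInstanceProcAddr <- GetProcAddress/dlsym(library, ...)
  //   vkCreateInstance      <- vkGetInstanceProcAddr(NULL, ...)      // anonymous
  //   vkCreateDevice        <- vkGetInstanceProcAddr(instance, ...)  // instance-level
  //   vkQueueSubmit         <- vkGetDeviceProcAddr(device, ...)      // device-level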
  { // Instance
    VkInstanceCreateInfo instanceInfo = {
      .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
      .pApplicationInfo = &(VkApplicationInfo) {
        .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
        .pEngineName = config->engineName,
        .engineVersion = VK_MAKE_VERSION(config->engineVersion[0], config->engineVersion[1], config->engineVersion[2]),
        .apiVersion = VK_MAKE_VERSION(1, 1, 0)
      },
      .enabledLayerCount = state.config.debug ? 1 : 0,
      .ppEnabledLayerNames = (const char*[]) { "VK_LAYER_KHRONOS_validation" },
      .enabledExtensionCount = state.config.debug ? 1 : 0,
      .ppEnabledExtensionNames = (const char*[]) { "VK_EXT_debug_utils" }
    };

    VK(vkCreateInstance(&instanceInfo, NULL, &state.instance), "Instance creation failed") return gpu_destroy(), false;

    GPU_FOREACH_INSTANCE(GPU_LOAD_INSTANCE);

    if (state.config.debug) {
      VkDebugUtilsMessengerCreateInfoEXT messengerInfo = {
        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
        .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
        .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT,
        .pfnUserCallback = relay
      };

      VK(vkCreateDebugUtilsMessengerEXT(state.instance, &messengerInfo, NULL, &state.messenger), "Debug hook setup failed") return gpu_destroy(), false;
    }
  }

  { // Device
    // Just take the first enumerated adapter (VK_INCOMPLETE passes vcheck, since it's non-negative)
    uint32_t deviceCount = 1;
    VK(vkEnumeratePhysicalDevices(state.instance, &deviceCount, &state.adapter), "Physical device enumeration failed") return gpu_destroy(), false;

    VkPhysicalDeviceMultiviewProperties multiviewProperties = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES };
    VkPhysicalDeviceSubgroupProperties subgroupProperties = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES, .pNext = &multiviewProperties };
    VkPhysicalDeviceProperties2 properties2 = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, .pNext = &subgroupProperties };
    vkGetPhysicalDeviceProperties2(state.adapter, &properties2);

    if (config->device) {
      VkPhysicalDeviceProperties* properties = &properties2.properties;
      config->device->deviceId = properties->deviceID;
      config->device->vendorId = properties->vendorID;
      memcpy(config->device->deviceName, properties->deviceName, MIN(sizeof(config->device->deviceName), sizeof(properties->deviceName)));
      config->device->renderer = "Vulkan";
      config->device->subgroupSize = subgroupProperties.subgroupSize;
      config->device->discrete = properties->deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
    }

    if (config->limits) {
      VkPhysicalDeviceLimits* limits = &properties2.properties.limits;
      config->limits->textureSize2D = MIN(limits->maxImageDimension2D, UINT16_MAX);
      config->limits->textureSize3D = MIN(limits->maxImageDimension3D, UINT16_MAX);
      config->limits->textureSizeCube = MIN(limits->maxImageDimensionCube, UINT16_MAX);
      config->limits->textureLayers = MIN(limits->maxImageArrayLayers, UINT16_MAX);
      config->limits->renderSize[0] = limits->maxFramebufferWidth;
      config->limits->renderSize[1] = limits->maxFramebufferHeight;
      config->limits->renderSize[2] = multiviewProperties.maxMultiviewViewCount;
      config->limits->uniformBufferRange = limits->maxUniformBufferRange;
      config->limits->storageBufferRange = limits->maxStorageBufferRange;
      config->limits->uniformBufferAlign = limits->minUniformBufferOffsetAlignment;
      config->limits->storageBufferAlign = limits->minStorageBufferOffsetAlignment;
      config->limits->vertexAttributes = limits->maxVertexInputAttributes;
      config->limits->vertexBuffers = limits->maxVertexInputBindings;
      config->limits->vertexBufferStride = MIN(limits->maxVertexInputBindingStride, UINT16_MAX);
      config->limits->vertexShaderOutputs = limits->maxVertexOutputComponents;
      config->limits->clipDistances = limits->maxClipDistances;
      config->limits->cullDistances = limits->maxCullDistances;
      config->limits->clipAndCullDistances = limits->maxCombinedClipAndCullDistances;
      config->limits->computeDispatchCount[0] = limits->maxComputeWorkGroupCount[0];
      config->limits->computeDispatchCount[1] = limits->maxComputeWorkGroupCount[1];
      config->limits->computeDispatchCount[2] = limits->maxComputeWorkGroupCount[2];
      config->limits->computeWorkgroupSize[0] = limits->maxComputeWorkGroupSize[0];
      config->limits->computeWorkgroupSize[1] = limits->maxComputeWorkGroupSize[1];
      config->limits->computeWorkgroupSize[2] = limits->maxComputeWorkGroupSize[2];
      config->limits->computeWorkgroupVolume = limits->maxComputeWorkGroupInvocations;
      config->limits->computeSharedMemory = limits->maxComputeSharedMemorySize;
      config->limits->pushConstantSize = limits->maxPushConstantsSize;
      config->limits->indirectDrawCount = limits->maxDrawIndirectCount;
      config->limits->instances = multiviewProperties.maxMultiviewInstanceIndex;
      config->limits->anisotropy = limits->maxSamplerAnisotropy;
      config->limits->pointSize = limits->pointSizeRange[1];
    }
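    // Example (illustrative, caller side): the alignment limits above are what
    // a caller typically consumes; a dynamic uniform buffer stride must be
    // rounded up with uniformBufferAlign, e.g. using the ALIGN helper. The
    // MyUniforms type here is hypothetical:
    //
    //   uint32_t stride = ALIGN(sizeof(MyUniforms), config.limits->uniformBufferAlign);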
    VkPhysicalDeviceShaderDrawParameterFeatures shaderDrawParameterFeatures = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES };
    VkPhysicalDeviceMultiviewFeatures multiviewFeatures = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES, .pNext = &shaderDrawParameterFeatures };
    VkPhysicalDeviceFeatures2 enabledFeatures = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, .pNext = &multiviewFeatures };

    if (config->features) {
      VkPhysicalDeviceFeatures2 features2 = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2 };
      VkPhysicalDeviceFeatures* enable = &enabledFeatures.features;
      VkPhysicalDeviceFeatures* supports = &features2.features;
      vkGetPhysicalDeviceFeatures2(state.adapter, &features2);

      // Required features
      enable->fullDrawIndexUint32 = true;
      multiviewFeatures.multiview = true;
      shaderDrawParameterFeatures.shaderDrawParameters = true;

      // Internal features (exposed as limits)
      enable->samplerAnisotropy = supports->samplerAnisotropy;
      enable->multiDrawIndirect = supports->multiDrawIndirect;
      enable->shaderClipDistance = supports->shaderClipDistance;
      enable->shaderCullDistance = supports->shaderCullDistance;
      enable->largePoints = supports->largePoints;

      // Optional features (currently always enabled when supported)
      config->features->textureBC = enable->textureCompressionBC = supports->textureCompressionBC;
      config->features->textureASTC = enable->textureCompressionASTC_LDR = supports->textureCompressionASTC_LDR;
      config->features->wireframe = enable->fillModeNonSolid = supports->fillModeNonSolid;
      config->features->depthClamp = enable->depthClamp = supports->depthClamp;
      config->features->indirectDrawFirstInstance = enable->drawIndirectFirstInstance = supports->drawIndirectFirstInstance;
      config->features->float64 = enable->shaderFloat64 = supports->shaderFloat64;
      config->features->int64 = enable->shaderInt64 = supports->shaderInt64;
      config->features->int16 = enable->shaderInt16 = supports->shaderInt16;
    }

    state.queueFamilyIndex = ~0u;
    VkQueueFamilyProperties queueFamilies[8];
    uint32_t queueFamilyCount = COUNTOF(queueFamilies);
    vkGetPhysicalDeviceQueueFamilyProperties(state.adapter, &queueFamilyCount, queueFamilies);

    for (uint32_t i = 0; i < queueFamilyCount; i++) {
      uint32_t mask = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT;
      if ((queueFamilies[i].queueFlags & mask) == mask) {
        state.queueFamilyIndex = i;
        break;
      }
    }

    CHECK(state.queueFamilyIndex != ~0u, "Queue selection failed") return gpu_destroy(), false;

    VkDeviceCreateInfo deviceInfo = {
      .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
      .pNext = config->features ? &enabledFeatures : NULL,
      .queueCreateInfoCount = 1,
      .pQueueCreateInfos = &(VkDeviceQueueCreateInfo) {
        .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
        .queueFamilyIndex = state.queueFamilyIndex,
        .pQueuePriorities = &(float) { 1.f },
        .queueCount = 1
      }
    };

    VK(vkCreateDevice(state.adapter, &deviceInfo, NULL, &state.device), "Device creation failed") return gpu_destroy(), false;

    vkGetDeviceQueue(state.device, state.queueFamilyIndex, 0, &state.queue);

    GPU_FOREACH_DEVICE(GPU_LOAD_DEVICE);
  }
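  // Note (illustrative): the feature structs above form a single pNext chain
  // handed to vkCreateDevice, so one call enables core and extension features
  // together:
  //
  //   VkDeviceCreateInfo.pNext
  //     -> VkPhysicalDeviceFeatures2 (core features)
  //       -> VkPhysicalDeviceMultiviewFeatures
  //         -> VkPhysicalDeviceShaderDrawParameterFeatures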
  { // Allocators
    VkPhysicalDeviceMemoryProperties memoryProperties;
    vkGetPhysicalDeviceMemoryProperties(state.adapter, &memoryProperties);
    VkMemoryType* memoryTypes = memoryProperties.memoryTypes;

    VkMemoryPropertyFlags hostVisible = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;

    struct { VkBufferUsageFlags usage; VkMemoryPropertyFlags flags; } bufferFlags[] = {
      [GPU_MEMORY_BUFFER_GPU] = {
        .usage =
          VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
          VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
          VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
          VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
          VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT |
          VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
          VK_BUFFER_USAGE_TRANSFER_DST_BIT,
        .flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
      },
      [GPU_MEMORY_BUFFER_CPU_WRITE] = {
        .usage =
          VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
          VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
          VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
          VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
        .flags = hostVisible | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
      },
      [GPU_MEMORY_BUFFER_CPU_READ] = {
        .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT,
        .flags = hostVisible | VK_MEMORY_PROPERTY_HOST_CACHED_BIT
      }
    };

    // Without VK_KHR_maintenance4, you have to create objects to figure out memory requirements
    for (uint32_t i = 0; i < COUNTOF(bufferFlags); i++) {
      gpu_allocator* allocator = &state.allocators[i];
      state.allocatorLookup[i] = i; // Identity mapping; gpu_allocate indexes allocators through this table

      VkBufferCreateInfo createInfo = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .usage = bufferFlags[i].usage,
        .size = 4
      };

      VkBuffer buffer;
      VkMemoryRequirements requirements;
      vkCreateBuffer(state.device, &createInfo, NULL, &buffer);
      vkGetBufferMemoryRequirements(state.device, buffer, &requirements);
      vkDestroyBuffer(state.device, buffer, NULL);

      VkMemoryPropertyFlags fallback = i == GPU_MEMORY_BUFFER_GPU ? VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT : hostVisible;

      for (uint32_t j = 0; j < memoryProperties.memoryTypeCount; j++) {
        if (~requirements.memoryTypeBits & (1 << j)) {
          continue;
        }

        // Check the ideal flags first (they're a superset of the fallback flags,
        // so testing the fallback first would shadow them); otherwise remember
        // an acceptable fallback and keep looking
        if ((memoryTypes[j].propertyFlags & bufferFlags[i].flags) == bufferFlags[i].flags) {
          allocator->memoryFlags = memoryTypes[j].propertyFlags;
          allocator->memoryType = j;
          break;
        }

        if ((memoryTypes[j].propertyFlags & fallback) == fallback) {
          allocator->memoryFlags = memoryTypes[j].propertyFlags;
          allocator->memoryType = j;
        }
      }
    }
  }

  // Ticks
  for (uint32_t i = 0; i < COUNTOF(state.ticks); i++) {
    VkCommandPoolCreateInfo poolInfo = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
      .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
      .queueFamilyIndex = state.queueFamilyIndex
    };

    VK(vkCreateCommandPool(state.device, &poolInfo, NULL, &state.ticks[i].pool), "Command pool creation failed") return gpu_destroy(), false;

    VkCommandBufferAllocateInfo allocateInfo = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
      .commandPool = state.ticks[i].pool,
      .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
      .commandBufferCount = COUNTOF(state.ticks[i].streams)
    };

    // gpu_stream only wraps a VkCommandBuffer, so the streams array can be filled in place
    VkCommandBuffer* commandBuffers = &state.ticks[i].streams[0].commands;
    VK(vkAllocateCommandBuffers(state.device, &allocateInfo, commandBuffers), "Command buffer allocation failed") return gpu_destroy(), false;

    VkSemaphoreCreateInfo semaphoreInfo = { .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO };
    VK(vkCreateSemaphore(state.device, &semaphoreInfo, NULL, &state.ticks[i].semaphores[0]), "Semaphore creation failed") return gpu_destroy(), false;
    VK(vkCreateSemaphore(state.device, &semaphoreInfo, NULL, &state.ticks[i].semaphores[1]), "Semaphore creation failed") return gpu_destroy(), false;

    VkFenceCreateInfo fenceInfo = {
      .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
      .flags = VK_FENCE_CREATE_SIGNALED_BIT
    };

    VK(vkCreateFence(state.device, &fenceInfo, NULL, &state.ticks[i].fence), "Fence creation failed") return gpu_destroy(), false;
  }

  state.tick[CPU] = COUNTOF(state.ticks) - 1;
  return true;
}

void gpu_destroy(void) {
  if (state.device) vkDeviceWaitIdle(state.device);
  state.tick[GPU] = state.tick[CPU];
  expunge();
  if (state.scratchpad[0].buffer) vkDestroyBuffer(state.device, state.scratchpad[0].buffer, NULL);
  if (state.scratchpad[1].buffer) vkDestroyBuffer(state.device, state.scratchpad[1].buffer, NULL);
  for (uint32_t i = 0; i < COUNTOF(state.ticks); i++) {
    gpu_tick* tick = &state.ticks[i];
    if (tick->pool) vkDestroyCommandPool(state.device, tick->pool, NULL);
    if (tick->semaphores[0]) vkDestroySemaphore(state.device, tick->semaphores[0], NULL);
    if (tick->semaphores[1]) vkDestroySemaphore(state.device, tick->semaphores[1], NULL);
    if (tick->fence) vkDestroyFence(state.device, tick->fence, NULL);
  }
  for (uint32_t i = 0; i < COUNTOF(state.memory); i++) {
    if (state.memory[i].handle) vkFreeMemory(state.device, state.memory[i].handle, NULL);
  }
  if (state.device) vkDestroyDevice(state.device, NULL);
  if (state.messenger) vkDestroyDebugUtilsMessengerEXT(state.instance, state.messenger, NULL);
  if (state.instance) vkDestroyInstance(state.instance, NULL);
#ifdef _WIN32
  if (state.library) FreeLibrary(state.library);
#else
  if (state.library) dlclose(state.library);
#endif
  memset(&state, 0, sizeof(state));
}

uint32_t gpu_begin() {
  gpu_tick* tick = &state.ticks[++state.tick[CPU] & TICK_MASK];
  VK(vkWaitForFences(state.device, 1, &tick->fence, VK_FALSE, ~0ull), "Fence wait failed") return 0;
  VK(vkResetFences(state.device, 1, &tick->fence), "Fence reset failed") return 0;
  VK(vkResetCommandPool(state.device, tick->pool, 0), "Command pool reset failed") return 0;
  state.tick[GPU] = MAX(state.tick[GPU], state.tick[CPU] - COUNTOF(state.ticks));
  state.streamCount = 0;
  expunge();
  return state.tick[CPU];
}

void gpu_submit(gpu_stream** streams, uint32_t count) {
  gpu_tick* tick = &state.ticks[state.tick[CPU] & TICK_MASK];

  VkCommandBuffer commands[COUNTOF(tick->streams)];
  for (uint32_t i = 0; i < count; i++) {
    commands[i] = streams[i]->commands;
  }

  VkSubmitInfo submit = {
    .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
    .commandBufferCount = count,
    .pCommandBuffers = commands
  };

  VK(vkQueueSubmit(state.queue, 1, &submit, tick->fence), "Queue submit failed") return;
}

bool gpu_finished(uint32_t tick) {
  return state.tick[GPU] >= tick;
}
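// Example (illustrative sketch of a frame loop; error handling omitted and the
// config values are hypothetical):
//
//   gpu_config config = { .debug = true, .engineName = "demo", .engineVersion = { 0, 1, 0 } };
//   gpu_init(&config);
//   while (running) {
//     uint32_t tick = gpu_begin();            // waits on this slot's fence, recycles its pool
//     gpu_stream* stream = gpu_stream_begin("frame");
//     // ... record copies/clears ...
//     gpu_stream_end(stream);
//     gpu_submit(&stream, 1);                 // signals the tick's fence
//     // later, gpu_finished(tick) reports whether the GPU has moved past this tick
//   }
//   gpu_destroy();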
// Helpers

static gpu_memory* gpu_allocate(gpu_memory_type type, VkMemoryRequirements info, VkDeviceSize* offset) {
  gpu_allocator* allocator = &state.allocators[state.allocatorLookup[type]];

  static const uint32_t blockSizes[] = {
    [GPU_MEMORY_BUFFER_GPU] = 1 << 26,
    [GPU_MEMORY_BUFFER_CPU_WRITE] = 0,
    [GPU_MEMORY_BUFFER_CPU_READ] = 0
  };

  uint32_t blockSize = blockSizes[type];
  uint32_t cursor = ALIGN(allocator->cursor, info.alignment);

  // Suballocate from the active block, if it's still alive and has room
  if (allocator->block && allocator->block->handle && cursor + info.size <= blockSize) {
    allocator->cursor = cursor + info.size;
    allocator->block->refs++;
    *offset = cursor;
    return allocator->block;
  }

  // If there wasn't an active block or it overflowed, find an empty block to allocate
  for (uint32_t i = 0; i < COUNTOF(state.memory); i++) {
    if (!state.memory[i].handle) {
      gpu_memory* memory = &state.memory[i];

      VkMemoryAllocateInfo memoryInfo = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .allocationSize = MAX(blockSize, info.size),
        .memoryTypeIndex = allocator->memoryType
      };

      VK(vkAllocateMemory(state.device, &memoryInfo, NULL, &memory->handle), "Failed to allocate GPU memory") {
        allocator->block = NULL;
        return NULL;
      }

      memory->pointer = NULL; // Clear any stale mapping left over from a previous use of this slot

      if (allocator->memoryFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
        VK(vkMapMemory(state.device, memory->handle, 0, VK_WHOLE_SIZE, 0, &memory->pointer), "Failed to map memory") {
          vkFreeMemory(state.device, memory->handle, NULL);
          memory->handle = NULL;
          return NULL;
        }
      }

      allocator->block = memory;
      allocator->cursor = info.size;
      memory->refs = 1; // The caller owns one reference; suballocations add more
      *offset = 0;
      return memory;
    }
  }

  check(false, "Out of space for memory blocks");
  return NULL;
}

static void gpu_release(gpu_memory* memory) {
  if (memory && --memory->refs == 0) {
    condemn(memory->handle, VK_OBJECT_TYPE_DEVICE_MEMORY);
    memory->handle = NULL;
  }
}

static void condemn(void* handle, VkObjectType type) {
  if (!handle) return;

  gpu_morgue* morgue = &state.morgue;

  // If the morgue is full, perform an emergency expunge
  if (morgue->head - morgue->tail >= COUNTOF(morgue->data)) {
    vkDeviceWaitIdle(state.device);
    state.tick[GPU] = state.tick[CPU];
    expunge();
  }

  morgue->data[morgue->head++ & MORGUE_MASK] = (gpu_victim) { handle, type, state.tick[CPU] };
}

static void expunge() {
  gpu_morgue* morgue = &state.morgue;
  while (morgue->tail != morgue->head && state.tick[GPU] >= morgue->data[morgue->tail & MORGUE_MASK].tick) {
    gpu_victim* victim = &morgue->data[morgue->tail++ & MORGUE_MASK];
    switch (victim->type) {
      case VK_OBJECT_TYPE_BUFFER: vkDestroyBuffer(state.device, victim->handle, NULL); break;
      case VK_OBJECT_TYPE_DEVICE_MEMORY: vkFreeMemory(state.device, victim->handle, NULL); break;
      default: break;
    }
  }
}
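// Example (illustrative): the deferred-destruction timeline. condemn() stamps
// a handle with the current CPU tick; expunge() only destroys it once the GPU
// tick counter has caught up, so in-flight command buffers never lose their
// resources:
//
//   tick[CPU] == 7: condemn(buffer)      -> morgue entry { buffer, tick 7 }
//   ticks 8..10: buffer may still be referenced by submitted work
//   gpu_begin() sees tick[GPU] >= 7      -> expunge() calls vkDestroyBuffer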
static VkBool32 relay(VkDebugUtilsMessageSeverityFlagBitsEXT severity, VkDebugUtilsMessageTypeFlagsEXT flags, const VkDebugUtilsMessengerCallbackDataEXT* data, void* userdata) {
  if (state.config.callback) {
    bool severe = severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT;
    state.config.callback(state.config.userdata, data->pMessage, severe);
  }
  return VK_FALSE;
}

static void nickname(void* handle, VkObjectType type, const char* name) {
  if (name && state.config.debug) {
    union { uint64_t u64; void* p; } pointer = { .p = handle };

    VkDebugUtilsObjectNameInfoEXT info = {
      .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
      .objectType = type,
      .objectHandle = pointer.u64,
      .pObjectName = name
    };

    VK(vkSetDebugUtilsObjectNameEXT(state.device, &info), "Nickname failed") {}
  }
}

static bool vcheck(VkResult result, const char* message) {
  if (result >= 0) return true;
  if (!state.config.callback) return false;

#define CASE(x) case x: state.config.callback(state.config.userdata, "Vulkan error: " #x, false); break;
  switch (result) {
    CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
    CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
    CASE(VK_ERROR_INITIALIZATION_FAILED);
    CASE(VK_ERROR_DEVICE_LOST);
    CASE(VK_ERROR_MEMORY_MAP_FAILED);
    CASE(VK_ERROR_LAYER_NOT_PRESENT);
    CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
    CASE(VK_ERROR_FEATURE_NOT_PRESENT);
    CASE(VK_ERROR_TOO_MANY_OBJECTS);
    CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
    CASE(VK_ERROR_FRAGMENTED_POOL);
    CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
    default: break;
  }
#undef CASE

  state.config.callback(state.config.userdata, message, true);
  return false;
}

static bool check(bool condition, const char* message) {
  if (!condition && state.config.callback) {
    state.config.callback(state.config.userdata, message, true);
  }
  return condition;
}
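// Example (illustrative): a minimal gpu_config.callback that receives both
// validation messages (via relay) and internal errors (via vcheck/check). The
// onMessage name is hypothetical; the signature matches the calls above:
//
//   static void onMessage(void* userdata, const char* message, bool severe) {
//     fprintf(stderr, "%s: %s\n", severe ? "error" : "info", message);
//   }
//
//   gpu_config config = { .debug = true, .callback = onMessage };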