// lovr/src/core/gpu_vk.c

#include "gpu.h"
#include <string.h>
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#else
#include <dlfcn.h>
#endif
#if defined(_WIN32)
#define VK_USE_PLATFORM_WIN32_KHR
#elif defined(__APPLE__)
#define VK_USE_PLATFORM_METAL_EXT
#elif defined(__linux__) && !defined(__ANDROID__)
#define VK_USE_PLATFORM_XCB_KHR
#endif
#define VK_NO_PROTOTYPES
#include <vulkan/vulkan.h>
// Objects
struct gpu_buffer {
VkBuffer handle;
uint32_t memory;
};
struct gpu_texture {
VkImage handle;
VkImageView view;
VkImageAspectFlagBits aspect;
VkImageLayout layout;
uint32_t memory;
uint32_t layers;
uint8_t format;
bool srgb;
};
struct gpu_sampler {
VkSampler handle;
};
struct gpu_layout {
VkDescriptorSetLayout handle;
uint32_t descriptorCounts[7];
};
struct gpu_shader {
VkShaderModule handles[2];
VkPipelineLayout pipelineLayout;
};
struct gpu_bundle_pool {
VkDescriptorPool handle;
};
struct gpu_bundle {
VkDescriptorSet handle;
};
struct gpu_pass {
VkRenderPass handle;
uint8_t colorCount;
uint8_t samples;
uint8_t loadMask;
bool depthLoad;
bool surface;
};
struct gpu_pipeline {
VkPipeline handle;
};
struct gpu_tally {
VkQueryPool handle;
};
struct gpu_stream {
VkCommandBuffer commands;
};
size_t gpu_sizeof_buffer(void) { return sizeof(gpu_buffer); }
size_t gpu_sizeof_texture(void) { return sizeof(gpu_texture); }
size_t gpu_sizeof_sampler(void) { return sizeof(gpu_sampler); }
size_t gpu_sizeof_layout(void) { return sizeof(gpu_layout); }
size_t gpu_sizeof_shader(void) { return sizeof(gpu_shader); }
size_t gpu_sizeof_bundle_pool(void) { return sizeof(gpu_bundle_pool); }
size_t gpu_sizeof_bundle(void) { return sizeof(gpu_bundle); }
size_t gpu_sizeof_pass(void) { return sizeof(gpu_pass); }
size_t gpu_sizeof_pipeline(void) { return sizeof(gpu_pipeline); }
size_t gpu_sizeof_tally(void) { return sizeof(gpu_tally); }
// Internals
typedef struct {
VkDeviceMemory handle;
void* pointer;
uint32_t refs;
} gpu_memory;
typedef enum {
GPU_MEMORY_BUFFER_STATIC,
GPU_MEMORY_BUFFER_STREAM,
GPU_MEMORY_BUFFER_UPLOAD,
GPU_MEMORY_BUFFER_DOWNLOAD,
GPU_MEMORY_TEXTURE_COLOR,
GPU_MEMORY_TEXTURE_D16,
GPU_MEMORY_TEXTURE_D32F,
GPU_MEMORY_TEXTURE_D24S8,
GPU_MEMORY_TEXTURE_D32FS8,
GPU_MEMORY_TEXTURE_LAZY_COLOR,
GPU_MEMORY_TEXTURE_LAZY_D16,
GPU_MEMORY_TEXTURE_LAZY_D32F,
GPU_MEMORY_TEXTURE_LAZY_D24S8,
GPU_MEMORY_TEXTURE_LAZY_D32FS8,
GPU_MEMORY_COUNT
} gpu_memory_type;
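// Each memory type above gets its own allocator, which sub-allocates resources out of a shared gpu_memory block using a bump-style cursor.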
typedef struct {
gpu_memory* block;
uint32_t cursor;
uint16_t memoryType;
uint16_t memoryFlags;
} gpu_allocator;
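// Destroyed handles are "condemned" into the morgue (a ring buffer) and only truly deleted by expunge() once the GPU has finished the tick that last used them.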
typedef struct {
void* handle;
VkObjectType type;
uint32_t tick;
} gpu_victim;
typedef struct {
uint32_t head;
uint32_t tail;
gpu_victim data[1024];
} gpu_morgue;
typedef struct {
VkSurfaceKHR handle;
VkSwapchainKHR swapchain;
VkSurfaceCapabilitiesKHR capabilities;
VkSurfaceFormatKHR format;
VkSemaphore semaphore;
gpu_texture images[8];
uint32_t imageIndex;
bool vsync;
bool valid;
} gpu_surface;
typedef struct {
VkCommandPool pool;
gpu_stream streams[64];
VkSemaphore semaphores[2];
VkFence fence;
} gpu_tick;
typedef struct {
bool portability;
bool validation;
bool debug;
bool shaderDebug;
bool surface;
bool surfaceOS;
bool swapchain;
bool colorspace;
bool depthResolve;
bool formatList;
} gpu_extensions;
// State
static struct {
void* library;
gpu_config config;
gpu_extensions extensions;
gpu_surface surface;
VkInstance instance;
VkPhysicalDevice adapter;
VkDevice device;
VkQueue queue;
uint32_t queueFamilyIndex;
VkPipelineCache pipelineCache;
VkDebugUtilsMessengerEXT messenger;
gpu_allocator allocators[GPU_MEMORY_COUNT];
uint8_t allocatorLookup[GPU_MEMORY_COUNT];
gpu_memory memory[256];
uint32_t streamCount;
uint32_t tick[2];
gpu_tick ticks[2];
gpu_morgue morgue;
} state;
// Helpers
enum { CPU, GPU };
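// state.tick[CPU] is the tick currently being recorded; state.tick[GPU] tracks the most recent tick the GPU has finished.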
enum { LINEAR, SRGB };
#define MIN(a, b) (a < b ? a : b)
#define MAX(a, b) (a > b ? a : b)
#define COUNTOF(x) (sizeof(x) / sizeof(x[0]))
#define ALIGN(p, n) (((uintptr_t) (p) + (n - 1)) & ~(n - 1))
#define LOG(s) if (state.config.fnLog) state.config.fnLog(state.config.userdata, s, false)
#define VK(f, s) if (!vcheck(f, s))
#define CHECK(c, s) if (!check(c, s))
#define TICK_MASK (COUNTOF(state.ticks) - 1)
#define MORGUE_MASK (COUNTOF(state.morgue.data) - 1)
static gpu_memory* gpu_allocate(gpu_memory_type type, VkMemoryRequirements info, VkDeviceSize* offset);
static void gpu_release(gpu_memory* memory);
static void condemn(void* handle, VkObjectType type);
static void expunge(void);
static bool hasLayer(VkLayerProperties* layers, uint32_t count, const char* layer);
static bool hasExtension(VkExtensionProperties* extensions, uint32_t count, const char* extension);
static VkBufferUsageFlags getBufferUsage(gpu_buffer_type type);
static bool transitionAttachment(gpu_texture* texture, bool begin, bool resolve, bool discard, VkImageMemoryBarrier2KHR* barrier);
static VkImageLayout getNaturalLayout(uint32_t usage, VkImageAspectFlags aspect);
static VkFormat convertFormat(gpu_texture_format format, int colorspace);
static VkPipelineStageFlags2 convertPhase(gpu_phase phase, bool dst);
static VkAccessFlags2 convertCache(gpu_cache cache);
static VkBool32 relay(VkDebugUtilsMessageSeverityFlagBitsEXT severity, VkDebugUtilsMessageTypeFlagsEXT flags, const VkDebugUtilsMessengerCallbackDataEXT* data, void* userdata);
static void nickname(void* object, VkObjectType type, const char* name);
static bool vcheck(VkResult result, const char* message);
static bool check(bool condition, const char* message);
// Loader
// Functions that don't require an instance
#define GPU_FOREACH_ANONYMOUS(X)\
X(vkEnumerateInstanceLayerProperties)\
X(vkEnumerateInstanceExtensionProperties)\
X(vkCreateInstance)
// Functions that require an instance but don't require a device
#define GPU_FOREACH_INSTANCE(X)\
X(vkDestroyInstance)\
X(vkCreateDebugUtilsMessengerEXT)\
X(vkDestroyDebugUtilsMessengerEXT)\
X(vkDestroySurfaceKHR)\
X(vkEnumeratePhysicalDevices)\
X(vkGetPhysicalDeviceProperties2)\
X(vkGetPhysicalDeviceFeatures2)\
X(vkGetPhysicalDeviceMemoryProperties)\
X(vkGetPhysicalDeviceFormatProperties)\
X(vkGetPhysicalDeviceQueueFamilyProperties)\
X(vkGetPhysicalDeviceSurfaceSupportKHR)\
X(vkGetPhysicalDeviceSurfaceCapabilitiesKHR)\
X(vkGetPhysicalDeviceSurfaceFormatsKHR)\
X(vkEnumerateDeviceExtensionProperties)\
X(vkCreateDevice)\
X(vkDestroyDevice)\
X(vkGetDeviceQueue)\
X(vkGetDeviceProcAddr)
// Functions that require a device
#define GPU_FOREACH_DEVICE(X)\
X(vkSetDebugUtilsObjectNameEXT)\
X(vkDeviceWaitIdle)\
X(vkQueueSubmit)\
X(vkQueuePresentKHR)\
X(vkCreateSwapchainKHR)\
X(vkDestroySwapchainKHR)\
X(vkGetSwapchainImagesKHR)\
X(vkAcquireNextImageKHR)\
X(vkCreateCommandPool)\
X(vkDestroyCommandPool)\
X(vkResetCommandPool)\
X(vkAllocateCommandBuffers)\
X(vkBeginCommandBuffer)\
X(vkEndCommandBuffer)\
X(vkCreateFence)\
X(vkDestroyFence)\
X(vkResetFences)\
X(vkGetFenceStatus)\
X(vkWaitForFences)\
X(vkCreateSemaphore)\
X(vkDestroySemaphore)\
X(vkCmdPipelineBarrier2KHR)\
X(vkCreateQueryPool)\
X(vkDestroyQueryPool)\
X(vkCmdResetQueryPool)\
X(vkCmdBeginQuery)\
X(vkCmdEndQuery)\
X(vkCmdWriteTimestamp)\
X(vkCmdCopyQueryPoolResults)\
X(vkGetQueryPoolResults)\
X(vkCreateBuffer)\
X(vkDestroyBuffer)\
X(vkGetBufferMemoryRequirements)\
X(vkBindBufferMemory)\
X(vkCreateImage)\
X(vkDestroyImage)\
X(vkGetImageMemoryRequirements)\
X(vkBindImageMemory)\
X(vkCmdCopyBuffer)\
X(vkCmdCopyImage)\
X(vkCmdBlitImage)\
X(vkCmdCopyBufferToImage)\
X(vkCmdCopyImageToBuffer)\
X(vkCmdFillBuffer)\
X(vkCmdClearColorImage)\
X(vkCmdClearDepthStencilImage)\
X(vkAllocateMemory)\
X(vkFreeMemory)\
X(vkMapMemory)\
X(vkCreateSampler)\
X(vkDestroySampler)\
X(vkCreateRenderPass2KHR)\
X(vkDestroyRenderPass)\
X(vkCmdBeginRenderPass2KHR)\
X(vkCmdEndRenderPass2KHR)\
X(vkCreateImageView)\
X(vkDestroyImageView)\
X(vkCreateFramebuffer)\
X(vkDestroyFramebuffer)\
X(vkCreateShaderModule)\
X(vkDestroyShaderModule)\
X(vkCreateDescriptorSetLayout)\
X(vkDestroyDescriptorSetLayout)\
X(vkCreatePipelineLayout)\
X(vkDestroyPipelineLayout)\
X(vkCreateDescriptorPool)\
X(vkDestroyDescriptorPool)\
X(vkAllocateDescriptorSets)\
X(vkResetDescriptorPool)\
X(vkUpdateDescriptorSets)\
X(vkCreatePipelineCache)\
X(vkDestroyPipelineCache)\
X(vkGetPipelineCacheData)\
X(vkCreateGraphicsPipelines)\
X(vkCreateComputePipelines)\
X(vkDestroyPipeline)\
X(vkCmdSetViewport)\
X(vkCmdSetScissor)\
X(vkCmdPushConstants)\
X(vkCmdBindPipeline)\
X(vkCmdBindDescriptorSets)\
X(vkCmdBindVertexBuffers)\
X(vkCmdBindIndexBuffer)\
X(vkCmdDraw)\
X(vkCmdDrawIndexed)\
X(vkCmdDrawIndirect)\
X(vkCmdDrawIndexedIndirect)\
X(vkCmdDispatch)\
X(vkCmdDispatchIndirect)
// Used to load/declare Vulkan functions without lots of clutter
#define GPU_LOAD_ANONYMOUS(fn) fn = (PFN_##fn) vkGetInstanceProcAddr(NULL, #fn);
#define GPU_LOAD_INSTANCE(fn) fn = (PFN_##fn) vkGetInstanceProcAddr(state.instance, #fn);
#define GPU_LOAD_DEVICE(fn) fn = (PFN_##fn) vkGetDeviceProcAddr(state.device, #fn);
#define GPU_DECLARE(fn) static PFN_##fn fn;
// Declare function pointers
GPU_DECLARE(vkGetInstanceProcAddr)
GPU_FOREACH_ANONYMOUS(GPU_DECLARE)
GPU_FOREACH_INSTANCE(GPU_DECLARE)
GPU_FOREACH_DEVICE(GPU_DECLARE)
// Buffer
bool gpu_buffer_init(gpu_buffer* buffer, gpu_buffer_info* info) {
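// Buffers imported from an external VkBuffer handle are adopted as-is; memory == ~0u marks them as not owning any allocation.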
if (info->handle) {
buffer->handle = (VkBuffer) info->handle;
buffer->memory = ~0u;
nickname(buffer->handle, VK_OBJECT_TYPE_BUFFER, info->label);
return true;
}
VkBufferCreateInfo createInfo = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.size = info->size,
.usage = getBufferUsage(info->type)
};
VK(vkCreateBuffer(state.device, &createInfo, NULL, &buffer->handle), "Could not create buffer") return false;
nickname(buffer->handle, VK_OBJECT_TYPE_BUFFER, info->label);
VkDeviceSize offset;
VkMemoryRequirements requirements;
vkGetBufferMemoryRequirements(state.device, buffer->handle, &requirements);
gpu_memory* memory = gpu_allocate((gpu_memory_type) info->type, requirements, &offset);
VK(vkBindBufferMemory(state.device, buffer->handle, memory->handle, offset), "Could not bind buffer memory") {
vkDestroyBuffer(state.device, buffer->handle, NULL);
gpu_release(memory);
return false;
}
if (info->pointer) {
*info->pointer = memory->pointer ? (char*) memory->pointer + offset : NULL;
}
buffer->memory = memory - state.memory;
return true;
}
void gpu_buffer_destroy(gpu_buffer* buffer) {
if (buffer->memory == ~0u) return;
condemn(buffer->handle, VK_OBJECT_TYPE_BUFFER);
gpu_release(&state.memory[buffer->memory]);
}
// Texture
bool gpu_texture_init(gpu_texture* texture, gpu_texture_info* info) {
static const VkImageType imageTypes[] = {
[GPU_TEXTURE_2D] = VK_IMAGE_TYPE_2D,
[GPU_TEXTURE_3D] = VK_IMAGE_TYPE_3D,
[GPU_TEXTURE_CUBE] = VK_IMAGE_TYPE_2D,
[GPU_TEXTURE_ARRAY] = VK_IMAGE_TYPE_2D
};
switch (info->format) {
case GPU_FORMAT_D16: texture->aspect = VK_IMAGE_ASPECT_DEPTH_BIT; break;
case GPU_FORMAT_D32F: texture->aspect = VK_IMAGE_ASPECT_DEPTH_BIT; break;
case GPU_FORMAT_D24S8: texture->aspect = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; break;
case GPU_FORMAT_D32FS8: texture->aspect = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; break;
default: texture->aspect = VK_IMAGE_ASPECT_COLOR_BIT; break;
}
texture->layout = getNaturalLayout(info->usage, texture->aspect);
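// layers is zero for 3D textures, which also lets later code distinguish array layers from depth slices.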
texture->layers = info->type == GPU_TEXTURE_3D ? 0 : info->size[2];
texture->format = info->format;
texture->srgb = info->srgb;
gpu_texture_view_info viewInfo = {
.source = texture,
.type = info->type,
.usage = info->usage
};
if (info->handle) {
texture->memory = ~0u;
texture->handle = (VkImage) info->handle;
nickname(texture->handle, VK_OBJECT_TYPE_IMAGE, info->label);
return gpu_texture_init_view(texture, &viewInfo);
}
bool mutableFormat = info->srgb && (info->usage & GPU_TEXTURE_STORAGE);
VkImageCreateInfo imageInfo = {
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.flags =
(info->type == GPU_TEXTURE_3D ? VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT : 0) |
(info->type == GPU_TEXTURE_CUBE ? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0) |
(mutableFormat ? (VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT | VK_IMAGE_CREATE_EXTENDED_USAGE_BIT) : 0),
.imageType = imageTypes[info->type],
.format = convertFormat(texture->format, info->srgb),
.extent.width = info->size[0],
.extent.height = info->size[1],
.extent.depth = texture->layers ? 1 : info->size[2],
.mipLevels = info->mipmaps,
.arrayLayers = texture->layers ? texture->layers : 1,
.samples = info->samples ? info->samples : 1,
.usage =
(((info->usage & GPU_TEXTURE_RENDER) && texture->aspect == VK_IMAGE_ASPECT_COLOR_BIT) ? VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT : 0) |
(((info->usage & GPU_TEXTURE_RENDER) && texture->aspect != VK_IMAGE_ASPECT_COLOR_BIT) ? VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT : 0) |
((info->usage & GPU_TEXTURE_SAMPLE) ? VK_IMAGE_USAGE_SAMPLED_BIT : 0) |
((info->usage & GPU_TEXTURE_STORAGE) ? VK_IMAGE_USAGE_STORAGE_BIT : 0) |
((info->usage & GPU_TEXTURE_COPY_SRC) ? VK_IMAGE_USAGE_TRANSFER_SRC_BIT : 0) |
((info->usage & GPU_TEXTURE_COPY_DST) ? VK_IMAGE_USAGE_TRANSFER_DST_BIT : 0) |
((info->usage == GPU_TEXTURE_RENDER) ? VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT : 0) |
(info->upload.levelCount > 0 ? VK_IMAGE_USAGE_TRANSFER_DST_BIT : 0) |
(info->upload.generateMipmaps ? VK_IMAGE_USAGE_TRANSFER_SRC_BIT : 0)
};
VkFormat formats[2];
VkImageFormatListCreateInfo imageFormatList;
if (mutableFormat && state.extensions.formatList) {
imageFormatList = (VkImageFormatListCreateInfo) {
.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO,
.viewFormatCount = COUNTOF(formats),
.pViewFormats = formats
};
formats[0] = imageInfo.format;
formats[1] = convertFormat(texture->format, LINEAR);
imageFormatList.pNext = imageInfo.pNext;
imageInfo.pNext = &imageFormatList;
}
VK(vkCreateImage(state.device, &imageInfo, NULL, &texture->handle), "Could not create texture") return false;
nickname(texture->handle, VK_OBJECT_TYPE_IMAGE, info->label);
gpu_memory_type memoryType;
bool transient = info->usage == GPU_TEXTURE_RENDER;
switch (info->format) {
case GPU_FORMAT_D16: memoryType = transient ? GPU_MEMORY_TEXTURE_LAZY_D16 : GPU_MEMORY_TEXTURE_D16; break;
case GPU_FORMAT_D32F: memoryType = transient ? GPU_MEMORY_TEXTURE_LAZY_D32F : GPU_MEMORY_TEXTURE_D32F; break;
case GPU_FORMAT_D24S8: memoryType = transient ? GPU_MEMORY_TEXTURE_LAZY_D24S8 : GPU_MEMORY_TEXTURE_D24S8; break;
case GPU_FORMAT_D32FS8: memoryType = transient ? GPU_MEMORY_TEXTURE_LAZY_D32FS8 : GPU_MEMORY_TEXTURE_D32FS8; break;
default: memoryType = transient ? GPU_MEMORY_TEXTURE_LAZY_COLOR : GPU_MEMORY_TEXTURE_COLOR; break;
}
VkDeviceSize offset;
VkMemoryRequirements requirements;
vkGetImageMemoryRequirements(state.device, texture->handle, &requirements);
gpu_memory* memory = gpu_allocate(memoryType, requirements, &offset);
VK(vkBindImageMemory(state.device, texture->handle, memory->handle, offset), "Could not bind texture memory") {
vkDestroyImage(state.device, texture->handle, NULL);
gpu_release(memory);
return false;
}
if (!gpu_texture_init_view(texture, &viewInfo)) {
vkDestroyImage(state.device, texture->handle, NULL);
gpu_release(memory);
return false;
}
if (info->upload.stream) {
VkImage image = texture->handle;
VkCommandBuffer commands = info->upload.stream->commands;
uint32_t levelCount = info->upload.levelCount;
gpu_buffer* buffer = info->upload.buffer;
VkImageMemoryBarrier2KHR transition = {
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2_KHR,
.image = image,
.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.newLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.subresourceRange.aspectMask = texture->aspect,
.subresourceRange.baseMipLevel = 0,
.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS,
.subresourceRange.baseArrayLayer = 0,
.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS
};
VkDependencyInfoKHR barrier = {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR,
.pImageMemoryBarriers = &transition,
.imageMemoryBarrierCount = 1
};
if (levelCount > 0) {
transition.srcStageMask = VK_PIPELINE_STAGE_2_NONE_KHR;
transition.dstStageMask = VK_PIPELINE_STAGE_2_COPY_BIT_KHR;
transition.srcAccessMask = VK_ACCESS_2_NONE_KHR;
transition.dstAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR;
transition.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
transition.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
vkCmdPipelineBarrier2KHR(commands, &barrier);
VkBufferImageCopy copies[16];
for (uint32_t i = 0; i < levelCount; i++) {
copies[i] = (VkBufferImageCopy) {
.bufferOffset = info->upload.levelOffsets[i],
.imageSubresource.aspectMask = texture->aspect,
.imageSubresource.mipLevel = i,
.imageSubresource.baseArrayLayer = 0,
.imageSubresource.layerCount = texture->layers ? info->size[2] : 1,
.imageExtent.width = MAX(info->size[0] >> i, 1),
.imageExtent.height = MAX(info->size[1] >> i, 1),
.imageExtent.depth = texture->layers ? 1 : MAX(info->size[2] >> i, 1)
};
}
vkCmdCopyBufferToImage(commands, buffer->handle, image, transition.newLayout, levelCount, copies);
// Generate mipmaps
if (info->upload.generateMipmaps) {
transition.srcStageMask = VK_PIPELINE_STAGE_2_COPY_BIT_KHR;
transition.dstStageMask = VK_PIPELINE_STAGE_2_BLIT_BIT_KHR;
transition.srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR;
transition.dstAccessMask = VK_ACCESS_2_TRANSFER_READ_BIT_KHR;
transition.oldLayout = transition.newLayout;
transition.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
transition.subresourceRange.baseMipLevel = 0;
transition.subresourceRange.levelCount = levelCount;
vkCmdPipelineBarrier2KHR(commands, &barrier);
for (uint32_t i = levelCount; i < info->mipmaps; i++) {
VkImageBlit region = {
.srcSubresource = {
.aspectMask = texture->aspect,
.mipLevel = i - 1,
.layerCount = texture->layers ? info->size[2] : 1
},
.dstSubresource = {
.aspectMask = texture->aspect,
.mipLevel = i,
.layerCount = texture->layers ? info->size[2] : 1
},
.srcOffsets[1] = { MAX(info->size[0] >> (i - 1), 1), MAX(info->size[1] >> (i - 1), 1), 1 },
.dstOffsets[1] = { MAX(info->size[0] >> i, 1), MAX(info->size[1] >> i, 1), 1 }
};
vkCmdBlitImage(commands, image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region, VK_FILTER_LINEAR);
transition.srcStageMask = VK_PIPELINE_STAGE_2_BLIT_BIT_KHR;
transition.dstStageMask = VK_PIPELINE_STAGE_2_BLIT_BIT_KHR;
transition.srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR;
transition.dstAccessMask = VK_ACCESS_2_TRANSFER_READ_BIT_KHR;
transition.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
transition.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
transition.subresourceRange.baseMipLevel = i;
transition.subresourceRange.levelCount = 1;
vkCmdPipelineBarrier2KHR(commands, &barrier);
}
}
}
// Transition to natural layout
transition.srcStageMask = VK_PIPELINE_STAGE_2_COPY_BIT_KHR | VK_PIPELINE_STAGE_2_BLIT_BIT_KHR;
transition.dstStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT_KHR;
transition.srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR;
transition.dstAccessMask = VK_ACCESS_2_MEMORY_READ_BIT_KHR | VK_ACCESS_2_MEMORY_WRITE_BIT_KHR;
transition.oldLayout = transition.newLayout;
transition.newLayout = texture->layout;
transition.subresourceRange.baseMipLevel = 0;
transition.subresourceRange.levelCount = info->mipmaps;
vkCmdPipelineBarrier2KHR(commands, &barrier);
}
texture->memory = memory - state.memory;
return true;
}
bool gpu_texture_init_view(gpu_texture* texture, gpu_texture_view_info* info) {
if (texture != info->source) {
texture->handle = info->source->handle;
texture->memory = ~0u;
texture->aspect = info->source->aspect;
texture->layout = info->source->layout;
texture->layers = info->layerCount ? info->layerCount : (info->source->layers - info->layerIndex);
texture->format = info->source->format;
texture->srgb = info->srgb;
}
VkImageViewType type;
switch (info->type) {
case GPU_TEXTURE_2D: type = VK_IMAGE_VIEW_TYPE_2D; break;
case GPU_TEXTURE_3D: type = VK_IMAGE_VIEW_TYPE_3D; break;
case GPU_TEXTURE_CUBE: type = texture->layers > 6 ? VK_IMAGE_VIEW_TYPE_CUBE_ARRAY : VK_IMAGE_VIEW_TYPE_CUBE; break;
case GPU_TEXTURE_ARRAY: type = VK_IMAGE_VIEW_TYPE_2D_ARRAY; break;
}
VkImageViewUsageCreateInfo usage = {
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
.usage =
((info->usage & GPU_TEXTURE_SAMPLE) ? VK_IMAGE_USAGE_SAMPLED_BIT : 0) |
(((info->usage & GPU_TEXTURE_RENDER) && texture->aspect == VK_IMAGE_ASPECT_COLOR_BIT) ? VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT : 0) |
(((info->usage & GPU_TEXTURE_RENDER) && texture->aspect != VK_IMAGE_ASPECT_COLOR_BIT) ? VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT : 0) |
((info->usage & GPU_TEXTURE_STORAGE) && !texture->srgb ? VK_IMAGE_USAGE_STORAGE_BIT : 0)
};
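// If the view would never be used as an attachment, sampled texture, or storage image, skip creating a VkImageView entirely.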
if (usage.usage == 0) {
texture->view = VK_NULL_HANDLE;
return true;
}
VkImageViewCreateInfo createInfo = {
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = &usage,
.image = info->source->handle,
.viewType = type,
.format = convertFormat(texture->format, texture->srgb),
.subresourceRange = {
.aspectMask = texture->aspect,
.baseMipLevel = info->levelIndex,
.levelCount = info->levelCount ? info->levelCount : VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = info ? info->layerIndex : 0,
.layerCount = info->layerCount ? info->layerCount : VK_REMAINING_ARRAY_LAYERS
}
};
VK(vkCreateImageView(state.device, &createInfo, NULL, &texture->view), "Could not create texture view") {
return false;
}
return true;
}
void gpu_texture_destroy(gpu_texture* texture) {
condemn(texture->view, VK_OBJECT_TYPE_IMAGE_VIEW);
if (texture->memory == ~0u) return;
condemn(texture->handle, VK_OBJECT_TYPE_IMAGE);
gpu_release(state.memory + texture->memory);
}
// Surface
bool gpu_surface_init(gpu_surface_info* info) {
if (!state.extensions.surface || !state.extensions.surfaceOS || !state.extensions.swapchain) {
LOG("Surface unavailable because a required extension is not supported by the GPU");
return false;
}
gpu_surface* surface = &state.surface;
#if defined(_WIN32)
VkWin32SurfaceCreateInfoKHR surfaceInfo = {
.sType = VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR,
.hinstance = (HINSTANCE) info->win32.instance,
.hwnd = (HWND) info->win32.window
};
GPU_DECLARE(vkCreateWin32SurfaceKHR);
GPU_LOAD_INSTANCE(vkCreateWin32SurfaceKHR);
VK(vkCreateWin32SurfaceKHR(state.instance, &surfaceInfo, NULL, &surface->handle), "Failed to create surface") return false;
#elif defined(__APPLE__)
VkMetalSurfaceCreateInfoEXT surfaceInfo = {
.sType = VK_STRUCTURE_TYPE_METAL_SURFACE_CREATE_INFO_EXT,
.pLayer = (const CAMetalLayer*) info->macos.layer
};
GPU_DECLARE(vkCreateMetalSurfaceEXT);
GPU_LOAD_INSTANCE(vkCreateMetalSurfaceEXT);
VK(vkCreateMetalSurfaceEXT(state.instance, &surfaceInfo, NULL, &surface->handle), "Failed to create surface") return false;
#elif defined(__linux__) && !defined(__ANDROID__)
VkXcbSurfaceCreateInfoKHR surfaceInfo = {
.sType = VK_STRUCTURE_TYPE_XCB_SURFACE_CREATE_INFO_KHR,
.connection = (xcb_connection_t*) info->xcb.connection,
.window = (xcb_window_t) info->xcb.window
};
GPU_DECLARE(vkCreateXcbSurfaceKHR);
GPU_LOAD_INSTANCE(vkCreateXcbSurfaceKHR);
VK(vkCreateXcbSurfaceKHR(state.instance, &surfaceInfo, NULL, &surface->handle), "Failed to create surface") return false;
#endif
VkBool32 presentable;
vkGetPhysicalDeviceSurfaceSupportKHR(state.adapter, state.queueFamilyIndex, surface->handle, &presentable);
// The most correct thing to do is to incorporate presentation support into the init-time process
// for selecting a physical device and queue family. For now we deliberately choose not to,
// because A) it's more complicated, B) in normal circumstances OpenXR picks the
// physical device, not us, and C) we don't support multiple GPUs or multiple queues, so we
// aren't able to support the tricky case and would just end up failing/erroring anyway.
if (!presentable) {
LOG("Surface unavailable because the GPU used for rendering does not support presenting to the surface");
vkDestroySurfaceKHR(state.instance, surface->handle, NULL);
return false;
}
vkGetPhysicalDeviceSurfaceCapabilitiesKHR(state.adapter, surface->handle, &surface->capabilities);
VkSurfaceFormatKHR formats[64];
uint32_t formatCount = COUNTOF(formats);
vkGetPhysicalDeviceSurfaceFormatsKHR(state.adapter, surface->handle, &formatCount, formats);
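// Prefer an 8-bit sRGB format (RGBA or BGRA); if none is found, format.format stays VK_FORMAT_UNDEFINED and the check below fails.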
for (uint32_t i = 0; i < formatCount; i++) {
if (formats[i].format == VK_FORMAT_R8G8B8A8_SRGB || formats[i].format == VK_FORMAT_B8G8R8A8_SRGB) {
surface->format = formats[i];
break;
}
}
if (surface->format.format == VK_FORMAT_UNDEFINED) {
LOG("Surface unavailable because no supported texture format is available");
vkDestroySurfaceKHR(state.instance, surface->handle, NULL);
return false;
}
surface->imageIndex = ~0u;
surface->vsync = info->vsync;
gpu_surface_resize(info->width, info->height);
return true;
}
void gpu_surface_resize(uint32_t width, uint32_t height) {
if (width == 0 || height == 0) {
state.surface.valid = false;
return;
}
gpu_surface* surface = &state.surface;
VkSwapchainKHR oldSwapchain = surface->swapchain;
if (oldSwapchain) {
vkDeviceWaitIdle(state.device);
}
VkSwapchainCreateInfoKHR swapchainInfo = {
.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
.surface = surface->handle,
.minImageCount = surface->capabilities.minImageCount,
.imageFormat = surface->format.format,
.imageColorSpace = surface->format.colorSpace,
.imageExtent = { width, height },
.imageArrayLayers = 1,
.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
.preTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR,
.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR,
.presentMode = surface->vsync ? VK_PRESENT_MODE_FIFO_KHR : VK_PRESENT_MODE_IMMEDIATE_KHR,
.clipped = VK_TRUE,
.oldSwapchain = oldSwapchain
};
VK(vkCreateSwapchainKHR(state.device, &swapchainInfo, NULL, &surface->swapchain), "Failed to create swapchain") return;
if (oldSwapchain) {
for (uint32_t i = 0; i < COUNTOF(surface->images); i++) {
if (surface->images[i].view) {
vkDestroyImageView(state.device, surface->images[i].view, NULL);
}
}
memset(surface->images, 0, sizeof(surface->images));
vkDestroySwapchainKHR(state.device, oldSwapchain, NULL);
}
uint32_t imageCount;
VkImage images[COUNTOF(surface->images)];
VK(vkGetSwapchainImagesKHR(state.device, surface->swapchain, &imageCount, NULL), "Failed to get swapchain images") return;
VK(imageCount > COUNTOF(images) ? VK_ERROR_TOO_MANY_OBJECTS : VK_SUCCESS, "Failed to get swapchain images") return;
VK(vkGetSwapchainImagesKHR(state.device, surface->swapchain, &imageCount, images), "Failed to get swapchain images") return;
for (uint32_t i = 0; i < imageCount; i++) {
gpu_texture* texture = &surface->images[i];
texture->handle = images[i];
texture->aspect = VK_IMAGE_ASPECT_COLOR_BIT;
texture->layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
texture->memory = ~0u;
texture->layers = 1;
texture->format = GPU_FORMAT_SURFACE;
texture->srgb = true;
gpu_texture_view_info view = {
.source = texture,
.type = GPU_TEXTURE_2D,
.usage = GPU_TEXTURE_RENDER
};
CHECK(gpu_texture_init_view(texture, &view), "Failed to create swapchain texture views") return;
}
surface->valid = true;
}
gpu_texture* gpu_surface_acquire(void) {
if (!state.surface.valid) {
return NULL;
}
gpu_surface* surface = &state.surface;
gpu_tick* tick = &state.ticks[state.tick[CPU] & TICK_MASK];
VkResult result = vkAcquireNextImageKHR(state.device, surface->swapchain, UINT64_MAX, tick->semaphores[0], VK_NULL_HANDLE, &surface->imageIndex);
if (result == VK_ERROR_OUT_OF_DATE_KHR) {
surface->imageIndex = ~0u;
surface->valid = false;
return NULL;
} else {
vcheck(result, "Failed to acquire swapchain");
}
surface->semaphore = tick->semaphores[0];
return &surface->images[surface->imageIndex];
}
void gpu_surface_present(void) {
VkSemaphore semaphore = state.ticks[state.tick[CPU] & TICK_MASK].semaphores[1];
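// Submit an empty batch whose only purpose is to signal a semaphore for the present to wait on, so presentation happens after previously submitted work.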
VkSubmitInfo submit = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.signalSemaphoreCount = 1,
.pSignalSemaphores = &semaphore
};
VK(vkQueueSubmit(state.queue, 1, &submit, VK_NULL_HANDLE), "Queue submit failed") {}
VkPresentInfoKHR present = {
.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
.waitSemaphoreCount = 1,
.pWaitSemaphores = &semaphore,
.swapchainCount = 1,
.pSwapchains = &state.surface.swapchain,
.pImageIndices = &state.surface.imageIndex
};
VkResult result = vkQueuePresentKHR(state.queue, &present);
if (result == VK_ERROR_OUT_OF_DATE_KHR) {
state.surface.valid = false;
} else {
vcheck(result, "Queue present failed");
}
state.surface.imageIndex = ~0u;
}
// Sampler
bool gpu_sampler_init(gpu_sampler* sampler, gpu_sampler_info* info) {
static const VkFilter filters[] = {
[GPU_FILTER_NEAREST] = VK_FILTER_NEAREST,
[GPU_FILTER_LINEAR] = VK_FILTER_LINEAR
};
static const VkSamplerMipmapMode mipFilters[] = {
[GPU_FILTER_NEAREST] = VK_SAMPLER_MIPMAP_MODE_NEAREST,
[GPU_FILTER_LINEAR] = VK_SAMPLER_MIPMAP_MODE_LINEAR
};
static const VkSamplerAddressMode wraps[] = {
[GPU_WRAP_CLAMP] = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
[GPU_WRAP_REPEAT] = VK_SAMPLER_ADDRESS_MODE_REPEAT,
[GPU_WRAP_MIRROR] = VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT
};
static const VkCompareOp compareOps[] = {
[GPU_COMPARE_NONE] = VK_COMPARE_OP_ALWAYS,
[GPU_COMPARE_EQUAL] = VK_COMPARE_OP_EQUAL,
[GPU_COMPARE_NEQUAL] = VK_COMPARE_OP_NOT_EQUAL,
[GPU_COMPARE_LESS] = VK_COMPARE_OP_LESS,
[GPU_COMPARE_LEQUAL] = VK_COMPARE_OP_LESS_OR_EQUAL,
[GPU_COMPARE_GREATER] = VK_COMPARE_OP_GREATER,
[GPU_COMPARE_GEQUAL] = VK_COMPARE_OP_GREATER_OR_EQUAL
};
VkSamplerCreateInfo samplerInfo = {
.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
.magFilter = filters[info->mag],
.minFilter = filters[info->min],
.mipmapMode = mipFilters[info->mip],
.addressModeU = wraps[info->wrap[0]],
.addressModeV = wraps[info->wrap[1]],
.addressModeW = wraps[info->wrap[2]],
.anisotropyEnable = info->anisotropy >= 1.f,
.maxAnisotropy = info->anisotropy,
.compareEnable = info->compare != GPU_COMPARE_NONE,
.compareOp = compareOps[info->compare],
.minLod = info->lodClamp[0],
.maxLod = info->lodClamp[1] < 0.f ? VK_LOD_CLAMP_NONE : info->lodClamp[1]
};
VK(vkCreateSampler(state.device, &samplerInfo, NULL, &sampler->handle), "Could not create sampler") {
return false;
}
return true;
}
void gpu_sampler_destroy(gpu_sampler* sampler) {
condemn(sampler->handle, VK_OBJECT_TYPE_SAMPLER);
}
// Layout
bool gpu_layout_init(gpu_layout* layout, gpu_layout_info* info) {
static const VkDescriptorType types[] = {
[GPU_SLOT_UNIFORM_BUFFER] = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
[GPU_SLOT_STORAGE_BUFFER] = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
[GPU_SLOT_UNIFORM_BUFFER_DYNAMIC] = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC,
[GPU_SLOT_STORAGE_BUFFER_DYNAMIC] = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC,
[GPU_SLOT_SAMPLED_TEXTURE] = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
[GPU_SLOT_STORAGE_TEXTURE] = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
[GPU_SLOT_SAMPLER] = VK_DESCRIPTOR_TYPE_SAMPLER
};
VkDescriptorSetLayoutBinding bindings[32];
for (uint32_t i = 0; i < info->count; i++) {
bindings[i] = (VkDescriptorSetLayoutBinding) {
.binding = info->slots[i].number,
.descriptorType = types[info->slots[i].type],
.descriptorCount = 1,
.stageFlags =
(((info->slots[i].stages & GPU_STAGE_VERTEX) ? VK_SHADER_STAGE_VERTEX_BIT : 0) |
((info->slots[i].stages & GPU_STAGE_FRAGMENT) ? VK_SHADER_STAGE_FRAGMENT_BIT : 0) |
((info->slots[i].stages & GPU_STAGE_COMPUTE) ? VK_SHADER_STAGE_COMPUTE_BIT : 0))
};
}
VkDescriptorSetLayoutCreateInfo layoutInfo = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.bindingCount = info->count,
.pBindings = bindings
};
VK(vkCreateDescriptorSetLayout(state.device, &layoutInfo, NULL, &layout->handle), "Failed to create layout") {
return false;
}
memset(layout->descriptorCounts, 0, sizeof(layout->descriptorCounts));
for (uint32_t i = 0; i < info->count; i++) {
layout->descriptorCounts[info->slots[i].type]++;
}
return true;
}
void gpu_layout_destroy(gpu_layout* layout) {
condemn(layout->handle, VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT);
}
// Shader
bool gpu_shader_init(gpu_shader* shader, gpu_shader_info* info) {
struct { VkShaderStageFlags flags; gpu_shader_stage* source; } stages[] = {
{ VK_SHADER_STAGE_VERTEX_BIT, &info->vertex },
{ VK_SHADER_STAGE_FRAGMENT_BIT, &info->fragment },
{ VK_SHADER_STAGE_COMPUTE_BIT, &info->compute }
};
uint32_t stageCount = 0;
VkShaderStageFlags stageFlags = 0;
for (uint32_t i = 0; i < COUNTOF(stages); i++) {
if (!stages[i].source->code) continue;
VkShaderModuleCreateInfo moduleInfo = {
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
.codeSize = stages[i].source->length,
.pCode = stages[i].source->code
};
VK(vkCreateShaderModule(state.device, &moduleInfo, NULL, &shader->handles[stageCount]), "Failed to load shader") {
return false;
}
nickname(shader->handles[stageCount], VK_OBJECT_TYPE_SHADER_MODULE, info->label);
stageFlags |= stages[i].flags;
stageCount++;
}
VkDescriptorSetLayout layouts[4];
VkPipelineLayoutCreateInfo pipelineLayoutInfo = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.pSetLayouts = layouts,
.pushConstantRangeCount = info->pushConstantSize > 0,
.pPushConstantRanges = &(VkPushConstantRange) {
.stageFlags = stageFlags,
.offset = 0,
.size = info->pushConstantSize
}
};
for (uint32_t i = 0; i < COUNTOF(info->layouts) && info->layouts[i]; i++) {
layouts[i] = info->layouts[i]->handle;
pipelineLayoutInfo.setLayoutCount++;
}
VK(vkCreatePipelineLayout(state.device, &pipelineLayoutInfo, NULL, &shader->pipelineLayout), "Failed to create pipeline layout") {
gpu_shader_destroy(shader);
return false;
}
return true;
}
void gpu_shader_destroy(gpu_shader* shader) {
// The spec says it's safe to destroy shaders while still in use
if (shader->handles[0]) vkDestroyShaderModule(state.device, shader->handles[0], NULL);
if (shader->handles[1]) vkDestroyShaderModule(state.device, shader->handles[1], NULL);
condemn(shader->pipelineLayout, VK_OBJECT_TYPE_PIPELINE_LAYOUT);
}
// Bundles
bool gpu_bundle_pool_init(gpu_bundle_pool* pool, gpu_bundle_pool_info* info) {
VkDescriptorPoolSize sizes[7] = {
[GPU_SLOT_UNIFORM_BUFFER] = { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 0 },
[GPU_SLOT_STORAGE_BUFFER] = { VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 0 },
[GPU_SLOT_UNIFORM_BUFFER_DYNAMIC] = { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 0 },
[GPU_SLOT_STORAGE_BUFFER_DYNAMIC] = { VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, 0 },
[GPU_SLOT_SAMPLED_TEXTURE] = { VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 0 },
[GPU_SLOT_STORAGE_TEXTURE] = { VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 0 },
[GPU_SLOT_SAMPLER] = { VK_DESCRIPTOR_TYPE_SAMPLER, 0 }
};
if (info->layout) {
for (uint32_t i = 0; i < COUNTOF(sizes); i++) {
sizes[i].descriptorCount = info->layout->descriptorCounts[i] * info->count;
}
} else {
for (uint32_t i = 0; i < info->count; i++) {
for (uint32_t j = 0; j < COUNTOF(sizes); j++) {
sizes[j].descriptorCount += info->contents[i].layout->descriptorCounts[j];
}
}
}
// Pool sizes with a descriptorCount of zero are forbidden, so swap any zero-count entries to the end and shrink poolSizeCount
uint32_t poolSizeCount = COUNTOF(sizes);
for (uint32_t i = 0; i < poolSizeCount; i++) {
if (sizes[i].descriptorCount == 0) {
VkDescriptorPoolSize last = sizes[poolSizeCount - 1];
sizes[poolSizeCount - 1] = sizes[i];
sizes[i] = last;
poolSizeCount--;
i--;
}
}
VkDescriptorPoolCreateInfo poolInfo = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.maxSets = info->count,
.poolSizeCount = poolSizeCount,
.pPoolSizes = sizes
};
VK(vkCreateDescriptorPool(state.device, &poolInfo, NULL, &pool->handle), "Could not create bundle pool") {
return false;
}
VkDescriptorSetLayout layouts[512];
for (uint32_t i = 0; i < info->count; i+= COUNTOF(layouts)) {
uint32_t chunk = MIN(info->count - i, COUNTOF(layouts));
for (uint32_t j = 0; j < chunk; j++) {
layouts[j] = info->layout ? info->layout->handle : info->contents[i + j].layout->handle;
}
VkDescriptorSetAllocateInfo allocateInfo = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
.descriptorPool = pool->handle,
.descriptorSetCount = chunk,
.pSetLayouts = layouts
};
VK(vkAllocateDescriptorSets(state.device, &allocateInfo, &info->bundles[i].handle), "Could not allocate descriptor sets") {
gpu_bundle_pool_destroy(pool);
return false;
}
}
return true;
}
void gpu_bundle_pool_destroy(gpu_bundle_pool* pool) {
condemn(pool->handle, VK_OBJECT_TYPE_DESCRIPTOR_POOL);
}
void gpu_bundle_write(gpu_bundle** bundles, gpu_bundle_info* infos, uint32_t count) {
VkDescriptorBufferInfo bufferInfo[256];
VkDescriptorImageInfo imageInfo[256];
VkWriteDescriptorSet writes[256];
uint32_t bufferCount = 0;
uint32_t imageCount = 0;
uint32_t writeCount = 0;
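// Descriptor writes are accumulated in fixed-size scratch arrays and flushed with vkUpdateDescriptorSets whenever any of them fills up.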
static const VkDescriptorType types[] = {
[GPU_SLOT_UNIFORM_BUFFER] = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
[GPU_SLOT_STORAGE_BUFFER] = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
[GPU_SLOT_UNIFORM_BUFFER_DYNAMIC] = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC,
[GPU_SLOT_STORAGE_BUFFER_DYNAMIC] = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC,
[GPU_SLOT_SAMPLED_TEXTURE] = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
[GPU_SLOT_STORAGE_TEXTURE] = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
[GPU_SLOT_SAMPLER] = VK_DESCRIPTOR_TYPE_SAMPLER
};
for (uint32_t i = 0; i < count; i++) {
gpu_bundle_info* info = &infos[i];
for (uint32_t j = 0; j < info->count; j++) {
gpu_binding* binding = &info->bindings[j];
VkDescriptorType type = types[binding->type];
gpu_buffer_binding* buffers = binding->count > 0 ? binding->buffers : &binding->buffer;
gpu_texture** textures = binding->count > 0 ? binding->textures : &binding->texture;
gpu_sampler** samplers = binding->count > 0 ? binding->samplers : &binding->sampler;
bool texture = type == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE || type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
bool sampler = type == VK_DESCRIPTOR_TYPE_SAMPLER;
bool image = texture || sampler;
uint32_t index = 0;
uint32_t descriptorCount = MAX(binding->count, 1);
while (index < descriptorCount) {
uint32_t available = image ? COUNTOF(imageInfo) - imageCount : COUNTOF(bufferInfo) - bufferCount;
uint32_t chunk = MIN(descriptorCount - index, available);
writes[writeCount++] = (VkWriteDescriptorSet) {
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstSet = bundles[i]->handle,
.dstBinding = binding->number,
.dstArrayElement = index,
.descriptorCount = chunk,
.descriptorType = type,
.pBufferInfo = &bufferInfo[bufferCount],
.pImageInfo = &imageInfo[imageCount]
};
if (sampler) {
for (uint32_t n = 0; n < chunk; n++, index++) {
imageInfo[imageCount++] = (VkDescriptorImageInfo) {
.sampler = samplers[index]->handle
};
}
} else if (texture) {
for (uint32_t n = 0; n < chunk; n++, index++) {
imageInfo[imageCount++] = (VkDescriptorImageInfo) {
.imageView = textures[index]->view,
.imageLayout = textures[index]->layout
};
}
} else {
for (uint32_t n = 0; n < chunk; n++, index++) {
bufferInfo[bufferCount++] = (VkDescriptorBufferInfo) {
.buffer = buffers[index].object->handle,
.offset = buffers[index].offset,
.range = buffers[index].extent
};
}
}
if ((image ? imageCount >= COUNTOF(imageInfo) : bufferCount >= COUNTOF(bufferInfo)) || writeCount >= COUNTOF(writes)) {
vkUpdateDescriptorSets(state.device, writeCount, writes, 0, NULL);
bufferCount = imageCount = writeCount = 0;
}
}
}
}
if (writeCount > 0) {
vkUpdateDescriptorSets(state.device, writeCount, writes, 0, NULL);
}
}
// Canvas
bool gpu_pass_init(gpu_pass* pass, gpu_pass_info* info) {
static const VkAttachmentLoadOp loadOps[] = {
[GPU_LOAD_OP_CLEAR] = VK_ATTACHMENT_LOAD_OP_CLEAR,
[GPU_LOAD_OP_DISCARD] = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
[GPU_LOAD_OP_KEEP] = VK_ATTACHMENT_LOAD_OP_LOAD
};
static const VkAttachmentStoreOp storeOps[] = {
[GPU_SAVE_OP_KEEP] = VK_ATTACHMENT_STORE_OP_STORE,
[GPU_SAVE_OP_DISCARD] = VK_ATTACHMENT_STORE_OP_DONT_CARE
};
VkAttachmentDescription2 attachments[10];
VkAttachmentReference2 references[10];
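// Attachment order: color attachments, then color resolve attachments (if resolving), then depth/stencil, then the depth resolve attachment.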
for (uint32_t i = 0; i < info->colorCount; i++) {
references[i] = (VkAttachmentReference2) {
.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
.layout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL_KHR,
.attachment = i
};
attachments[i] = (VkAttachmentDescription2) {
.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
.format = convertFormat(info->color[i].format, info->color[i].srgb),
.samples = info->samples,
.loadOp = loadOps[info->color[i].load],
.storeOp = info->resolveColor ? VK_ATTACHMENT_STORE_OP_DONT_CARE : storeOps[info->color[i].save],
.initialLayout = references[i].layout,
.finalLayout = references[i].layout
};
}
if (info->resolveColor) {
for (uint32_t i = 0; i < info->colorCount; i++) {
uint32_t index = info->colorCount + i;
references[index] = (VkAttachmentReference2) {
.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
.layout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL_KHR,
.attachment = index
};
attachments[index] = (VkAttachmentDescription2) {
.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
.format = attachments[i].format,
.samples = VK_SAMPLE_COUNT_1_BIT,
.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
.storeOp = storeOps[info->color[i].save],
.initialLayout = references[index].layout,
.finalLayout = references[index].layout
};
}
}
bool depth = !!info->depth.format;
if (depth) {
uint32_t index = info->colorCount << info->resolveColor;
references[index] = (VkAttachmentReference2) {
.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
.layout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL_KHR,
.attachment = index
};
attachments[index] = (VkAttachmentDescription2) {
.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
.format = convertFormat(info->depth.format, LINEAR),
.samples = info->samples,
.loadOp = loadOps[info->depth.load],
.storeOp = info->resolveDepth ? VK_ATTACHMENT_STORE_OP_DONT_CARE : storeOps[info->depth.save],
.stencilLoadOp = loadOps[info->depth.stencilLoad],
.stencilStoreOp = info->resolveDepth ? VK_ATTACHMENT_STORE_OP_DONT_CARE : storeOps[info->depth.stencilSave],
.initialLayout = references[index].layout,
.finalLayout = references[index].layout
};
if (info->resolveDepth) {
references[index + 1] = (VkAttachmentReference2) {
.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
.layout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL_KHR,
.attachment = index + 1
};
attachments[index + 1] = (VkAttachmentDescription2) {
.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
.format = attachments[index].format,
.samples = VK_SAMPLE_COUNT_1_BIT,
.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
.storeOp = storeOps[info->depth.save],
.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
.stencilStoreOp = storeOps[info->depth.stencilSave],
.initialLayout = references[index + 1].layout,
.finalLayout = references[index + 1].layout
};
}
}
uint32_t attachmentCount = (info->colorCount << info->resolveColor) + (depth << info->resolveDepth);
VkSubpassDescription2 subpass = {
.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
.pNext = info->resolveDepth ? &(VkSubpassDescriptionDepthStencilResolve) {
.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE,
.depthResolveMode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
.stencilResolveMode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
.pDepthStencilResolveAttachment = &references[attachmentCount - 1]
} : NULL,
.viewMask = (1 << info->views) - 1,
.colorAttachmentCount = info->colorCount,
.pColorAttachments = &references[0],
.pResolveAttachments = info->resolveColor ? &references[info->colorCount] : NULL,
.pDepthStencilAttachment = depth ? &references[attachmentCount - 1 - info->resolveDepth] : NULL
};
VkRenderPassCreateInfo2 createInfo = {
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
.attachmentCount = attachmentCount,
.pAttachments = attachments,
.subpassCount = 1,
.pSubpasses = &subpass
};
VK(vkCreateRenderPass2KHR(state.device, &createInfo, NULL, &pass->handle), "Could not create render pass") {
return false;
}
pass->colorCount = info->colorCount;
pass->samples = info->samples;
pass->loadMask = 0;
for (uint32_t i = 0; i < pass->colorCount; i++) {
pass->loadMask |= (info->color[i].load == GPU_LOAD_OP_KEEP) ? (1 << i) : 0;
}
pass->depthLoad = info->depth.load == GPU_LOAD_OP_KEEP;
pass->surface = info->surface;
return true;
}
void gpu_pass_destroy(gpu_pass* pass) {
condemn(pass->handle, VK_OBJECT_TYPE_RENDER_PASS);
}
// Pipeline
bool gpu_pipeline_init_graphics(gpu_pipeline* pipeline, gpu_pipeline_info* info) {
static const VkPrimitiveTopology topologies[] = {
[GPU_DRAW_POINTS] = VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
[GPU_DRAW_LINES] = VK_PRIMITIVE_TOPOLOGY_LINE_LIST,
[GPU_DRAW_TRIANGLES] = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST
};
static const VkFormat attributeTypes[] = {
[GPU_TYPE_I8x4] = VK_FORMAT_R8G8B8A8_SINT,
[GPU_TYPE_U8x4] = VK_FORMAT_R8G8B8A8_UINT,
[GPU_TYPE_SN8x4] = VK_FORMAT_R8G8B8A8_SNORM,
[GPU_TYPE_UN8x4] = VK_FORMAT_R8G8B8A8_UNORM,
[GPU_TYPE_SN10x3] = VK_FORMAT_A2B10G10R10_SNORM_PACK32,
[GPU_TYPE_UN10x3] = VK_FORMAT_A2B10G10R10_UNORM_PACK32,
[GPU_TYPE_I16] = VK_FORMAT_R16_SINT,
[GPU_TYPE_I16x2] = VK_FORMAT_R16G16_SINT,
[GPU_TYPE_I16x4] = VK_FORMAT_R16G16B16A16_SINT,
[GPU_TYPE_U16] = VK_FORMAT_R16_UINT,
[GPU_TYPE_U16x2] = VK_FORMAT_R16G16_UINT,
[GPU_TYPE_U16x4] = VK_FORMAT_R16G16B16A16_UINT,
[GPU_TYPE_SN16x2] = VK_FORMAT_R16G16_SNORM,
[GPU_TYPE_SN16x4] = VK_FORMAT_R16G16B16A16_SNORM,
[GPU_TYPE_UN16x2] = VK_FORMAT_R16G16_UNORM,
[GPU_TYPE_UN16x4] = VK_FORMAT_R16G16B16A16_UNORM,
[GPU_TYPE_I32] = VK_FORMAT_R32_SINT,
[GPU_TYPE_I32x2] = VK_FORMAT_R32G32_SINT,
[GPU_TYPE_I32x3] = VK_FORMAT_R32G32B32_SINT,
[GPU_TYPE_I32x4] = VK_FORMAT_R32G32B32A32_SINT,
[GPU_TYPE_U32] = VK_FORMAT_R32_UINT,
[GPU_TYPE_U32x2] = VK_FORMAT_R32G32_UINT,
[GPU_TYPE_U32x3] = VK_FORMAT_R32G32B32_UINT,
[GPU_TYPE_U32x4] = VK_FORMAT_R32G32B32A32_UINT,
[GPU_TYPE_F16x2] = VK_FORMAT_R16G16_SFLOAT,
[GPU_TYPE_F16x4] = VK_FORMAT_R16G16B16A16_SFLOAT,
[GPU_TYPE_F32] = VK_FORMAT_R32_SFLOAT,
[GPU_TYPE_F32x2] = VK_FORMAT_R32G32_SFLOAT,
[GPU_TYPE_F32x3] = VK_FORMAT_R32G32B32_SFLOAT,
[GPU_TYPE_F32x4] = VK_FORMAT_R32G32B32A32_SFLOAT
};
static const VkCullModeFlagBits cullModes[] = {
[GPU_CULL_NONE] = VK_CULL_MODE_NONE,
[GPU_CULL_FRONT] = VK_CULL_MODE_FRONT_BIT,
[GPU_CULL_BACK] = VK_CULL_MODE_BACK_BIT
};
static const VkFrontFace frontFaces[] = {
[GPU_WINDING_CCW] = VK_FRONT_FACE_COUNTER_CLOCKWISE,
[GPU_WINDING_CW] = VK_FRONT_FACE_CLOCKWISE
};
static const VkCompareOp compareOps[] = {
[GPU_COMPARE_NONE] = VK_COMPARE_OP_ALWAYS,
[GPU_COMPARE_EQUAL] = VK_COMPARE_OP_EQUAL,
[GPU_COMPARE_NEQUAL] = VK_COMPARE_OP_NOT_EQUAL,
[GPU_COMPARE_LESS] = VK_COMPARE_OP_LESS,
[GPU_COMPARE_LEQUAL] = VK_COMPARE_OP_LESS_OR_EQUAL,
[GPU_COMPARE_GREATER] = VK_COMPARE_OP_GREATER,
[GPU_COMPARE_GEQUAL] = VK_COMPARE_OP_GREATER_OR_EQUAL
};
static const VkStencilOp stencilOps[] = {
[GPU_STENCIL_KEEP] = VK_STENCIL_OP_KEEP,
[GPU_STENCIL_ZERO] = VK_STENCIL_OP_ZERO,
[GPU_STENCIL_REPLACE] = VK_STENCIL_OP_REPLACE,
[GPU_STENCIL_INCREMENT] = VK_STENCIL_OP_INCREMENT_AND_CLAMP,
[GPU_STENCIL_DECREMENT] = VK_STENCIL_OP_DECREMENT_AND_CLAMP,
[GPU_STENCIL_INCREMENT_WRAP] = VK_STENCIL_OP_INCREMENT_AND_WRAP,
[GPU_STENCIL_DECREMENT_WRAP] = VK_STENCIL_OP_DECREMENT_AND_WRAP,
[GPU_STENCIL_INVERT] = VK_STENCIL_OP_INVERT
};
static const VkBlendFactor blendFactors[] = {
[GPU_BLEND_ZERO] = VK_BLEND_FACTOR_ZERO,
[GPU_BLEND_ONE] = VK_BLEND_FACTOR_ONE,
[GPU_BLEND_SRC_COLOR] = VK_BLEND_FACTOR_SRC_COLOR,
[GPU_BLEND_ONE_MINUS_SRC_COLOR] = VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR,
[GPU_BLEND_SRC_ALPHA] = VK_BLEND_FACTOR_SRC_ALPHA,
[GPU_BLEND_ONE_MINUS_SRC_ALPHA] = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
[GPU_BLEND_DST_COLOR] = VK_BLEND_FACTOR_DST_COLOR,
[GPU_BLEND_ONE_MINUS_DST_COLOR] = VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR,
[GPU_BLEND_DST_ALPHA] = VK_BLEND_FACTOR_DST_ALPHA,
[GPU_BLEND_ONE_MINUS_DST_ALPHA] = VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA
};
static const VkBlendOp blendOps[] = {
[GPU_BLEND_ADD] = VK_BLEND_OP_ADD,
[GPU_BLEND_SUB] = VK_BLEND_OP_SUBTRACT,
[GPU_BLEND_RSUB] = VK_BLEND_OP_REVERSE_SUBTRACT,
[GPU_BLEND_MIN] = VK_BLEND_OP_MIN,
[GPU_BLEND_MAX] = VK_BLEND_OP_MAX
};
VkVertexInputBindingDescription vertexBuffers[16];
for (uint32_t i = 0; i < info->vertex.bufferCount; i++) {
vertexBuffers[i] = (VkVertexInputBindingDescription) {
.binding = i,
.stride = info->vertex.bufferStrides[i],
.inputRate = (info->vertex.instancedBuffers & (1 << i)) ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX
};
}
VkVertexInputAttributeDescription vertexAttributes[COUNTOF(info->vertex.attributes)];
for (uint32_t i = 0; i < info->vertex.attributeCount; i++) {
vertexAttributes[i] = (VkVertexInputAttributeDescription) {
.location = info->vertex.attributes[i].location,
.binding = info->vertex.attributes[i].buffer,
.format = attributeTypes[info->vertex.attributes[i].type],
.offset = info->vertex.attributes[i].offset
};
}
VkPipelineVertexInputStateCreateInfo vertexInput = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
.vertexBindingDescriptionCount = info->vertex.bufferCount,
.pVertexBindingDescriptions = vertexBuffers,
.vertexAttributeDescriptionCount = info->vertex.attributeCount,
.pVertexAttributeDescriptions = vertexAttributes
};
VkPipelineInputAssemblyStateCreateInfo inputAssembly = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
.topology = topologies[info->drawMode]
};
VkPipelineViewportStateCreateInfo viewport = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
.viewportCount = 1,
.scissorCount = 1
};
VkPipelineRasterizationStateCreateInfo rasterization = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
.depthClampEnable = info->rasterizer.depthClamp,
.polygonMode = info->rasterizer.wireframe ? VK_POLYGON_MODE_LINE : VK_POLYGON_MODE_FILL,
.cullMode = cullModes[info->rasterizer.cullMode],
.frontFace = frontFaces[info->rasterizer.winding],
.depthBiasEnable = info->rasterizer.depthOffset != 0.f || info->rasterizer.depthOffsetSloped != 0.f,
.depthBiasConstantFactor = info->rasterizer.depthOffset,
.depthBiasSlopeFactor = info->rasterizer.depthOffsetSloped,
.lineWidth = 1.f
};
VkPipelineMultisampleStateCreateInfo multisample = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.rasterizationSamples = info->pass->samples,
.alphaToCoverageEnable = info->multisample.alphaToCoverage,
.alphaToOneEnable = info->multisample.alphaToOne
};
VkStencilOpState stencil = {
.failOp = stencilOps[info->stencil.failOp],
.passOp = stencilOps[info->stencil.passOp],
.depthFailOp = stencilOps[info->stencil.depthFailOp],
.compareOp = compareOps[info->stencil.test],
.compareMask = info->stencil.testMask,
.writeMask = info->stencil.writeMask,
.reference = info->stencil.value
};
VkPipelineDepthStencilStateCreateInfo depthStencil = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
.depthTestEnable = info->depth.test != GPU_COMPARE_NONE || info->depth.write,
.depthWriteEnable = info->depth.write,
.depthCompareOp = compareOps[info->depth.test],
.stencilTestEnable =
info->stencil.test != GPU_COMPARE_NONE ||
info->stencil.failOp != GPU_STENCIL_KEEP ||
info->stencil.passOp != GPU_STENCIL_KEEP ||
info->stencil.depthFailOp != GPU_STENCIL_KEEP,
.front = stencil,
.back = stencil
};
VkPipelineColorBlendAttachmentState colorAttachments[4];
for (uint32_t i = 0; i < info->pass->colorCount; i++) {
colorAttachments[i] = (VkPipelineColorBlendAttachmentState) {
.blendEnable = info->blend[i].enabled,
.srcColorBlendFactor = blendFactors[info->blend[i].color.src],
.dstColorBlendFactor = blendFactors[info->blend[i].color.dst],
.colorBlendOp = blendOps[info->blend[i].color.op],
.srcAlphaBlendFactor = blendFactors[info->blend[i].alpha.src],
.dstAlphaBlendFactor = blendFactors[info->blend[i].alpha.dst],
.alphaBlendOp = blendOps[info->blend[i].alpha.op],
.colorWriteMask = info->colorMask[i]
};
}
VkPipelineColorBlendStateCreateInfo colorBlend = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
.attachmentCount = info->pass->colorCount,
.pAttachments = colorAttachments
};
VkDynamicState dynamicStates[] = {
VK_DYNAMIC_STATE_VIEWPORT,
VK_DYNAMIC_STATE_SCISSOR
};
VkPipelineDynamicStateCreateInfo dynamicState = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
.dynamicStateCount = COUNTOF(dynamicStates),
.pDynamicStates = dynamicStates
};
uint32_t constants[32];
VkSpecializationMapEntry entries[32];
CHECK(info->flagCount <= COUNTOF(constants), "Too many specialization constants") return false;
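// Each shader flag is packed into its own 32-bit slot of the specialization constant data.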
for (uint32_t i = 0; i < info->flagCount; i++) {
gpu_shader_flag* flag = &info->flags[i];
switch (flag->type) {
case GPU_FLAG_B32: constants[i] = flag->value == 0. ? VK_FALSE : VK_TRUE; break;
case GPU_FLAG_I32: constants[i] = (uint32_t) flag->value; break;
case GPU_FLAG_U32: constants[i] = (uint32_t) flag->value; break;
case GPU_FLAG_F32: memcpy(&constants[i], &(float) { flag->value }, sizeof(float)); break;
default: constants[i] = 0; break;
}
entries[i] = (VkSpecializationMapEntry) {
.constantID = flag->id,
.offset = i * sizeof(uint32_t),
.size = sizeof(uint32_t)
};
}
VkSpecializationInfo specialization = {
.mapEntryCount = info->flagCount,
.pMapEntries = entries,
.dataSize = sizeof(constants),
.pData = (const void*) constants
};
uint32_t stageCount = info->shader->handles[1] && info->pass->colorCount > 0 ? 2 : 1;
VkPipelineShaderStageCreateInfo shaders[2] = {
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_VERTEX_BIT,
.module = info->shader->handles[0],
.pName = "main",
.pSpecializationInfo = &specialization
},
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
.module = info->shader->handles[1],
.pName = "main",
.pSpecializationInfo = &specialization
}
};
VkGraphicsPipelineCreateInfo pipelineInfo = (VkGraphicsPipelineCreateInfo) {
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.stageCount = stageCount,
.pStages = shaders,
.pVertexInputState = &vertexInput,
.pInputAssemblyState = &inputAssembly,
.pViewportState = &viewport,
.pRasterizationState = &rasterization,
.pMultisampleState = &multisample,
.pDepthStencilState = &depthStencil,
.pColorBlendState = &colorBlend,
.pDynamicState = &dynamicState,
.layout = info->shader->pipelineLayout,
.renderPass = info->pass->handle
};
VK(vkCreateGraphicsPipelines(state.device, state.pipelineCache, 1, &pipelineInfo, NULL, &pipeline->handle), "Could not create pipeline") {
return false;
}
nickname(pipeline->handle, VK_OBJECT_TYPE_PIPELINE, info->label);
return true;
}
bool gpu_pipeline_init_compute(gpu_pipeline* pipeline, gpu_compute_pipeline_info* info) {
uint32_t constants[32];
VkSpecializationMapEntry entries[32];
CHECK(info->flagCount <= COUNTOF(constants), "Too many specialization constants") return false;
for (uint32_t i = 0; i < info->flagCount; i++) {
gpu_shader_flag* flag = &info->flags[i];
switch (flag->type) {
case GPU_FLAG_B32: default: constants[i] = flag->value == 0. ? VK_FALSE : VK_TRUE; break;
case GPU_FLAG_I32: constants[i] = (uint32_t) flag->value; break;
case GPU_FLAG_U32: constants[i] = (uint32_t) flag->value; break;
case GPU_FLAG_F32: memcpy(&constants[i], &(float) { flag->value }, sizeof(float)); break;
}
entries[i] = (VkSpecializationMapEntry) {
.constantID = flag->id,
.offset = i * sizeof(uint32_t),
.size = sizeof(uint32_t)
};
}
VkSpecializationInfo specialization = {
.mapEntryCount = info->flagCount,
.pMapEntries = entries,
.dataSize = sizeof(constants),
.pData = (const void*) constants
};
VkPipelineShaderStageCreateInfo shader = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = info->shader->handles[0],
.pName = "main",
.pSpecializationInfo = &specialization
};
VkComputePipelineCreateInfo pipelineInfo = {
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.stage = shader,
.layout = info->shader->pipelineLayout
};
VK(vkCreateComputePipelines(state.device, state.pipelineCache, 1, &pipelineInfo, NULL, &pipeline->handle), "Could not create compute pipeline") {
return false;
}
nickname(pipeline->handle, VK_OBJECT_TYPE_PIPELINE, info->label);
return true;
}
void gpu_pipeline_destroy(gpu_pipeline* pipeline) {
condemn(pipeline->handle, VK_OBJECT_TYPE_PIPELINE);
}
void gpu_pipeline_get_cache(void* data, size_t* size) {
if (vkGetPipelineCacheData(state.device, state.pipelineCache, size, data) != VK_SUCCESS) {
*size = 0;
}
}
// Tally
bool gpu_tally_init(gpu_tally* tally, gpu_tally_info* info) {
VkQueryType queryTypes[] = {
[GPU_TALLY_TIME] = VK_QUERY_TYPE_TIMESTAMP,
[GPU_TALLY_PIXEL] = VK_QUERY_TYPE_OCCLUSION
};
VkQueryPoolCreateInfo createInfo = {
.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
.queryType = queryTypes[info->type],
.queryCount = info->count
};
VK(vkCreateQueryPool(state.device, &createInfo, NULL, &tally->handle), "Could not create query pool") {
return false;
}
return true;
}
void gpu_tally_destroy(gpu_tally* tally) {
condemn(tally->handle, VK_OBJECT_TYPE_QUERY_POOL);
}
// Stream
gpu_stream* gpu_stream_begin(const char* label) {
gpu_tick* tick = &state.ticks[state.tick[CPU] & TICK_MASK];
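// Streams record into command buffers owned by the current tick's command pool.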
CHECK(state.streamCount < COUNTOF(tick->streams), "Too many passes") return NULL;
gpu_stream* stream = &tick->streams[state.streamCount];
nickname(stream->commands, VK_OBJECT_TYPE_COMMAND_BUFFER, label);
VkCommandBufferBeginInfo beginfo = {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT
};
VK(vkBeginCommandBuffer(stream->commands, &beginfo), "Failed to begin stream") return NULL;
state.streamCount++;
return stream;
}
void gpu_stream_end(gpu_stream* stream) {
VK(vkEndCommandBuffer(stream->commands), "Failed to end stream") return;
}
void gpu_render_begin(gpu_stream* stream, gpu_canvas* canvas) {
gpu_pass* pass = canvas->pass;
// Framebuffer
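// Attachment order must mirror the render pass: color targets, color resolve targets, then depth/stencil and its resolve target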
VkImageView images[10];
VkClearValue clears[10];
uint32_t attachmentCount = 0;
for (uint32_t i = 0; i < pass->colorCount; i++) {
images[i] = canvas->color[i].texture->view;
memcpy(clears[i].color.float32, canvas->color[i].clear, 4 * sizeof(float));
attachmentCount++;
}
if (pass->colorCount > 0 && canvas->color[0].resolve) {
for (uint32_t i = 0; i < pass->colorCount; i++) {
images[attachmentCount++] = canvas->color[i].resolve->view;
}
}
if (canvas->depth.texture) {
uint32_t index = attachmentCount++;
images[index] = canvas->depth.texture->view;
clears[index].depthStencil.depth = canvas->depth.clear;
clears[index].depthStencil.stencil = canvas->depth.stencilClear;
if (canvas->depth.resolve) {
images[attachmentCount++] = canvas->depth.resolve->view;
}
}
VkFramebufferCreateInfo info = {
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
.renderPass = pass->handle,
.attachmentCount = attachmentCount,
.pAttachments = images,
.width = canvas->width,
.height = canvas->height,
.layers = 1
};
VkFramebuffer framebuffer;
VK(vkCreateFramebuffer(state.device, &info, NULL, &framebuffer), "Failed to create framebuffer") {
return;
}
condemn(framebuffer, VK_OBJECT_TYPE_FRAMEBUFFER); // Framebuffers are cheap and useless
// Layout transitions
uint32_t barrierCount = 0;
VkImageMemoryBarrier2KHR barriers[10];
bool BEGIN = true;
bool RESOLVE = true;
for (uint32_t i = 0; i < pass->colorCount; i++) {
bool DISCARD = ~pass->loadMask & (1 << i);
barrierCount += transitionAttachment(canvas->color[i].texture, BEGIN, !RESOLVE, DISCARD, &barriers[barrierCount]);
barrierCount += transitionAttachment(canvas->color[i].resolve, BEGIN, RESOLVE, true, &barriers[barrierCount]);
}
if (canvas->depth.texture) {
bool DISCARD = !pass->depthLoad;
barrierCount += transitionAttachment(canvas->depth.texture, BEGIN, !RESOLVE, DISCARD, &barriers[barrierCount]);
barrierCount += transitionAttachment(canvas->depth.resolve, BEGIN, RESOLVE, true, &barriers[barrierCount]);
}
if (barrierCount > 0) {
vkCmdPipelineBarrier2KHR(stream->commands, &(VkDependencyInfoKHR) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR,
.imageMemoryBarrierCount = barrierCount,
.pImageMemoryBarriers = barriers
});
}
// Do it!
VkRenderPassBeginInfo beginfo = {
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
.renderPass = pass->handle,
.framebuffer = framebuffer,
.renderArea = { { 0, 0 }, { canvas->width, canvas->height } },
.clearValueCount = attachmentCount,
.pClearValues = clears
};
vkCmdBeginRenderPass2KHR(stream->commands, &beginfo, &(VkSubpassBeginInfo) {
.sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO
});
}
void gpu_render_end(gpu_stream* stream, gpu_canvas* canvas) {
vkCmdEndRenderPass2KHR(stream->commands, &(VkSubpassEndInfo) {
.sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO
});
gpu_pass* pass = canvas->pass;
// Layout transitions
uint32_t barrierCount = 0;
VkImageMemoryBarrier2KHR barriers[10];
bool BEGIN = true;
bool RESOLVE = true;
bool DISCARD = true;
for (uint32_t i = 0; i < pass->colorCount; i++) {
barrierCount += transitionAttachment(canvas->color[i].texture, !BEGIN, !RESOLVE, !DISCARD, &barriers[barrierCount]);
barrierCount += transitionAttachment(canvas->color[i].resolve, !BEGIN, RESOLVE, !DISCARD, &barriers[barrierCount]);
}
barrierCount += transitionAttachment(canvas->depth.texture, !BEGIN, !RESOLVE, !DISCARD, &barriers[barrierCount]);
barrierCount += transitionAttachment(canvas->depth.resolve, !BEGIN, RESOLVE, !DISCARD, &barriers[barrierCount]);
if (barrierCount > 0) {
vkCmdPipelineBarrier2KHR(stream->commands, &(VkDependencyInfoKHR) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR,
.imageMemoryBarrierCount = barrierCount,
.pImageMemoryBarriers = barriers
});
}
}
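// Compute scopes are currently no-ops in this backend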
void gpu_compute_begin(gpu_stream* stream) {
//
}
void gpu_compute_end(gpu_stream* stream) {
//
}
void gpu_set_viewport(gpu_stream* stream, float view[4], float depthRange[2]) {
VkViewport viewport = { view[0], view[1], view[2], view[3], depthRange[0], depthRange[1] };
vkCmdSetViewport(stream->commands, 0, 1, &viewport);
}
void gpu_set_scissor(gpu_stream* stream, uint32_t scissor[4]) {
VkRect2D rect = { { scissor[0], scissor[1] }, { scissor[2], scissor[3] } };
vkCmdSetScissor(stream->commands, 0, 1, &rect);
}
void gpu_push_constants(gpu_stream* stream, gpu_shader* shader, void* data, uint32_t size) {
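// handles[1] holds the fragment module, so its presence distinguishes graphics shaders from compute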
VkShaderStageFlags stages = shader->handles[1] ? (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT) : VK_SHADER_STAGE_COMPUTE_BIT;
vkCmdPushConstants(stream->commands, shader->pipelineLayout, stages, 0, size, data);
}
void gpu_bind_pipeline(gpu_stream* stream, gpu_pipeline* pipeline, gpu_pipeline_type type) {
VkPipelineBindPoint pipelineTypes[] = {
[GPU_PIPELINE_GRAPHICS] = VK_PIPELINE_BIND_POINT_GRAPHICS,
[GPU_PIPELINE_COMPUTE] = VK_PIPELINE_BIND_POINT_COMPUTE
};
vkCmdBindPipeline(stream->commands, pipelineTypes[type], pipeline->handle);
}
void gpu_bind_bundles(gpu_stream* stream, gpu_shader* shader, gpu_bundle** bundles, uint32_t first, uint32_t count, uint32_t* dynamicOffsets, uint32_t dynamicOffsetCount) {
VkDescriptorSet sets[COUNTOF(((gpu_shader_info*) NULL)->layouts)];
for (uint32_t i = 0; i < count; i++) {
sets[i] = bundles[i]->handle;
}
VkPipelineBindPoint bindPoint = shader->handles[1] ? VK_PIPELINE_BIND_POINT_GRAPHICS : VK_PIPELINE_BIND_POINT_COMPUTE;
vkCmdBindDescriptorSets(stream->commands, bindPoint, shader->pipelineLayout, first, count, sets, dynamicOffsetCount, dynamicOffsets);
}
void gpu_bind_vertex_buffers(gpu_stream* stream, gpu_buffer** buffers, uint32_t* offsets, uint32_t first, uint32_t count) {
VkBuffer handles[COUNTOF(((gpu_pipeline_info*) NULL)->vertex.bufferStrides)];
uint64_t offsets64[COUNTOF(handles)];
for (uint32_t i = 0; i < count; i++) {
handles[i] = buffers[i]->handle;
offsets64[i] = offsets ? offsets[i] : 0;
}
vkCmdBindVertexBuffers(stream->commands, first, count, handles, offsets64);
}
void gpu_bind_index_buffer(gpu_stream* stream, gpu_buffer* buffer, uint32_t offset, gpu_index_type type) {
vkCmdBindIndexBuffer(stream->commands, buffer->handle, offset, (VkIndexType) type);
}
void gpu_draw(gpu_stream* stream, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex, uint32_t baseInstance) {
vkCmdDraw(stream->commands, vertexCount, instanceCount, firstVertex, baseInstance);
}
void gpu_draw_indexed(gpu_stream* stream, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, uint32_t baseVertex, uint32_t baseInstance) {
vkCmdDrawIndexed(stream->commands, indexCount, instanceCount, firstIndex, baseVertex, baseInstance);
}
void gpu_draw_indirect(gpu_stream* stream, gpu_buffer* buffer, uint32_t offset, uint32_t drawCount, uint32_t stride) {
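// A stride of 0 falls back to the tightly packed sizeof(VkDrawIndirectCommand) (16 bytes)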
vkCmdDrawIndirect(stream->commands, buffer->handle, offset, drawCount, stride ? stride : 16);
}
void gpu_draw_indirect_indexed(gpu_stream* stream, gpu_buffer* buffer, uint32_t offset, uint32_t drawCount, uint32_t stride) {
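// A stride of 0 falls back to the tightly packed sizeof(VkDrawIndexedIndirectCommand) (20 bytes)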
vkCmdDrawIndexedIndirect(stream->commands, buffer->handle, offset, drawCount, stride ? stride : 20);
}
void gpu_compute(gpu_stream* stream, uint32_t x, uint32_t y, uint32_t z) {
vkCmdDispatch(stream->commands, x, y, z);
}
void gpu_compute_indirect(gpu_stream* stream, gpu_buffer* buffer, uint32_t offset) {
vkCmdDispatchIndirect(stream->commands, buffer->handle, offset);
}
void gpu_copy_buffers(gpu_stream* stream, gpu_buffer* src, gpu_buffer* dst, uint32_t srcOffset, uint32_t dstOffset, uint32_t size) {
vkCmdCopyBuffer(stream->commands, src->handle, dst->handle, 1, &(VkBufferCopy) {
.srcOffset = srcOffset,
.dstOffset = dstOffset,
.size = size
});
}
void gpu_copy_textures(gpu_stream* stream, gpu_texture* src, gpu_texture* dst, uint32_t srcOffset[4], uint32_t dstOffset[4], uint32_t size[3]) {
vkCmdCopyImage(stream->commands, src->handle, VK_IMAGE_LAYOUT_GENERAL, dst->handle, VK_IMAGE_LAYOUT_GENERAL, 1, &(VkImageCopy) {
.srcSubresource = {
.aspectMask = src->aspect,
.mipLevel = srcOffset[3],
.baseArrayLayer = src->layers ? srcOffset[2] : 0,
.layerCount = src->layers ? size[2] : 1
},
.dstSubresource = {
.aspectMask = dst->aspect,
.mipLevel = dstOffset[3],
.baseArrayLayer = dst->layers ? dstOffset[2] : 0,
.layerCount = dst->layers ? size[2] : 1
},
.srcOffset = { srcOffset[0], srcOffset[1], src->layers ? 0 : srcOffset[2] },
.dstOffset = { dstOffset[0], dstOffset[1], dst->layers ? 0 : dstOffset[2] },
.extent = { size[0], size[1], size[2] }
});
}
void gpu_copy_buffer_texture(gpu_stream* stream, gpu_buffer* src, gpu_texture* dst, uint32_t srcOffset, uint32_t dstOffset[4], uint32_t extent[3]) {
VkBufferImageCopy region = {
.bufferOffset = srcOffset,
.imageSubresource.aspectMask = dst->aspect,
.imageSubresource.mipLevel = dstOffset[3],
.imageSubresource.baseArrayLayer = dst->layers ? dstOffset[2] : 0,
.imageSubresource.layerCount = dst->layers ? extent[2] : 1,
.imageOffset = { dstOffset[0], dstOffset[1], dst->layers ? 0 : dstOffset[2] },
.imageExtent = { extent[0], extent[1], dst->layers ? 1 : extent[2] }
};
vkCmdCopyBufferToImage(stream->commands, src->handle, dst->handle, VK_IMAGE_LAYOUT_GENERAL, 1, &region);
}
void gpu_copy_texture_buffer(gpu_stream* stream, gpu_texture* src, gpu_buffer* dst, uint32_t srcOffset[4], uint32_t dstOffset, uint32_t extent[3]) {
VkBufferImageCopy region = {
.bufferOffset = dstOffset,
.imageSubresource.aspectMask = src->aspect,
.imageSubresource.mipLevel = srcOffset[3],
.imageSubresource.baseArrayLayer = src->layers ? srcOffset[2] : 0,
.imageSubresource.layerCount = src->layers ? extent[2] : 1,
.imageOffset = { srcOffset[0], srcOffset[1], src->layers ? 0 : srcOffset[2] },
.imageExtent = { extent[0], extent[1], src->layers ? 1 : extent[2] }
};
vkCmdCopyImageToBuffer(stream->commands, src->handle, VK_IMAGE_LAYOUT_GENERAL, dst->handle, 1, &region);
}
void gpu_copy_tally_buffer(gpu_stream* stream, gpu_tally* src, gpu_buffer* dst, uint32_t srcIndex, uint32_t dstOffset, uint32_t count) {
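// Results are copied as 4-byte uint32 values; WAIT makes the copy wait until each query's result is available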
vkCmdCopyQueryPoolResults(stream->commands, src->handle, srcIndex, count, dst->handle, dstOffset, 4, VK_QUERY_RESULT_WAIT_BIT);
}
void gpu_clear_buffer(gpu_stream* stream, gpu_buffer* buffer, uint32_t offset, uint32_t extent, uint32_t value) {
vkCmdFillBuffer(stream->commands, buffer->handle, offset, extent, value);
}
void gpu_clear_texture(gpu_stream* stream, gpu_texture* texture, float value[4], uint32_t layer, uint32_t layerCount, uint32_t level, uint32_t levelCount) {
VkImageSubresourceRange range = {
.aspectMask = texture->aspect,
.baseMipLevel = level,
.levelCount = levelCount,
.baseArrayLayer = layer,
.layerCount = layerCount
};
if (texture->aspect == VK_IMAGE_ASPECT_COLOR_BIT) {
VkClearColorValue clear;
memcpy(&clear.float32, value, sizeof(clear.float32));
vkCmdClearColorImage(stream->commands, texture->handle, VK_IMAGE_LAYOUT_GENERAL, &clear, 1, &range);
} else {
VkClearDepthStencilValue clear;
clear.depth = value[0];
clear.stencil = (uint8_t) value[1];
vkCmdClearDepthStencilImage(stream->commands, texture->handle, VK_IMAGE_LAYOUT_GENERAL, &clear, 1, &range);
}
}
void gpu_clear_tally(gpu_stream* stream, gpu_tally* tally, uint32_t index, uint32_t count) {
vkCmdResetQueryPool(stream->commands, tally->handle, index, count);
}
void gpu_blit(gpu_stream* stream, gpu_texture* src, gpu_texture* dst, uint32_t srcOffset[4], uint32_t dstOffset[4], uint32_t srcExtent[3], uint32_t dstExtent[3], gpu_filter filter) {
VkImageBlit region = {
.srcSubresource = {
.aspectMask = src->aspect,
.mipLevel = srcOffset[3],
.baseArrayLayer = src->layers ? srcOffset[2] : 0,
.layerCount = src->layers ? srcExtent[2] : 1
},
.dstSubresource = {
.aspectMask = dst->aspect,
.mipLevel = dstOffset[3],
.baseArrayLayer = dst->layers ? dstOffset[2] : 0,
.layerCount = dst->layers ? dstExtent[2] : 1
},
.srcOffsets[0] = { srcOffset[0], srcOffset[1], src->layers ? 0 : srcOffset[2] },
.dstOffsets[0] = { dstOffset[0], dstOffset[1], dst->layers ? 0 : dstOffset[2] },
.srcOffsets[1] = { srcOffset[0] + srcExtent[0], srcOffset[1] + srcExtent[1], src->layers ? 1 : srcOffset[2] + srcExtent[2] },
.dstOffsets[1] = { dstOffset[0] + dstExtent[0], dstOffset[1] + dstExtent[1], dst->layers ? 1 : dstOffset[2] + dstExtent[2] }
};
static const VkFilter filters[] = {
[GPU_FILTER_NEAREST] = VK_FILTER_NEAREST,
[GPU_FILTER_LINEAR] = VK_FILTER_LINEAR
};
vkCmdBlitImage(stream->commands, src->handle, VK_IMAGE_LAYOUT_GENERAL, dst->handle, VK_IMAGE_LAYOUT_GENERAL, 1, &region, filters[filter]);
}
void gpu_sync(gpu_stream* stream, gpu_barrier* barriers, uint32_t count) {
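// All requested barriers are folded into one global memory barrier; only the combined stage/access masks matter here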
VkMemoryBarrier2KHR memoryBarrier = { .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2_KHR };
for (uint32_t i = 0; i < count; i++) {
gpu_barrier* barrier = &barriers[i];
memoryBarrier.srcStageMask |= convertPhase(barrier->prev, false);
memoryBarrier.dstStageMask |= convertPhase(barrier->next, true);
memoryBarrier.srcAccessMask |= convertCache(barrier->flush);
memoryBarrier.dstAccessMask |= convertCache(barrier->clear);
}
if (memoryBarrier.srcStageMask && memoryBarrier.dstStageMask) {
vkCmdPipelineBarrier2KHR(stream->commands, &(VkDependencyInfoKHR) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR,
.pMemoryBarriers = &memoryBarrier,
.memoryBarrierCount = 1
});
}
}
void gpu_tally_begin(gpu_stream* stream, gpu_tally* tally, uint32_t index) {
vkCmdBeginQuery(stream->commands, tally->handle, index, 0);
}
void gpu_tally_finish(gpu_stream* stream, gpu_tally* tally, uint32_t index) {
vkCmdEndQuery(stream->commands, tally->handle, index);
}
void gpu_tally_mark(gpu_stream* stream, gpu_tally* tally, uint32_t index) {
vkCmdWriteTimestamp(stream->commands, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, tally->handle, index);
}
// Acquires an OpenXR swapchain texture, transitioning it to the natural layout
void gpu_xr_acquire(gpu_stream* stream, gpu_texture* texture) {
vkCmdPipelineBarrier2KHR(stream->commands, &(VkDependencyInfoKHR) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR,
.imageMemoryBarrierCount = 1,
.pImageMemoryBarriers = &(VkImageMemoryBarrier2KHR) {
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2_KHR,
.srcStageMask = VK_PIPELINE_STAGE_2_NONE_KHR,
.dstStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT_KHR,
.srcAccessMask = VK_ACCESS_2_NONE_KHR,
.dstAccessMask = VK_ACCESS_2_MEMORY_READ_BIT_KHR | VK_ACCESS_2_MEMORY_WRITE_BIT_KHR,
.oldLayout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL_KHR,
.newLayout = texture->layout,
.image = texture->handle,
.subresourceRange.aspectMask = texture->aspect,
.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS,
.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS
}
});
}
// Releases an OpenXR swapchain texture, transitioning it back to the layout expected by OpenXR
void gpu_xr_release(gpu_stream* stream, gpu_texture* texture) {
vkCmdPipelineBarrier2KHR(stream->commands, &(VkDependencyInfoKHR) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR,
.imageMemoryBarrierCount = 1,
.pImageMemoryBarriers = &(VkImageMemoryBarrier2KHR) {
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2_KHR,
.srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT_KHR,
.dstStageMask = VK_PIPELINE_STAGE_2_NONE_KHR,
.srcAccessMask = VK_ACCESS_2_MEMORY_WRITE_BIT_KHR,
.dstAccessMask = VK_ACCESS_2_NONE_KHR,
.oldLayout = texture->layout,
.newLayout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL_KHR,
.image = texture->handle,
.subresourceRange.aspectMask = texture->aspect,
.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS,
.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS
}
});
}
// Entry
bool gpu_init(gpu_config* config) {
state.config = *config;
// Load
#ifdef _WIN32
state.library = LoadLibraryA("vulkan-1.dll");
CHECK(state.library, "Failed to load vulkan library") return gpu_destroy(), false;
vkGetInstanceProcAddr = (PFN_vkGetInstanceProcAddr) GetProcAddress(state.library, "vkGetInstanceProcAddr");
#elif __APPLE__
state.library = dlopen("libvulkan.1.dylib", RTLD_NOW | RTLD_LOCAL);
if (!state.library) state.library = dlopen("libMoltenVK.dylib", RTLD_NOW | RTLD_LOCAL);
CHECK(state.library, "Failed to load vulkan library") return gpu_destroy(), false;
vkGetInstanceProcAddr = (PFN_vkGetInstanceProcAddr) dlsym(state.library, "vkGetInstanceProcAddr");
#else
state.library = dlopen("libvulkan.so.1", RTLD_NOW | RTLD_LOCAL);
if (!state.library) state.library = dlopen("libvulkan.so", RTLD_NOW | RTLD_LOCAL);
CHECK(state.library, "Failed to load vulkan library") return gpu_destroy(), false;
vkGetInstanceProcAddr = (PFN_vkGetInstanceProcAddr) dlsym(state.library, "vkGetInstanceProcAddr");
#endif
GPU_FOREACH_ANONYMOUS(GPU_LOAD_ANONYMOUS);
{
// Layers
uint32_t layerCount = 0;
VK(vkEnumerateInstanceLayerProperties(&layerCount, NULL), "Failed to enumerate instance layers") return gpu_destroy(), false;
VkLayerProperties* layerInfo = config->fnAlloc(layerCount * sizeof(*layerInfo));
CHECK(layerInfo, "Out of memory") return gpu_destroy(), false;
VK(vkEnumerateInstanceLayerProperties(&layerCount, layerInfo), "Failed to enumerate instance layers") return gpu_destroy(), false;
struct { const char* name; bool shouldEnable; bool* flag; } layers[] = {
{ "VK_LAYER_KHRONOS_validation", config->debug, &state.extensions.validation }
};
uint32_t enabledLayerCount = 0;
const char* enabledLayers[COUNTOF(layers)];
for (uint32_t i = 0; i < COUNTOF(layers); i++) {
if (!layers[i].shouldEnable) continue;
if (hasLayer(layerInfo, layerCount, layers[i].name)) {
CHECK(enabledLayerCount < COUNTOF(enabledLayers), "Too many layers") return gpu_destroy(), false;
if (layers[i].flag) *layers[i].flag = true;
enabledLayers[enabledLayerCount++] = layers[i].name;
} else if (!layers[i].flag) {
vcheck(VK_ERROR_LAYER_NOT_PRESENT, layers[i].name);
return gpu_destroy(), false;
}
}
config->fnFree(layerInfo);
// Extensions
uint32_t extensionCount = 0;
VK(vkEnumerateInstanceExtensionProperties(NULL, &extensionCount, NULL), "Failed to enumerate instance extensions") return gpu_destroy(), false;
VkExtensionProperties* extensionInfo = config->fnAlloc(extensionCount * sizeof(*extensionInfo));
CHECK(extensionInfo, "Out of memory") return gpu_destroy(), false;
VK(vkEnumerateInstanceExtensionProperties(NULL, &extensionCount, extensionInfo), "Failed to enumerate instance extensions") return gpu_destroy(), false;
struct { const char* name; bool shouldEnable; bool* flag; } extensions[] = {
{ "VK_KHR_portability_enumeration", true, &state.extensions.portability },
{ "VK_EXT_debug_utils", config->debug, &state.extensions.debug },
{ "VK_EXT_swapchain_colorspace", true, &state.extensions.colorspace },
{ "VK_KHR_surface", true, &state.extensions.surface },
#if defined(_WIN32)
{ "VK_KHR_win32_surface", true, &state.extensions.surfaceOS },
#elif defined(__APPLE__)
{ "VK_EXT_metal_surface", true, &state.extensions.surfaceOS },
#elif defined(__linux__) && !defined(__ANDROID__)
{ "VK_KHR_xcb_surface", true, &state.extensions.surfaceOS },
#endif
};
uint32_t enabledExtensionCount = 0;
const char* enabledExtensions[COUNTOF(extensions)];
for (uint32_t i = 0; i < COUNTOF(extensions); i++) {
if (!extensions[i].shouldEnable) continue;
if (hasExtension(extensionInfo, extensionCount, extensions[i].name)) {
CHECK(enabledExtensionCount < COUNTOF(enabledExtensions), "Too many instance extensions") return gpu_destroy(), false;
if (extensions[i].flag) *extensions[i].flag = true;
enabledExtensions[enabledExtensionCount++] = extensions[i].name;
} else if (!extensions[i].flag) {
vcheck(VK_ERROR_EXTENSION_NOT_PRESENT, extensions[i].name);
return gpu_destroy(), false;
}
}
config->fnFree(extensionInfo);
// Instance
VkInstanceCreateInfo instanceInfo = {
.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
.flags = state.extensions.portability ? VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR : 0,
.pApplicationInfo = &(VkApplicationInfo) {
.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
.pEngineName = config->engineName,
.engineVersion = VK_MAKE_VERSION(config->engineVersion[0], config->engineVersion[1], config->engineVersion[2]),
.apiVersion = VK_MAKE_VERSION(1, 1, 0)
},
.enabledLayerCount = enabledLayerCount,
.ppEnabledLayerNames = enabledLayers,
.enabledExtensionCount = enabledExtensionCount,
.ppEnabledExtensionNames = enabledExtensions
};
if (config->vk.createInstance) {
VK(config->vk.createInstance(&instanceInfo, NULL, (uintptr_t) &state.instance, (void*) vkGetInstanceProcAddr), "Instance creation failed") return gpu_destroy(), false;
} else {
VK(vkCreateInstance(&instanceInfo, NULL, &state.instance), "Instance creation failed") return gpu_destroy(), false;
}
GPU_FOREACH_INSTANCE(GPU_LOAD_INSTANCE);
if (config->debug && config->fnLog) {
if (state.extensions.debug) {
VkDebugUtilsMessengerCreateInfoEXT messengerInfo = {
.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT,
.pfnUserCallback = relay
};
VK(vkCreateDebugUtilsMessengerEXT(state.instance, &messengerInfo, NULL, &state.messenger), "Debug hook setup failed") return gpu_destroy(), false;
if (!state.extensions.validation) {
LOG("Warning: GPU debugging is enabled, but validation layer is not installed");
}
} else {
LOG("Warning: GPU debugging is enabled, but debug extension is not supported");
}
}
}
{ // Device
if (config->vk.getPhysicalDevice) {
config->vk.getPhysicalDevice(state.instance, (uintptr_t) &state.adapter);
} else {
uint32_t deviceCount = 1;
VK(vkEnumeratePhysicalDevices(state.instance, &deviceCount, &state.adapter), "Physical device enumeration failed") return gpu_destroy(), false;
}
VkPhysicalDeviceMultiviewProperties multiviewProperties = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES };
VkPhysicalDeviceSubgroupProperties subgroupProperties = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES, .pNext = &multiviewProperties };
VkPhysicalDeviceProperties2 properties2 = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, .pNext = &subgroupProperties };
vkGetPhysicalDeviceProperties2(state.adapter, &properties2);
if (config->device) {
VkPhysicalDeviceProperties* properties = &properties2.properties;
config->device->deviceId = properties->deviceID;
config->device->vendorId = properties->vendorID;
memcpy(config->device->deviceName, properties->deviceName, MIN(sizeof(config->device->deviceName), sizeof(properties->deviceName)));
config->device->renderer = "Vulkan";
config->device->subgroupSize = subgroupProperties.subgroupSize;
config->device->discrete = properties->deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
}
if (config->limits) {
VkPhysicalDeviceLimits* limits = &properties2.properties.limits;
config->limits->textureSize2D = limits->maxImageDimension2D;
config->limits->textureSize3D = limits->maxImageDimension3D;
config->limits->textureSizeCube = limits->maxImageDimensionCube;
config->limits->textureLayers = limits->maxImageArrayLayers;
config->limits->renderSize[0] = limits->maxFramebufferWidth;
config->limits->renderSize[1] = limits->maxFramebufferHeight;
config->limits->renderSize[2] = MAX(multiviewProperties.maxMultiviewViewCount, 1);
config->limits->uniformBuffersPerStage = limits->maxPerStageDescriptorUniformBuffers;
config->limits->storageBuffersPerStage = limits->maxPerStageDescriptorStorageBuffers;
config->limits->sampledTexturesPerStage = limits->maxPerStageDescriptorSampledImages;
config->limits->storageTexturesPerStage = limits->maxPerStageDescriptorStorageImages;
config->limits->samplersPerStage = limits->maxPerStageDescriptorSamplers;
config->limits->uniformBufferRange = limits->maxUniformBufferRange;
config->limits->storageBufferRange = limits->maxStorageBufferRange;
config->limits->uniformBufferAlign = limits->minUniformBufferOffsetAlignment;
config->limits->storageBufferAlign = limits->minStorageBufferOffsetAlignment;
config->limits->vertexAttributes = MIN(limits->maxVertexInputAttributes, COUNTOF(((gpu_pipeline_info*) NULL)->vertex.attributes));
config->limits->vertexBuffers = MIN(limits->maxVertexInputBindings, COUNTOF(((gpu_pipeline_info*) NULL)->vertex.bufferStrides));
config->limits->vertexBufferStride = MIN(limits->maxVertexInputBindingStride, UINT16_MAX);
config->limits->vertexShaderOutputs = limits->maxVertexOutputComponents;
config->limits->clipDistances = limits->maxClipDistances;
config->limits->cullDistances = limits->maxCullDistances;
config->limits->clipAndCullDistances = limits->maxCombinedClipAndCullDistances;
config->limits->workgroupCount[0] = limits->maxComputeWorkGroupCount[0];
config->limits->workgroupCount[1] = limits->maxComputeWorkGroupCount[1];
config->limits->workgroupCount[2] = limits->maxComputeWorkGroupCount[2];
config->limits->workgroupSize[0] = limits->maxComputeWorkGroupSize[0];
config->limits->workgroupSize[1] = limits->maxComputeWorkGroupSize[1];
config->limits->workgroupSize[2] = limits->maxComputeWorkGroupSize[2];
config->limits->totalWorkgroupSize = limits->maxComputeWorkGroupInvocations;
config->limits->computeSharedMemory = limits->maxComputeSharedMemorySize;
config->limits->pushConstantSize = limits->maxPushConstantsSize;
config->limits->indirectDrawCount = limits->maxDrawIndirectCount;
config->limits->instances = multiviewProperties.maxMultiviewInstanceIndex;
config->limits->timestampPeriod = limits->timestampPeriod;
config->limits->anisotropy = limits->maxSamplerAnisotropy;
config->limits->pointSize = limits->pointSizeRange[1];
}
VkPhysicalDeviceSynchronization2FeaturesKHR synchronization2Features = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES_KHR,
.synchronization2 = true
};
VkPhysicalDeviceShaderDrawParameterFeatures shaderDrawParameterFeatures = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES,
.pNext = &synchronization2Features
};
VkPhysicalDeviceMultiviewFeatures multiviewFeatures = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES,
.pNext = &shaderDrawParameterFeatures
};
VkPhysicalDeviceFeatures2 enabledFeatures = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
.pNext = &multiviewFeatures
};
if (config->features) {
VkPhysicalDeviceFeatures2 features2 = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2 };
VkPhysicalDeviceFeatures* enable = &enabledFeatures.features;
VkPhysicalDeviceFeatures* supports = &features2.features;
vkGetPhysicalDeviceFeatures2(state.adapter, &features2);
// Required features
enable->fullDrawIndexUint32 = true;
enable->imageCubeArray = true;
enable->independentBlend = true;
synchronization2Features.synchronization2 = true;
multiviewFeatures.multiview = true;
shaderDrawParameterFeatures.shaderDrawParameters = true;
// Internal features (exposed as limits)
enable->samplerAnisotropy = supports->samplerAnisotropy;
enable->multiDrawIndirect = supports->multiDrawIndirect;
enable->shaderClipDistance = supports->shaderClipDistance;
enable->shaderCullDistance = supports->shaderCullDistance;
enable->largePoints = supports->largePoints;
// Optional features (currently always enabled when supported)
config->features->textureBC = (enable->textureCompressionBC = supports->textureCompressionBC);
config->features->textureASTC = (enable->textureCompressionASTC_LDR = supports->textureCompressionASTC_LDR);
config->features->wireframe = (enable->fillModeNonSolid = supports->fillModeNonSolid);
config->features->depthClamp = (enable->depthClamp = supports->depthClamp);
config->features->indirectDrawFirstInstance = (enable->drawIndirectFirstInstance = supports->drawIndirectFirstInstance);
config->features->float64 = (enable->shaderFloat64 = supports->shaderFloat64);
config->features->int64 = (enable->shaderInt64 = supports->shaderInt64);
config->features->int16 = (enable->shaderInt16 = supports->shaderInt16);
// Formats
for (uint32_t i = 0; i < GPU_FORMAT_COUNT; i++) {
for (int j = 0; j < 2; j++) {
VkFormat format = convertFormat(i, j);
if (j == 1 && convertFormat(i, 0) == format) {
config->features->formats[i][j] = config->features->formats[i][0];
} else {
VkFormatProperties formatProperties;
vkGetPhysicalDeviceFormatProperties(state.adapter, format, &formatProperties);
uint32_t sampleMask = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT;
uint32_t renderMask = VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;
uint32_t blitMask = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
uint32_t flags = formatProperties.optimalTilingFeatures;
config->features->formats[i][j] =
((flags & sampleMask) ? GPU_FEATURE_SAMPLE : 0) |
((flags & renderMask) == renderMask ? GPU_FEATURE_RENDER : 0) |
((flags & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) ? GPU_FEATURE_RENDER : 0) |
((flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT) ? GPU_FEATURE_STORAGE : 0) |
((flags & blitMask) == blitMask ? GPU_FEATURE_BLIT : 0);
}
}
}
}
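// Queue selection: use the first family that supports both graphics and compute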
state.queueFamilyIndex = ~0u;
uint32_t queueFamilyCount = 0;
vkGetPhysicalDeviceQueueFamilyProperties(state.adapter, &queueFamilyCount, NULL);
VkQueueFamilyProperties* queueFamilies = config->fnAlloc(queueFamilyCount * sizeof(*queueFamilies));
CHECK(queueFamilies, "Out of memory") return gpu_destroy(), false;
vkGetPhysicalDeviceQueueFamilyProperties(state.adapter, &queueFamilyCount, queueFamilies);
uint32_t mask = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT;
for (uint32_t i = 0; i < queueFamilyCount; i++) {
if ((queueFamilies[i].queueFlags & mask) == mask) {
state.queueFamilyIndex = i;
break;
}
}
config->fnFree(queueFamilies);
CHECK(state.queueFamilyIndex != ~0u, "Queue selection failed") return gpu_destroy(), false;
struct { const char* name; bool shouldEnable; bool* flag; } extensions[] = {
{ "VK_KHR_create_renderpass2", true, NULL },
{ "VK_KHR_swapchain", true, &state.extensions.swapchain },
{ "VK_KHR_portability_subset", true, &state.extensions.portability },
{ "VK_KHR_depth_stencil_resolve", true, &state.extensions.depthResolve },
{ "VK_KHR_shader_non_semantic_info", config->debug, &state.extensions.shaderDebug },
{ "VK_KHR_image_format_list", true, &state.extensions.formatList },
{ "VK_KHR_synchronization2", true, NULL }
};
uint32_t extensionCount = 0;
VK(vkEnumerateDeviceExtensionProperties(state.adapter, NULL, &extensionCount, NULL), "Failed to enumerate device extensions") return gpu_destroy(), false;
VkExtensionProperties* extensionInfo = config->fnAlloc(extensionCount * sizeof(*extensionInfo));
CHECK(extensionInfo, "Out of memory") return gpu_destroy(), false;
VK(vkEnumerateDeviceExtensionProperties(state.adapter, NULL, &extensionCount, extensionInfo), "Failed to enumerate device extensions") return gpu_destroy(), false;
uint32_t enabledExtensionCount = 0;
const char* enabledExtensions[COUNTOF(extensions)];
for (uint32_t i = 0; i < COUNTOF(extensions); i++) {
if (!extensions[i].shouldEnable) continue;
if (hasExtension(extensionInfo, extensionCount, extensions[i].name)) {
CHECK(enabledExtensionCount < COUNTOF(enabledExtensions), "Too many device extensions") return gpu_destroy(), false;
if (extensions[i].flag) *extensions[i].flag = true;
enabledExtensions[enabledExtensionCount++] = extensions[i].name;
} else if (!extensions[i].flag) {
vcheck(VK_ERROR_EXTENSION_NOT_PRESENT, extensions[i].name);
return gpu_destroy(), false;
}
}
config->fnFree(extensionInfo);
if (config->features) {
config->features->depthResolve = state.extensions.depthResolve;
config->features->shaderDebug = state.extensions.shaderDebug;
}
VkDeviceCreateInfo deviceInfo = {
.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
.pNext = config->features ? &enabledFeatures : NULL,
.queueCreateInfoCount = 1,
.pQueueCreateInfos = &(VkDeviceQueueCreateInfo) {
.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
.queueFamilyIndex = state.queueFamilyIndex,
.pQueuePriorities = &(float) { 1.f },
.queueCount = 1
},
.enabledExtensionCount = enabledExtensionCount,
.ppEnabledExtensionNames = enabledExtensions
};
if (config->vk.createDevice) {
VK(config->vk.createDevice(state.instance, &deviceInfo, NULL, (uintptr_t) &state.device, (void*) vkGetInstanceProcAddr), "Device creation failed") return gpu_destroy(), false;
} else {
VK(vkCreateDevice(state.adapter, &deviceInfo, NULL, &state.device), "Device creation failed") return gpu_destroy(), false;
}
vkGetDeviceQueue(state.device, state.queueFamilyIndex, 0, &state.queue);
GPU_FOREACH_DEVICE(GPU_LOAD_DEVICE);
}
{ // Allocators (without VK_KHR_maintenance4, need to create objects to get memory requirements)
VkPhysicalDeviceMemoryProperties memoryProperties;
vkGetPhysicalDeviceMemoryProperties(state.adapter, &memoryProperties);
VkMemoryType* memoryTypes = memoryProperties.memoryTypes;
VkMemoryPropertyFlags hostVisible = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
// Buffers
// There are 4 types of buffer memory, which use different strategies/memory types:
// - STATIC: Regular device-local memory. Not necessarily mappable, fast to read on GPU.
// - STREAM: Used to "stream" data to the GPU, to be read by shaders. This tries to use the
// special 256MB memory type present on discrete GPUs because it's both device local and host-
// visible and that supposedly makes it fast. A single buffer is allocated with a "zone" for
// each tick. If one of the zones fills up, a new bigger buffer is allocated. It's important
// to have one buffer and keep it alive since streaming is expected to happen very frequently.
// - UPLOAD: Used to stage data to upload to buffers/textures. Can only be used for transfers.
// Uses uncached host-visible memory to not pollute the CPU cache or waste the STREAM memory.
// - DOWNLOAD: Used for readbacks. Uses cached memory when available since reading from
// uncached memory on the CPU is super duper slow.
VkMemoryPropertyFlags bufferFlags[] = {
[GPU_MEMORY_BUFFER_STATIC] = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
[GPU_MEMORY_BUFFER_STREAM] = hostVisible | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
[GPU_MEMORY_BUFFER_UPLOAD] = hostVisible,
[GPU_MEMORY_BUFFER_DOWNLOAD] = hostVisible | VK_MEMORY_PROPERTY_HOST_CACHED_BIT
};
for (uint32_t i = 0; i < COUNTOF(bufferFlags); i++) {
gpu_allocator* allocator = &state.allocators[i];
state.allocatorLookup[i] = i;
VkBufferCreateInfo info = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.usage = getBufferUsage(i),
.size = 4
};
VkBuffer buffer;
VkMemoryRequirements requirements;
vkCreateBuffer(state.device, &info, NULL, &buffer);
vkGetBufferMemoryRequirements(state.device, buffer, &requirements);
vkDestroyBuffer(state.device, buffer, NULL);
VkMemoryPropertyFlags fallback = i == GPU_MEMORY_BUFFER_STATIC ? VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT : hostVisible;
for (uint32_t j = 0; j < memoryProperties.memoryTypeCount; j++) {
if (~requirements.memoryTypeBits & (1 << j)) {
continue;
}
if ((memoryTypes[j].propertyFlags & bufferFlags[i]) == bufferFlags[i]) {
allocator->memoryFlags = memoryTypes[j].propertyFlags;
allocator->memoryType = j;
break;
}
if ((memoryTypes[j].propertyFlags & fallback) == fallback) {
allocator->memoryFlags = memoryTypes[j].propertyFlags;
allocator->memoryType = j;
}
}
}
// Textures
VkImageUsageFlags transient = VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT;
struct { VkFormat format; VkImageUsageFlags usage; } imageFlags[] = {
[GPU_MEMORY_TEXTURE_COLOR] = { VK_FORMAT_R8_UNORM, VK_IMAGE_USAGE_SAMPLED_BIT },
[GPU_MEMORY_TEXTURE_D16] = { VK_FORMAT_D16_UNORM, VK_IMAGE_USAGE_SAMPLED_BIT },
[GPU_MEMORY_TEXTURE_D32F] = { VK_FORMAT_D32_SFLOAT, VK_IMAGE_USAGE_SAMPLED_BIT },
[GPU_MEMORY_TEXTURE_D24S8] = { VK_FORMAT_D24_UNORM_S8_UINT, VK_IMAGE_USAGE_SAMPLED_BIT },
[GPU_MEMORY_TEXTURE_D32FS8] = { VK_FORMAT_D32_SFLOAT_S8_UINT, VK_IMAGE_USAGE_SAMPLED_BIT },
[GPU_MEMORY_TEXTURE_LAZY_COLOR] = { VK_FORMAT_R8_UNORM, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | transient },
[GPU_MEMORY_TEXTURE_LAZY_D16] = { VK_FORMAT_D16_UNORM, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | transient },
[GPU_MEMORY_TEXTURE_LAZY_D32F] = { VK_FORMAT_D32_SFLOAT, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | transient },
[GPU_MEMORY_TEXTURE_LAZY_D24S8] = { VK_FORMAT_D24_UNORM_S8_UINT, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | transient },
[GPU_MEMORY_TEXTURE_LAZY_D32FS8] = { VK_FORMAT_D32_SFLOAT_S8_UINT, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | transient }
};
uint32_t allocatorCount = GPU_MEMORY_TEXTURE_COLOR;
for (uint32_t i = GPU_MEMORY_TEXTURE_COLOR; i < COUNTOF(imageFlags); i++) {
VkFormatProperties formatProperties;
vkGetPhysicalDeviceFormatProperties(state.adapter, imageFlags[i].format, &formatProperties);
if (formatProperties.optimalTilingFeatures == 0) {
state.allocatorLookup[i] = 0xff;
continue;
}
VkImageCreateInfo info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.imageType = VK_IMAGE_TYPE_2D,
.format = imageFlags[i].format,
.extent = { 1, 1, 1 },
.mipLevels = 1,
.arrayLayers = 1,
.samples = VK_SAMPLE_COUNT_1_BIT,
.usage = imageFlags[i].usage
};
VkImage image;
VkMemoryRequirements requirements;
vkCreateImage(state.device, &info, NULL, &image);
vkGetImageMemoryRequirements(state.device, image, &requirements);
vkDestroyImage(state.device, image, NULL);
uint16_t memoryType = 0, memoryFlags = 0;
for (uint32_t j = 0; j < memoryProperties.memoryTypeCount; j++) {
if ((requirements.memoryTypeBits & (1 << j)) && (memoryTypes[j].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)) {
memoryFlags = memoryTypes[j].propertyFlags;
memoryType = j;
break;
}
}
// Unlike buffers, we try to merge our texture allocators since all the textures have similar
// lifetime characteristics, and using fewer allocators greatly reduces memory usage due to the
// huge block size for textures. Basically, only append an allocator if needed.
bool merged = false;
for (uint32_t j = GPU_MEMORY_TEXTURE_COLOR; j < allocatorCount; j++) {
if (memoryType == state.allocators[j].memoryType) {
state.allocatorLookup[i] = j;
merged = true;
break;
}
}
if (!merged) {
uint32_t index = allocatorCount++;
state.allocators[index].memoryFlags = memoryFlags;
state.allocators[index].memoryType = memoryType;
state.allocatorLookup[i] = index;
}
}
}
// Ticks
for (uint32_t i = 0; i < COUNTOF(state.ticks); i++) {
VkCommandPoolCreateInfo poolInfo = {
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
.queueFamilyIndex = state.queueFamilyIndex
};
VK(vkCreateCommandPool(state.device, &poolInfo, NULL, &state.ticks[i].pool), "Command pool creation failed") return gpu_destroy(), false;
VkCommandBufferAllocateInfo allocateInfo = {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
.commandPool = state.ticks[i].pool,
.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
.commandBufferCount = COUNTOF(state.ticks[i].streams)
};
VkCommandBuffer* commandBuffers = &state.ticks[i].streams[0].commands;
VK(vkAllocateCommandBuffers(state.device, &allocateInfo, commandBuffers), "Command buffer allocation failed") return gpu_destroy(), false;
VkSemaphoreCreateInfo semaphoreInfo = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO
};
VK(vkCreateSemaphore(state.device, &semaphoreInfo, NULL, &state.ticks[i].semaphores[0]), "Semaphore creation failed") return gpu_destroy(), false;
VK(vkCreateSemaphore(state.device, &semaphoreInfo, NULL, &state.ticks[i].semaphores[1]), "Semaphore creation failed") return gpu_destroy(), false;
VkFenceCreateInfo fenceInfo = {
.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
.flags = VK_FENCE_CREATE_SIGNALED_BIT
};
VK(vkCreateFence(state.device, &fenceInfo, NULL, &state.ticks[i].fence), "Fence creation failed") return gpu_destroy(), false;
}
// Pipeline cache
VkPipelineCacheCreateInfo cacheInfo = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO
};
// Not using VkPipelineCacheHeaderVersionOne since it's missing from Android headers
if (config->vk.cacheSize >= 16 + VK_UUID_SIZE) {
uint32_t headerSize, headerVersion;
memcpy(&headerSize, config->vk.cacheData, 4);
memcpy(&headerVersion, (char*) config->vk.cacheData + 4, 4);
if (headerSize == 16 + VK_UUID_SIZE && headerVersion == VK_PIPELINE_CACHE_HEADER_VERSION_ONE) {
cacheInfo.initialDataSize = config->vk.cacheSize;
cacheInfo.pInitialData = config->vk.cacheData;
}
}
VK(vkCreatePipelineCache(state.device, &cacheInfo, NULL, &state.pipelineCache), "Pipeline cache creation failed") return gpu_destroy(), false;
state.tick[CPU] = COUNTOF(state.ticks) - 1;
return true;
}
void gpu_destroy(void) {
if (state.device) vkDeviceWaitIdle(state.device);
state.tick[GPU] = state.tick[CPU];
expunge();
if (state.pipelineCache) vkDestroyPipelineCache(state.device, state.pipelineCache, NULL);
for (uint32_t i = 0; i < COUNTOF(state.ticks); i++) {
gpu_tick* tick = &state.ticks[i];
if (tick->pool) vkDestroyCommandPool(state.device, tick->pool, NULL);
if (tick->semaphores[0]) vkDestroySemaphore(state.device, tick->semaphores[0], NULL);
if (tick->semaphores[1]) vkDestroySemaphore(state.device, tick->semaphores[1], NULL);
if (tick->fence) vkDestroyFence(state.device, tick->fence, NULL);
}
for (uint32_t i = 0; i < COUNTOF(state.memory); i++) {
if (state.memory[i].handle) vkFreeMemory(state.device, state.memory[i].handle, NULL);
}
for (uint32_t i = 0; i < COUNTOF(state.surface.images); i++) {
if (state.surface.images[i].view) vkDestroyImageView(state.device, state.surface.images[i].view, NULL);
}
if (state.surface.swapchain) vkDestroySwapchainKHR(state.device, state.surface.swapchain, NULL);
if (state.device) vkDestroyDevice(state.device, NULL);
if (state.surface.handle) vkDestroySurfaceKHR(state.instance, state.surface.handle, NULL);
if (state.messenger) vkDestroyDebugUtilsMessengerEXT(state.instance, state.messenger, NULL);
if (state.instance) vkDestroyInstance(state.instance, NULL);
#ifdef _WIN32
if (state.library) FreeLibrary(state.library);
#else
if (state.library) dlclose(state.library);
#endif
memset(&state, 0, sizeof(state));
}
uint32_t gpu_begin(void) {
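// Reuses the tick slot from COUNTOF(state.ticks) submits ago; waiting on its fence guarantees the GPU finished that tick before its pool is reset below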
gpu_wait_tick(++state.tick[CPU] - COUNTOF(state.ticks));
gpu_tick* tick = &state.ticks[state.tick[CPU] & TICK_MASK];
VK(vkResetFences(state.device, 1, &tick->fence), "Fence reset failed") return 0;
VK(vkResetCommandPool(state.device, tick->pool, 0), "Command pool reset failed") return 0;
state.streamCount = 0;
expunge();
return state.tick[CPU];
}
void gpu_submit(gpu_stream** streams, uint32_t count) {
gpu_tick* tick = &state.ticks[state.tick[CPU] & TICK_MASK];
VkCommandBuffer commands[COUNTOF(tick->streams)];
for (uint32_t i = 0; i < count; i++) {
commands[i] = streams[i]->commands;
}
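// If a swapchain image was acquired this tick, wait on its acquire semaphore before color attachment writes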
VkPipelineStageFlags waitStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
VkSubmitInfo submit = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.waitSemaphoreCount = !!state.surface.semaphore,
.pWaitSemaphores = &state.surface.semaphore,
.pWaitDstStageMask = &waitStage,
.commandBufferCount = count,
.pCommandBuffers = commands
};
VK(vkQueueSubmit(state.queue, 1, &submit, tick->fence), "Queue submit failed") {}
state.surface.semaphore = VK_NULL_HANDLE;
}
bool gpu_is_complete(uint32_t tick) {
return state.tick[GPU] >= tick;
}
bool gpu_wait_tick(uint32_t tick) {
if (state.tick[GPU] < tick) {
VkFence fence = state.ticks[tick & TICK_MASK].fence;
VK(vkWaitForFences(state.device, 1, &fence, VK_FALSE, ~0ull), "Fence wait failed") return false;
state.tick[GPU] = tick;
return true;
} else {
return false;
}
}
void gpu_wait_idle(void) {
vkDeviceWaitIdle(state.device);
state.tick[GPU] = state.tick[CPU];
}
uintptr_t gpu_vk_get_instance(void) {
return (uintptr_t) state.instance;
}
uintptr_t gpu_vk_get_physical_device(void) {
return (uintptr_t) state.adapter;
}
uintptr_t gpu_vk_get_device(void) {
return (uintptr_t) state.device;
}
uintptr_t gpu_vk_get_queue(uint32_t* queueFamilyIndex, uint32_t* queueIndex) {
return *queueFamilyIndex = state.queueFamilyIndex, *queueIndex = 0, (uintptr_t) state.queue;
}
// Helpers
static gpu_memory* gpu_allocate(gpu_memory_type type, VkMemoryRequirements info, VkDeviceSize* offset) {
gpu_allocator* allocator = &state.allocators[state.allocatorLookup[type]];
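// A block size of 0 disables suballocation: each request gets its own dedicated allocation sized to the request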
static const uint32_t blockSizes[] = {
[GPU_MEMORY_BUFFER_STATIC] = 1 << 26,
[GPU_MEMORY_BUFFER_STREAM] = 0,
[GPU_MEMORY_BUFFER_UPLOAD] = 0,
[GPU_MEMORY_BUFFER_DOWNLOAD] = 0,
[GPU_MEMORY_TEXTURE_COLOR] = 1 << 28,
[GPU_MEMORY_TEXTURE_D16] = 1 << 28,
[GPU_MEMORY_TEXTURE_D32F] = 1 << 28,
[GPU_MEMORY_TEXTURE_D24S8] = 1 << 28,
[GPU_MEMORY_TEXTURE_D32FS8] = 1 << 28,
[GPU_MEMORY_TEXTURE_LAZY_COLOR] = 1 << 28,
[GPU_MEMORY_TEXTURE_LAZY_D16] = 1 << 28,
[GPU_MEMORY_TEXTURE_LAZY_D32F] = 1 << 28,
[GPU_MEMORY_TEXTURE_LAZY_D24S8] = 1 << 28,
[GPU_MEMORY_TEXTURE_LAZY_D32FS8] = 1 << 28
};
uint32_t blockSize = blockSizes[type];
uint32_t cursor = ALIGN(allocator->cursor, info.alignment);
if (allocator->block && cursor + info.size <= blockSize) {
allocator->cursor = cursor + info.size;
allocator->block->refs++;
*offset = cursor;
return allocator->block;
}
// If there wasn't an active block or it overflowed, find an empty block to allocate
for (uint32_t i = 0; i < COUNTOF(state.memory); i++) {
if (!state.memory[i].handle) {
gpu_memory* memory = &state.memory[i];
VkMemoryAllocateInfo memoryInfo = {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.allocationSize = MAX(blockSize, info.size),
.memoryTypeIndex = allocator->memoryType
};
VK(vkAllocateMemory(state.device, &memoryInfo, NULL, &memory->handle), "Failed to allocate GPU memory") {
allocator->block = NULL;
return NULL;
}
if (allocator->memoryFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
VK(vkMapMemory(state.device, memory->handle, 0, VK_WHOLE_SIZE, 0, &memory->pointer), "Failed to map memory") {
vkFreeMemory(state.device, memory->handle, NULL);
memory->handle = NULL;
return NULL;
}
} else {
memory->pointer = NULL;
}
allocator->block = memory;
allocator->cursor = info.size;
allocator->block->refs = 1;
*offset = 0;
return memory;
}
}
check(false, "Out of GPU memory");
return NULL;
}
static void gpu_release(gpu_memory* memory) {
if (memory && --memory->refs == 0) {
condemn(memory->handle, VK_OBJECT_TYPE_DEVICE_MEMORY);
memory->handle = NULL;
for (uint32_t i = 0; i < COUNTOF(state.allocators); i++) {
if (state.allocators[i].block == memory) {
state.allocators[i].block = NULL;
state.allocators[i].cursor = 0;
}
}
}
}
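// Defers destruction of a handle until the GPU has finished the tick in which it was condemned (see expunge)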
static void condemn(void* handle, VkObjectType type) {
if (!handle) return;
gpu_morgue* morgue = &state.morgue;
// If the morgue is full, try expunging to reclaim some space
if (morgue->head - morgue->tail >= COUNTOF(morgue->data)) {
expunge();
// If that didn't work, wait for the GPU to be done with the oldest victim and retry
if (morgue->head - morgue->tail >= COUNTOF(morgue->data)) {
gpu_wait_tick(morgue->data[morgue->tail & MORGUE_MASK].tick);
expunge();
}
// The following should be unreachable
check(morgue->head - morgue->tail < COUNTOF(morgue->data), "Morgue overflow!");
}
morgue->data[morgue->head++ & MORGUE_MASK] = (gpu_victim) { handle, type, state.tick[CPU] };
}
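// Destroys every condemned object whose tick the GPU has already completed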
static void expunge(void) {
gpu_morgue* morgue = &state.morgue;
while (morgue->tail != morgue->head && state.tick[GPU] >= morgue->data[morgue->tail & MORGUE_MASK].tick) {
gpu_victim* victim = &morgue->data[morgue->tail++ & MORGUE_MASK];
switch (victim->type) {
case VK_OBJECT_TYPE_BUFFER: vkDestroyBuffer(state.device, victim->handle, NULL); break;
case VK_OBJECT_TYPE_IMAGE: vkDestroyImage(state.device, victim->handle, NULL); break;
case VK_OBJECT_TYPE_IMAGE_VIEW: vkDestroyImageView(state.device, victim->handle, NULL); break;
case VK_OBJECT_TYPE_SAMPLER: vkDestroySampler(state.device, victim->handle, NULL); break;
case VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT: vkDestroyDescriptorSetLayout(state.device, victim->handle, NULL); break;
case VK_OBJECT_TYPE_DESCRIPTOR_POOL: vkDestroyDescriptorPool(state.device, victim->handle, NULL); break;
case VK_OBJECT_TYPE_PIPELINE_LAYOUT: vkDestroyPipelineLayout(state.device, victim->handle, NULL); break;
case VK_OBJECT_TYPE_PIPELINE: vkDestroyPipeline(state.device, victim->handle, NULL); break;
case VK_OBJECT_TYPE_QUERY_POOL: vkDestroyQueryPool(state.device, victim->handle, NULL); break;
case VK_OBJECT_TYPE_RENDER_PASS: vkDestroyRenderPass(state.device, victim->handle, NULL); break;
case VK_OBJECT_TYPE_FRAMEBUFFER: vkDestroyFramebuffer(state.device, victim->handle, NULL); break;
case VK_OBJECT_TYPE_DEVICE_MEMORY: vkFreeMemory(state.device, victim->handle, NULL); break;
default: check(false, "Unreachable"); break;
}
}
}
static bool hasLayer(VkLayerProperties* layers, uint32_t count, const char* layer) {
for (uint32_t i = 0; i < count; i++) {
if (!strcmp(layers[i].layerName, layer)) {
return true;
}
}
return false;
}
static bool hasExtension(VkExtensionProperties* extensions, uint32_t count, const char* extension) {
for (uint32_t i = 0; i < count; i++) {
if (!strcmp(extensions[i].extensionName, extension)) {
return true;
}
}
return false;
}
static VkBufferUsageFlags getBufferUsage(gpu_buffer_type type) {
switch (type) {
case GPU_BUFFER_STATIC:
return
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT |
VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
VK_BUFFER_USAGE_TRANSFER_DST_BIT;
case GPU_BUFFER_STREAM:
return
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
case GPU_BUFFER_UPLOAD:
return VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
case GPU_BUFFER_DOWNLOAD:
return VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
default: return 0;
}
}
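// Writes a layout transition barrier for a render pass attachment and returns whether one was needed; textures whose
// natural layout is already ATTACHMENT_OPTIMAL are skipped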
static bool transitionAttachment(gpu_texture* texture, bool begin, bool resolve, bool discard, VkImageMemoryBarrier2KHR* barrier) {
if (!texture || texture->layout == VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL_KHR) {
return false;
}
bool depth = texture->aspect != VK_IMAGE_ASPECT_COLOR_BIT;
VkPipelineStageFlags2 stage = depth ?
(VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR | VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR ) :
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT_KHR;
VkAccessFlags2 access = (depth && !resolve) ?
(VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT_KHR | VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT_KHR) :
(VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT_KHR | VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT_KHR);
if (begin) {
*barrier = (VkImageMemoryBarrier2KHR) {
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2_KHR,
.srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT_KHR,
.srcAccessMask = VK_ACCESS_2_NONE_KHR,
.dstStageMask = stage,
.dstAccessMask = access,
.oldLayout = discard || resolve ? VK_IMAGE_LAYOUT_UNDEFINED : texture->layout,
.newLayout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL_KHR,
.image = texture->handle,
.subresourceRange.aspectMask = texture->aspect,
.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS,
.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS
};
} else {
*barrier = (VkImageMemoryBarrier2KHR) {
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2_KHR,
.srcStageMask = stage,
.srcAccessMask = access,
.dstStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT_KHR,
.dstAccessMask = VK_ACCESS_2_NONE_KHR,
.oldLayout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL_KHR,
.newLayout = texture->layout,
.image = texture->handle,
.subresourceRange.aspectMask = texture->aspect,
.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS,
.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS
};
}
return true;
}
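// Chooses the layout a texture idles in between passes, based on its usage flags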
static VkImageLayout getNaturalLayout(uint32_t usage, VkImageAspectFlags aspect) {
if (usage & (GPU_TEXTURE_STORAGE | GPU_TEXTURE_COPY_SRC | GPU_TEXTURE_COPY_DST)) {
return VK_IMAGE_LAYOUT_GENERAL;
} else if (usage & GPU_TEXTURE_SAMPLE) {
return VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL_KHR;
} else {
return VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL_KHR;
}
}
static VkFormat convertFormat(gpu_texture_format format, int colorspace) {
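// Column 0 is the linear format, column 1 the sRGB variant; formats without an sRGB variant repeat the linear one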
static const VkFormat formats[][2] = {
[GPU_FORMAT_R8] = { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_SRGB },
[GPU_FORMAT_RG8] = { VK_FORMAT_R8G8_UNORM, VK_FORMAT_R8G8_SRGB },
[GPU_FORMAT_RGBA8] = { VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_R8G8B8A8_SRGB },
[GPU_FORMAT_R16] = { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM },
[GPU_FORMAT_RG16] = { VK_FORMAT_R16G16_UNORM, VK_FORMAT_R16G16_UNORM },
[GPU_FORMAT_RGBA16] = { VK_FORMAT_R16G16B16A16_UNORM, VK_FORMAT_R16G16B16A16_UNORM },
[GPU_FORMAT_R16F] = { VK_FORMAT_R16_SFLOAT, VK_FORMAT_R16_SFLOAT },
[GPU_FORMAT_RG16F] = { VK_FORMAT_R16G16_SFLOAT, VK_FORMAT_R16G16_SFLOAT },
[GPU_FORMAT_RGBA16F] = { VK_FORMAT_R16G16B16A16_SFLOAT, VK_FORMAT_R16G16B16A16_SFLOAT },
[GPU_FORMAT_R32F] = { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT },
[GPU_FORMAT_RG32F] = { VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R32G32_SFLOAT },
[GPU_FORMAT_RGBA32F] = { VK_FORMAT_R32G32B32A32_SFLOAT, VK_FORMAT_R32G32B32A32_SFLOAT },
[GPU_FORMAT_RGB565] = { VK_FORMAT_R5G6B5_UNORM_PACK16, VK_FORMAT_R5G6B5_UNORM_PACK16 },
[GPU_FORMAT_RGB5A1] = { VK_FORMAT_R5G5B5A1_UNORM_PACK16, VK_FORMAT_R5G5B5A1_UNORM_PACK16 },
[GPU_FORMAT_RGB10A2] = { VK_FORMAT_A2B10G10R10_UNORM_PACK32, VK_FORMAT_A2B10G10R10_UNORM_PACK32 },
[GPU_FORMAT_RG11B10F] = { VK_FORMAT_B10G11R11_UFLOAT_PACK32, VK_FORMAT_B10G11R11_UFLOAT_PACK32 },
[GPU_FORMAT_D16] = { VK_FORMAT_D16_UNORM, VK_FORMAT_D16_UNORM },
[GPU_FORMAT_D32F] = { VK_FORMAT_D32_SFLOAT, VK_FORMAT_D32_SFLOAT },
[GPU_FORMAT_D24S8] = { VK_FORMAT_D24_UNORM_S8_UINT, VK_FORMAT_D24_UNORM_S8_UINT },
[GPU_FORMAT_D32FS8] = { VK_FORMAT_D32_SFLOAT_S8_UINT, VK_FORMAT_D32_SFLOAT_S8_UINT },
[GPU_FORMAT_BC1] = { VK_FORMAT_BC1_RGB_UNORM_BLOCK, VK_FORMAT_BC1_RGB_SRGB_BLOCK },
[GPU_FORMAT_BC2] = { VK_FORMAT_BC2_UNORM_BLOCK, VK_FORMAT_BC2_SRGB_BLOCK },
[GPU_FORMAT_BC3] = { VK_FORMAT_BC3_UNORM_BLOCK, VK_FORMAT_BC3_SRGB_BLOCK },
[GPU_FORMAT_BC4U] = { VK_FORMAT_BC4_UNORM_BLOCK, VK_FORMAT_BC4_UNORM_BLOCK },
[GPU_FORMAT_BC4S] = { VK_FORMAT_BC4_SNORM_BLOCK, VK_FORMAT_BC4_SNORM_BLOCK },
[GPU_FORMAT_BC5U] = { VK_FORMAT_BC5_UNORM_BLOCK, VK_FORMAT_BC5_UNORM_BLOCK },
[GPU_FORMAT_BC5S] = { VK_FORMAT_BC5_SNORM_BLOCK, VK_FORMAT_BC5_SNORM_BLOCK },
[GPU_FORMAT_BC6UF] = { VK_FORMAT_BC6H_UFLOAT_BLOCK, VK_FORMAT_BC6H_UFLOAT_BLOCK },
[GPU_FORMAT_BC6SF] = { VK_FORMAT_BC6H_SFLOAT_BLOCK, VK_FORMAT_BC6H_SFLOAT_BLOCK },
[GPU_FORMAT_BC7] = { VK_FORMAT_BC7_UNORM_BLOCK, VK_FORMAT_BC7_SRGB_BLOCK },
[GPU_FORMAT_ASTC_4x4] = { VK_FORMAT_ASTC_4x4_UNORM_BLOCK, VK_FORMAT_ASTC_4x4_SRGB_BLOCK },
[GPU_FORMAT_ASTC_5x4] = { VK_FORMAT_ASTC_5x4_UNORM_BLOCK, VK_FORMAT_ASTC_5x4_SRGB_BLOCK },
[GPU_FORMAT_ASTC_5x5] = { VK_FORMAT_ASTC_5x5_UNORM_BLOCK, VK_FORMAT_ASTC_5x5_SRGB_BLOCK },
[GPU_FORMAT_ASTC_6x5] = { VK_FORMAT_ASTC_6x5_UNORM_BLOCK, VK_FORMAT_ASTC_6x5_SRGB_BLOCK },
[GPU_FORMAT_ASTC_6x6] = { VK_FORMAT_ASTC_6x6_UNORM_BLOCK, VK_FORMAT_ASTC_6x6_SRGB_BLOCK },
[GPU_FORMAT_ASTC_8x5] = { VK_FORMAT_ASTC_8x5_UNORM_BLOCK, VK_FORMAT_ASTC_8x5_SRGB_BLOCK },
[GPU_FORMAT_ASTC_8x6] = { VK_FORMAT_ASTC_8x6_UNORM_BLOCK, VK_FORMAT_ASTC_8x6_SRGB_BLOCK },
[GPU_FORMAT_ASTC_8x8] = { VK_FORMAT_ASTC_8x8_UNORM_BLOCK, VK_FORMAT_ASTC_8x8_SRGB_BLOCK },
[GPU_FORMAT_ASTC_10x5] = { VK_FORMAT_ASTC_10x5_UNORM_BLOCK, VK_FORMAT_ASTC_10x5_SRGB_BLOCK },
[GPU_FORMAT_ASTC_10x6] = { VK_FORMAT_ASTC_10x6_UNORM_BLOCK, VK_FORMAT_ASTC_10x6_SRGB_BLOCK },
[GPU_FORMAT_ASTC_10x8] = { VK_FORMAT_ASTC_10x8_UNORM_BLOCK, VK_FORMAT_ASTC_10x8_SRGB_BLOCK },
[GPU_FORMAT_ASTC_10x10] = { VK_FORMAT_ASTC_10x10_UNORM_BLOCK, VK_FORMAT_ASTC_10x10_SRGB_BLOCK },
[GPU_FORMAT_ASTC_12x10] = { VK_FORMAT_ASTC_12x10_UNORM_BLOCK, VK_FORMAT_ASTC_12x10_SRGB_BLOCK },
[GPU_FORMAT_ASTC_12x12] = { VK_FORMAT_ASTC_12x12_UNORM_BLOCK, VK_FORMAT_ASTC_12x12_SRGB_BLOCK }
};
if (format == GPU_FORMAT_SURFACE) {
return state.surface.format.format;
}
return formats[format][colorspace];
}
static VkPipelineStageFlags2 convertPhase(gpu_phase phase, bool dst) {
VkPipelineStageFlags2 flags = 0;
if (phase & GPU_PHASE_INDIRECT) flags |= VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT_KHR;
if (phase & GPU_PHASE_INPUT_INDEX) flags |= VK_PIPELINE_STAGE_2_INDEX_INPUT_BIT_KHR;
if (phase & GPU_PHASE_INPUT_VERTEX) flags |= VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT_KHR;
if (phase & GPU_PHASE_SHADER_VERTEX) flags |= VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT_KHR;
if (phase & GPU_PHASE_SHADER_FRAGMENT) flags |= VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT_KHR;
if (phase & GPU_PHASE_SHADER_COMPUTE) flags |= VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR;
if (phase & GPU_PHASE_DEPTH_EARLY) flags |= VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR;
if (phase & GPU_PHASE_DEPTH_LATE) flags |= VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR;
if (phase & GPU_PHASE_COLOR) flags |= VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT_KHR;
if (phase & GPU_PHASE_COPY) flags |= VK_PIPELINE_STAGE_2_COPY_BIT_KHR;
if (phase & GPU_PHASE_CLEAR) flags |= VK_PIPELINE_STAGE_2_CLEAR_BIT_KHR;
if (phase & GPU_PHASE_BLIT) flags |= VK_PIPELINE_STAGE_2_BLIT_BIT_KHR;
return flags;
}
static VkAccessFlags2 convertCache(gpu_cache cache) {
VkAccessFlags2 flags = 0;
if (cache & GPU_CACHE_INDIRECT) flags |= VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT_KHR;
if (cache & GPU_CACHE_INDEX) flags |= VK_ACCESS_2_INDEX_READ_BIT_KHR;
if (cache & GPU_CACHE_VERTEX) flags |= VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT_KHR;
if (cache & GPU_CACHE_UNIFORM) flags |= VK_ACCESS_2_UNIFORM_READ_BIT_KHR;
if (cache & GPU_CACHE_TEXTURE) flags |= VK_ACCESS_2_SHADER_SAMPLED_READ_BIT_KHR;
if (cache & GPU_CACHE_STORAGE_READ) flags |= VK_ACCESS_2_SHADER_STORAGE_READ_BIT_KHR;
if (cache & GPU_CACHE_STORAGE_WRITE) flags |= VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT_KHR;
if (cache & GPU_CACHE_DEPTH_READ) flags |= VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT_KHR;
if (cache & GPU_CACHE_DEPTH_WRITE) flags |= VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT_KHR;
if (cache & GPU_CACHE_COLOR_READ) flags |= VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT_KHR;
if (cache & GPU_CACHE_COLOR_WRITE) flags |= VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT_KHR;
if (cache & GPU_CACHE_TRANSFER_READ) flags |= VK_ACCESS_2_TRANSFER_READ_BIT_KHR;
if (cache & GPU_CACHE_TRANSFER_WRITE) flags |= VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR;
return flags;
}
static VkBool32 relay(VkDebugUtilsMessageSeverityFlagBitsEXT severity, VkDebugUtilsMessageTypeFlagsEXT flags, const VkDebugUtilsMessengerCallbackDataEXT* data, void* userdata) {
LOG(data->pMessage);
return VK_FALSE;
}
static void nickname(void* handle, VkObjectType type, const char* name) {
if (name && state.extensions.debug) {
union { uint64_t u64; void* p; } pointer = { .p = handle };
VkDebugUtilsObjectNameInfoEXT info = {
.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
.objectType = type,
.objectHandle = pointer.u64,
.pObjectName = name
};
VK(vkSetDebugUtilsObjectNameEXT(state.device, &info), "Nickname failed") {}
}
}
static bool vcheck(VkResult result, const char* message) {
if (result >= 0) return true;
if (!state.config.fnLog) return false;
const char* errorCode = "";
#define CASE(x) case x: errorCode = " (" #x ")"; break;
switch (result) {
CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
CASE(VK_ERROR_INITIALIZATION_FAILED);
CASE(VK_ERROR_DEVICE_LOST);
CASE(VK_ERROR_MEMORY_MAP_FAILED);
CASE(VK_ERROR_LAYER_NOT_PRESENT);
CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
CASE(VK_ERROR_FEATURE_NOT_PRESENT);
CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
CASE(VK_ERROR_TOO_MANY_OBJECTS);
CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
CASE(VK_ERROR_FRAGMENTED_POOL);
CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
default: break;
}
#undef CASE
char string[128];
size_t length1 = strlen(message);
size_t length2 = strlen(errorCode);
if (length1 + length2 >= sizeof(string)) {
state.config.fnLog(state.config.userdata, message, true);
return false;
} else {
memcpy(string, message, length1);
memcpy(string + length1, errorCode, length2);
string[length1 + length2] = '\0';
state.config.fnLog(state.config.userdata, string, true);
return false;
}
}
static bool check(bool condition, const char* message) {
if (!condition && state.config.fnLog) {
state.config.fnLog(state.config.userdata, message, true);
}
return condition;
}