Render passes;

This commit is contained in:
bjorn 2022-05-11 12:51:13 -07:00
parent 24f09ea608
commit f7b4ec725f
4 changed files with 310 additions and 10 deletions

View File

@ -336,7 +336,7 @@ static void luax_checkbufferformat(lua_State* L, int index, BufferInfo* info) {
}
}
static int luax_checkcanvas(lua_State* L, int index, PassInfo* info) {
static Canvas luax_checkcanvas(lua_State* L, int index) {
Canvas canvas = {
.loads = { LOAD_CLEAR, LOAD_CLEAR, LOAD_CLEAR, LOAD_CLEAR },
.depth.format = FORMAT_D32F,
@ -354,7 +354,7 @@ static int luax_checkcanvas(lua_State* L, int index, PassInfo* info) {
} else if (lua_isuserdata(L, index)) {
canvas.textures[0] = luax_checktype(L, index, Texture);
} else if (!lua_istable(L, index)) {
return luax_typeerror(L, index, "Texture or table");
luax_typeerror(L, index, "Texture or table");
} else {
for (uint32_t i = 0; i < 4; i++) {
lua_rawgeti(L, index, i + 1);
@ -439,7 +439,7 @@ static int luax_checkcanvas(lua_State* L, int index, PassInfo* info) {
lua_pop(L, 1);
}
return index + 1;
return canvas;
}
static int l_lovrGraphicsInit(lua_State* L) {
@ -584,11 +584,9 @@ static int l_lovrGraphicsIsFormatSupported(lua_State* L) {
static int l_lovrGraphicsPass(lua_State* L) {
PassInfo info;
int index = 1;
info.type = luax_checkenum(L, index++, PassType, NULL);
if (info.type == PASS_RENDER) index = luax_checkcanvas(L, 2, &info);
info.label = lua_tostring(L, index++);
info.type = luax_checkenum(L, 1, PassType, NULL);
if (info.type == PASS_RENDER) info.canvas = luax_checkcanvas(L, 2);
info.label = lua_tostring(L, info.type == PASS_RENDER ? 3 : 2);
Pass* pass = lovrGraphicsGetPass(&info);
luax_pushtype(L, Pass, pass);
@ -683,6 +681,7 @@ static int l_lovrGraphicsNewTexture(lua_State* L) {
info.depth = luax_checku32(L, index++);
info.type = TEXTURE_ARRAY;
}
info.usage |= TEXTURE_RENDER;
} else if (lua_istable(L, 1)) {
info.imageCount = luax_len(L, index++);
images = info.imageCount > COUNTOF(stack) ? malloc(info.imageCount * sizeof(Image*)) : stack;
@ -779,7 +778,7 @@ static int l_lovrGraphicsNewTexture(lua_State* L) {
}
break;
}
case LUA_TNIL: info.usage = (info.imageCount == 0) ? TEXTURE_RENDER | TEXTURE_SAMPLE : TEXTURE_SAMPLE; break;
case LUA_TNIL: break;
default: return luaL_error(L, "Expected Texture usage to be a string, table, or nil");
}
lua_pop(L, 1);

View File

@ -409,9 +409,44 @@ void gpu_pipeline_destroy(gpu_pipeline* pipeline);
// Stream
// Load operation of a render pass attachment (see the `load` fields of the
// attachment structs). Callers cast their own load enum directly to this type,
// so the numeric values are spelled out and must not be reordered.
typedef enum {
  GPU_LOAD_OP_LOAD = 0,
  GPU_LOAD_OP_CLEAR = 1,
  GPU_LOAD_OP_DISCARD = 2
} gpu_load_op;
// Save operation of a render pass attachment (see the `save` fields of the
// attachment structs). Values are spelled out so the ordering is explicit.
typedef enum {
  GPU_SAVE_OP_SAVE = 0,
  GPU_SAVE_OP_DISCARD = 1
} gpu_save_op;
// Describes one color attachment of a gpu_canvas.
typedef struct {
// Texture that is rendered to. gpu_render_begin stops at the first color attachment whose texture is NULL.
gpu_texture* texture;
// Optional resolve target. gpu_render_begin decides whether the pass resolves by looking at
// color[0].resolve only, and then reads this field on every used color attachment.
gpu_texture* resolve;
// Load and save operations, forwarded into the render pass description by gpu_render_begin.
gpu_load_op load;
gpu_save_op save;
// Clear color as 4 floats, copied into the attachment's clear value by gpu_render_begin.
float clear[4];
} gpu_color_attachment;
// Describes the depth (and stencil) attachment of a gpu_canvas.
typedef struct {
// Depth texture, or NULL when the canvas has no depth attachment.
gpu_texture* texture;
// NOTE(review): the Vulkan gpu_render_begin forwards only `load` and `save` into the render pass
// description; stencilLoad/stencilSave look unused there -- confirm.
gpu_load_op load, stencilLoad;
gpu_save_op save, stencilSave;
// Clear values, copied into the depth/stencil clear value by gpu_render_begin.
struct { float depth; uint8_t stencil; } clear;
} gpu_depth_attachment;
// Set of attachments and dimensions passed to gpu_render_begin.
typedef struct {
// Up to 4 color attachments. Used entries must be contiguous from index 0, because
// gpu_render_begin stops at the first entry with a NULL texture.
gpu_color_attachment color[4];
// Depth attachment; ignored by gpu_render_begin when depth.texture is NULL.
gpu_depth_attachment depth;
// Width and height, used for the framebuffer dimensions and the render area.
uint32_t size[2];
} gpu_canvas;
// Starts a stream; `label` is a caller-supplied name for it.
gpu_stream* gpu_stream_begin(const char* label);
// Finishes recording the stream (the Vulkan backend ends the stream's command buffer here).
void gpu_stream_end(gpu_stream* stream);
// Begins a render pass on the stream that targets the attachments described by `canvas`.
void gpu_render_begin(gpu_stream* stream, gpu_canvas* canvas);
// Ends the render pass previously begun on the stream.
void gpu_render_end(gpu_stream* stream);
// Compute pass markers. NOTE(review): the Vulkan implementations are currently empty stubs.
void gpu_compute_begin(gpu_stream* stream);
void gpu_compute_end(gpu_stream* stream);
// Records a buffer-to-buffer copy of `size` bytes; offsets are relative to each gpu_buffer.
void gpu_copy_buffers(gpu_stream* stream, gpu_buffer* src, gpu_buffer* dst, uint32_t srcOffset, uint32_t dstOffset, uint32_t size);
void gpu_copy_textures(gpu_stream* stream, gpu_texture* src, gpu_texture* dst, uint16_t srcOffset[4], uint16_t dstOffset[4], uint16_t size[3]);
void gpu_copy_buffer_texture(gpu_stream* stream, gpu_buffer* src, gpu_texture* dst, uint32_t srcOffset, uint16_t dstOffset[4], uint16_t extent[3]);

View File

@ -153,6 +153,7 @@ static struct {
VkPipelineCache pipelineCache;
VkDebugUtilsMessengerEXT messenger;
gpu_cache_entry renderpasses[16][4];
gpu_cache_entry framebuffers[16][4];
gpu_allocator allocators[GPU_MEMORY_COUNT];
uint8_t allocatorLookup[GPU_MEMORY_COUNT];
gpu_scratchpad scratchpad[2];
@ -184,6 +185,7 @@ static void gpu_release(gpu_memory* memory);
static void condemn(void* handle, VkObjectType type);
static void expunge(void);
static VkRenderPass getCachedRenderPass(gpu_pass_info* pass, bool compatible);
static VkFramebuffer getCachedFramebuffer(VkRenderPass pass, VkImageView images[9], uint32_t imageCount, uint32_t size[2]);
static VkImageLayout getNaturalLayout(uint32_t usage, VkImageAspectFlags aspect);
static VkFormat convertFormat(gpu_texture_format format, int colorspace);
static VkBool32 relay(VkDebugUtilsMessageSeverityFlagBitsEXT severity, VkDebugUtilsMessageTypeFlagsEXT flags, const VkDebugUtilsMessengerCallbackDataEXT* data, void* userdata);
@ -1198,6 +1200,73 @@ void gpu_stream_end(gpu_stream* stream) {
VK(vkEndCommandBuffer(stream->commands), "Failed to end stream") return;
}
// Begins a render pass on the stream's command buffer, targeting the canvas attachments.
//
// Image views (and their clear values) are gathered in this order:
//   1. one view per color texture, stopping at the first NULL texture,
//   2. one resolve view per color texture, only when color[0] has a resolve target,
//   3. the depth texture's view, when the canvas has one.
// A cached render pass and framebuffer are then looked up and the pass is begun
// with inline subpass contents, covering the full canvas size.
//
// The canvas must contain at least one color or depth texture; this is not checked here.
void gpu_render_begin(gpu_stream* stream, gpu_canvas* canvas) {
  // The first color texture (or the depth texture when there is no color) supplies
  // the view count and sample count for the whole pass.
  gpu_texture* reference = canvas->depth.texture;
  if (canvas->color[0].texture) {
    reference = canvas->color[0].texture;
  }

  gpu_pass_info pass = {
    .views = reference->layers,
    .samples = reference->samples,
    .resolve = !!canvas->color[0].resolve
  };

  VkImageView views[9];
  VkClearValue clearValues[9];

  // Color attachments
  for (uint32_t i = 0; i < COUNTOF(canvas->color); i++) {
    gpu_texture* color = canvas->color[i].texture;

    if (!color) {
      break;
    }

    views[i] = color->view;
    memcpy(clearValues[i].color.float32, canvas->color[i].clear, 4 * sizeof(float));
    pass.color[i].format = convertFormat(color->format, color->srgb);
    pass.color[i].layout = color->layout;
    pass.color[i].load = canvas->color[i].load;
    pass.color[i].save = canvas->color[i].save;
    pass.count++;
  }

  // Resolve attachments are placed directly after the color attachments.  Their
  // clear values are left unset.
  if (pass.resolve) {
    uint32_t colorCount = pass.count;

    for (uint32_t i = 0; i < colorCount; i++) {
      views[colorCount + i] = canvas->color[i].resolve->view;
    }

    pass.count = colorCount * 2;
  }

  // Depth attachment goes last
  gpu_texture* depth = canvas->depth.texture;

  if (depth) {
    uint32_t slot = pass.count++;
    views[slot] = depth->view;
    clearValues[slot].depthStencil.depth = canvas->depth.clear.depth;
    clearValues[slot].depthStencil.stencil = canvas->depth.clear.stencil;
    pass.depth.format = convertFormat(depth->format, LINEAR);
    pass.depth.layout = depth->layout;
    pass.depth.load = canvas->depth.load;
    pass.depth.save = canvas->depth.save;
  }

  // NOTE(review): at this point pass.count is the TOTAL number of attachments
  // (color + resolve + depth), and that is what getCachedRenderPass receives.
  // Confirm it does not expect the color attachment count instead.
  VkRenderPass renderPass = getCachedRenderPass(&pass, false);
  VkFramebuffer framebuffer = getCachedFramebuffer(renderPass, views, pass.count, canvas->size);

  VkRenderPassBeginInfo begin = {
    .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
    .renderPass = renderPass,
    .framebuffer = framebuffer,
    .renderArea = {
      .offset = { 0, 0 },
      .extent = { canvas->size[0], canvas->size[1] }
    },
    .clearValueCount = pass.count,
    .pClearValues = clearValues
  };

  vkCmdBeginRenderPass(stream->commands, &begin, VK_SUBPASS_CONTENTS_INLINE);
}
// Ends the render pass that is currently being recorded on the stream's command buffer.
void gpu_render_end(gpu_stream* stream) {
vkCmdEndRenderPass(stream->commands);
}
// Marks the start of compute work on the stream.
void gpu_compute_begin(gpu_stream* stream) {
// No commands are recorded here yet.
}
// Marks the end of compute work on the stream.
void gpu_compute_end(gpu_stream* stream) {
// No commands are recorded here yet.
}
void gpu_copy_buffers(gpu_stream* stream, gpu_buffer* src, gpu_buffer* dst, uint32_t srcOffset, uint32_t dstOffset, uint32_t size) {
vkCmdCopyBuffer(stream->commands, src->handle, dst->handle, 1, &(VkBufferCopy) {
.srcOffset = src->offset + srcOffset,
@ -1697,6 +1766,12 @@ void gpu_destroy(void) {
if (tick->semaphores[1]) vkDestroySemaphore(state.device, tick->semaphores[1], NULL);
if (tick->fence) vkDestroyFence(state.device, tick->fence, NULL);
}
for (uint32_t i = 0; i < COUNTOF(state.framebuffers); i++) {
for (uint32_t j = 0; j < COUNTOF(state.framebuffers[0]); j++) {
VkFramebuffer framebuffer = state.framebuffers[i][j].object;
if (framebuffer) vkDestroyFramebuffer(state.device, framebuffer, NULL);
}
}
for (uint32_t i = 0; i < COUNTOF(state.renderpasses); i++) {
for (uint32_t j = 0; j < COUNTOF(state.renderpasses[0]); j++) {
VkRenderPass pass = state.renderpasses[i][j].object;
@ -2023,6 +2098,56 @@ static VkRenderPass getCachedRenderPass(gpu_pass_info* pass, bool compatible) {
return handle;
}
// Returns a framebuffer for the given render pass, attachment views, and size,
// creating it (and evicting an older cache entry) when it is not cached yet.
//
// The cache is a small table: the hash of (images, size, pass) selects a row, and
// each row holds a handful of entries.  An entry's 64 bit `hash` field stores the
// 32 bit hash in its low half and the CPU tick of its last use in its high half,
// so comparing whole `hash` fields orders entries by how recently they were used.
//
// - pass: render pass the framebuffer must be compatible with
// - images: attachment image views, `imageCount` of them
// - size: framebuffer width and height
//
// Returns VK_NULL_HANDLE if the framebuffer could not be created.
static VkFramebuffer getCachedFramebuffer(VkRenderPass pass, VkImageView images[9], uint32_t imageCount, uint32_t size[2]) {
  uint32_t hash = HASH_SEED;
  hash = hash32(hash, images, imageCount * sizeof(images[0]));
  hash = hash32(hash, size, 2 * sizeof(uint32_t));
  hash = hash32(hash, &pass, sizeof(pass));

  // The row count is a power of two, so masking selects a row
  uint32_t rows = COUNTOF(state.framebuffers);
  uint32_t cols = COUNTOF(state.framebuffers[0]);
  gpu_cache_entry* row = state.framebuffers[hash & (rows - 1)];

  // Cache hit: refresh the tick and reuse the object.  Empty entries are skipped so
  // that a computed hash of zero can never "hit" an unused slot and return a null
  // framebuffer without attempting creation.
  for (uint32_t i = 0; i < cols; i++) {
    if (row[i].object && (row[i].hash & ~0u) == hash) {
      row[i].hash = ((uint64_t) state.tick[CPU] << 32) | hash;
      return row[i].object;
    }
  }

  VkFramebufferCreateInfo info = {
    .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
    .renderPass = pass,
    .attachmentCount = imageCount,
    .pAttachments = images,
    .width = size[0],
    .height = size[1],
    .layers = 1
  };

  // Cache miss: pick an empty entry if there is one, otherwise the least recently used
  gpu_cache_entry* entry = &row[0];
  for (uint32_t i = 1; i < cols; i++) {
    if (!row[i].object || row[i].hash < entry->hash) {
      entry = &row[i];
    }
  }

  // Release the evicted framebuffer.  It is destroyed right away when gpu_finished
  // reports its last-use tick as done, otherwise it is handed to condemn().
  // The entry is cleared immediately so that a failed creation below can not leave
  // a stale handle in the cache (which could be returned by a later lookup or
  // destroyed a second time during gpu_destroy).
  if (entry->object) {
    if (gpu_finished(entry->hash >> 32)) {
      vkDestroyFramebuffer(state.device, entry->object, NULL);
    } else {
      condemn(entry->object, VK_OBJECT_TYPE_FRAMEBUFFER);
    }

    entry->object = VK_NULL_HANDLE;
    entry->hash = 0;
  }

  VkFramebuffer framebuffer;
  VK(vkCreateFramebuffer(state.device, &info, NULL, &framebuffer), "Failed to create framebuffer") {
    return VK_NULL_HANDLE;
  }

  entry->object = framebuffer;
  entry->hash = ((uint64_t) state.tick[CPU] << 32) | hash;
  return framebuffer;
}
static VkImageLayout getNaturalLayout(uint32_t usage, VkImageAspectFlags aspect) {
if (usage & (GPU_TEXTURE_STORAGE | GPU_TEXTURE_COPY_SRC | GPU_TEXTURE_COPY_DST)) {
return VK_IMAGE_LAYOUT_GENERAL;

View File

@ -1,6 +1,7 @@
#include "graphics/graphics.h"
#include "data/blob.h"
#include "data/image.h"
#include "math/math.h"
#include "core/gpu.h"
#include "core/maf.h"
#include "core/os.h"
@ -61,6 +62,12 @@ struct Pass {
uint32_t pipelineIndex;
};
// One slot of the scratch texture pool (state.scratchTextures), managed by getScratchTexture.
typedef struct {
// Heap-allocated gpu_texture, or NULL while the slot has never been used.
gpu_texture* handle;
// Hash of the texture's properties (size, layers, format, srgb, samples), used to find a match.
uint32_t hash;
// Value of state.tick when the texture was last handed out; a slot is not handed out twice in one tick.
uint32_t tick;
} ScratchTexture;
typedef struct {
char* memory;
uint32_t cursor;
@ -75,6 +82,7 @@ static struct {
gpu_device_info device;
gpu_features features;
gpu_limits limits;
ScratchTexture scratchTextures[16];
Allocator allocator;
} state;
@ -83,6 +91,7 @@ static struct {
static void* tempAlloc(size_t size);
static void beginFrame(void);
static gpu_stream* getTransfers(void);
static gpu_texture* getScratchTexture(uint32_t size[2], uint32_t layers, TextureFormat format, bool srgb, uint32_t samples);
static size_t measureTexture(TextureFormat format, uint16_t w, uint16_t h, uint16_t d);
static void onMessage(void* context, const char* message, bool severe);
@ -119,6 +128,12 @@ bool lovrGraphicsInit(bool debug) {
void lovrGraphicsDestroy() {
if (!state.initialized) return;
for (uint32_t i = 0; i < COUNTOF(state.scratchTextures); i++) {
if (state.scratchTextures[i].handle) {
gpu_texture_destroy(state.scratchTextures[i].handle);
free(state.scratchTextures[i].handle);
}
}
gpu_destroy();
glslang_finalize_process();
os_vm_free(state.allocator.memory, MAX_FRAME_MEMORY);
@ -204,6 +219,9 @@ void lovrGraphicsSubmit(Pass** passes, uint32_t count) {
for (uint32_t i = 0; i < count; i++) {
streams[extraPassCount + i] = passes[i]->stream;
if (passes[i]->info.type == PASS_RENDER) {
gpu_render_end(passes[i]->stream);
}
}
for (uint32_t i = 0; i < extraPassCount + count; i++) {
@ -629,6 +647,78 @@ Pass* lovrGraphicsGetPass(PassInfo* info) {
pass->ref = 1;
pass->info = *info;
pass->stream = gpu_stream_begin(info->label);
// Render passes: validate the canvas, translate it into a gpu_canvas, and begin
// the render pass on the freshly created stream.
if (info->type == PASS_RENDER) {
  Canvas* canvas = &info->canvas;

  // Make sure there is something to render to before deriving `main` from it, so
  // that a NULL depth texture is never used to form the TextureInfo pointer.
  lovrCheck(canvas->textures[0] || canvas->depth.texture, "Render pass must have at least one color or depth texture");

  // The first color texture (or the depth texture if there is no color) defines
  // the dimensions, view count, and sample count all other textures must match.
  const TextureInfo* main = canvas->textures[0] ? &canvas->textures[0]->info : &canvas->depth.texture->info;
  lovrCheck(main->width <= state.limits.renderSize[0], "Render pass width (%d) exceeds the renderSize limit of this GPU (%d)", main->width, state.limits.renderSize[0]);
  lovrCheck(main->height <= state.limits.renderSize[1], "Render pass height (%d) exceeds the renderSize limit of this GPU (%d)", main->height, state.limits.renderSize[1]);
  lovrCheck(main->depth <= state.limits.renderSize[2], "Render pass view count (%d) exceeds the renderSize limit of this GPU (%d)", main->depth, state.limits.renderSize[2]);
  lovrCheck(canvas->samples == 1 || canvas->samples == 4, "Currently, render pass sample count must be 1 or 4");

  // Color textures: the list ends at the first NULL entry
  uint32_t colorTextureCount = 0;
  for (uint32_t i = 0; i < COUNTOF(canvas->textures) && canvas->textures[i]; i++, colorTextureCount++) {
    const TextureInfo* texture = &canvas->textures[i]->info;
    bool renderable = state.features.formats[texture->format] & GPU_FEATURE_RENDER;
    lovrCheck(renderable, "This GPU does not support rendering to the texture format used by Canvas texture #%d", i + 1);
    lovrCheck(texture->usage & TEXTURE_RENDER, "Texture must be created with the 'render' flag to render to it");
    lovrCheck(texture->width == main->width, "Render pass texture sizes must match");
    lovrCheck(texture->height == main->height, "Render pass texture sizes must match");
    lovrCheck(texture->depth == main->depth, "Render pass texture sizes must match");
    lovrCheck(texture->samples == main->samples, "Render pass texture sample counts must match");
  }

  // Depth buffer: either an explicit texture, or a format for a temporary one.
  // A zero format without a texture means the pass has no depth buffer.
  if (canvas->depth.texture || canvas->depth.format) {
    TextureFormat format = canvas->depth.texture ? canvas->depth.texture->info.format : canvas->depth.format;
    bool renderable = state.features.formats[format] & GPU_FEATURE_RENDER;
    lovrCheck(format == FORMAT_D16 || format == FORMAT_D24S8 || format == FORMAT_D32F, "Depth buffer must use a depth format");
    lovrCheck(renderable, "This GPU does not support depth buffers with this texture format");
    if (canvas->depth.texture) {
      const TextureInfo* texture = &canvas->depth.texture->info;
      lovrCheck(texture->usage & TEXTURE_RENDER, "Texture must be created with the 'render' flag to render to it");
      lovrCheck(texture->width == main->width, "Render pass texture sizes must match");
      lovrCheck(texture->height == main->height, "Render pass texture sizes must match");
      lovrCheck(texture->depth == main->depth, "Render pass texture sizes must match");
      lovrCheck(texture->samples == main->samples, "Currently, depth buffer sample count must match the main render pass sample count");
    }
  }

  gpu_canvas target = {
    .size = { main->width, main->height }
  };

  for (uint32_t i = 0; i < colorTextureCount; i++) {
    // Multisampled pass with single-sampled textures: render into a multisampled
    // scratch texture and resolve into the user's texture.
    if (main->samples == 1 && canvas->samples > 1) {
      TextureFormat format = canvas->textures[i]->info.format;
      bool srgb = canvas->textures[i]->info.srgb;
      target.color[i].texture = getScratchTexture(target.size, main->depth, format, srgb, canvas->samples);
      target.color[i].resolve = canvas->textures[i]->renderView;
    } else {
      target.color[i].texture = canvas->textures[i]->renderView;
    }

    target.color[i].load = (gpu_load_op) canvas->loads[i];
    target.color[i].save = GPU_SAVE_OP_SAVE;

    // RGB is converted from gamma to linear space; alpha is passed through as-is
    target.color[i].clear[0] = lovrMathGammaToLinear(canvas->clears[i][0]);
    target.color[i].clear[1] = lovrMathGammaToLinear(canvas->clears[i][1]);
    target.color[i].clear[2] = lovrMathGammaToLinear(canvas->clears[i][2]);
    target.color[i].clear[3] = canvas->clears[i][3];
  }

  if (canvas->depth.texture) {
    target.depth.texture = canvas->depth.texture->renderView;
  } else if (canvas->depth.format) {
    // Only create a temporary depth buffer when a depth format was requested;
    // otherwise the pass is left without a depth attachment.
    target.depth.texture = getScratchTexture(target.size, main->depth, canvas->depth.format, false, canvas->samples);
  }

  target.depth.load = target.depth.stencilLoad = (gpu_load_op) canvas->depth.load;
  // Temporary depth buffers are never read back, so their contents are discarded
  target.depth.save = canvas->depth.texture ? GPU_SAVE_OP_SAVE : GPU_SAVE_OP_DISCARD;
  target.depth.clear.depth = canvas->depth.clear;

  gpu_render_begin(pass->stream, &target);
}
return pass;
}
@ -881,6 +971,57 @@ static gpu_stream* getTransfers(void) {
return state.transfers->stream;
}
// Returns a temporary render texture with the requested properties, taken from
// the pool in state.scratchTextures.
//
// A pooled texture is reused when its property hash matches and it has not
// already been handed out during the current tick.  Otherwise a new texture is
// created in the first empty slot, or, when the pool is full, in place of the
// texture with the smallest tick.
//
// - size: width and height
// - layers: number of array layers
// - format, srgb, samples: texture format, sRGB flag, and sample count
static gpu_texture* getScratchTexture(uint32_t size[2], uint32_t layers, TextureFormat format, bool srgb, uint32_t samples) {
  ScratchTexture* pool = state.scratchTextures;
  const uint32_t capacity = COUNTOF(state.scratchTextures);

  // All properties are packed into 16 bit values and hashed to form the lookup key
  uint16_t key[] = { size[0], size[1], layers, format, srgb, samples };
  uint32_t hash = hash64(key, sizeof(key));

  // Look for a reusable texture; the scan stops at the first empty slot
  for (uint32_t i = 0; i < capacity; i++) {
    ScratchTexture* candidate = &pool[i];

    if (!candidate->handle) {
      break;
    }

    if (candidate->hash != hash || candidate->tick == state.tick) {
      continue;
    }

    candidate->tick = state.tick;
    return candidate->handle;
  }

  // No match, so a new texture is needed
  gpu_texture_info info = {
    .type = GPU_TEXTURE_ARRAY,
    .format = (gpu_texture_format) format,
    .size[0] = size[0],
    .size[1] = size[1],
    .size[2] = layers,
    .mipmaps = 1,
    .samples = samples,
    .usage = GPU_TEXTURE_RENDER | GPU_TEXTURE_TRANSIENT,
    .upload.stream = getTransfers(),
    .srgb = srgb
  };

  // Choose where it goes: the first empty slot wins, otherwise the slot with the
  // smallest tick (the earliest such slot on ties)
  ScratchTexture* victim = &pool[0];
  uint32_t oldestTick = ~0u;

  for (uint32_t i = 0; i < capacity; i++) {
    if (!pool[i].handle) {
      victim = &pool[i];
      break;
    }

    if (pool[i].tick < oldestTick) {
      oldestTick = pool[i].tick;
      victim = &pool[i];
    }
  }

  // An occupied slot keeps its allocation and only has its GPU texture replaced
  if (victim->handle) {
    gpu_texture_destroy(victim->handle);
  } else {
    victim->handle = calloc(1, gpu_sizeof_texture());
    lovrAssert(victim->handle, "Out of memory");
  }

  lovrAssert(gpu_texture_init(victim->handle, &info), "Failed to create scratch texture");
  victim->hash = hash;
  victim->tick = state.tick;
  return victim->handle;
}
// Returns number of bytes of a 3D texture region of a given format
static size_t measureTexture(TextureFormat format, uint16_t w, uint16_t h, uint16_t d) {
switch (format) {