Improve temporary buffers;

- They no longer live in temporary memory, but in a dedicated pool.
- There are now error checks for using a temporary buffer after it has become invalid.
  - However, these are imperfect, and could be improved.  One idea is to
    avoid recycling a temporary buffer until its refcount decays (i.e.
    Lua finally decides to garbage collect it).  This would explode
    memory usage sometimes, so it could only be enabled when
    t.graphics.debug is true.
This commit is contained in:
bjorn 2022-08-22 20:30:09 -07:00
parent 10252686aa
commit 79cd7c10a1
5 changed files with 79 additions and 34 deletions

View File

@ -155,6 +155,7 @@ bool luax_writefile(const char* filename, const void* data, size_t size);
struct Buffer;
struct ColoredString;
struct Model;
struct Buffer* luax_checkbuffer(struct lua_State* L, int index);
void luax_readbufferfield(struct lua_State* L, int index, int type, void* data);
void luax_readbufferdata(struct lua_State* L, int index, struct Buffer* buffer, char* data);
uint32_t luax_checkcomparemode(struct lua_State* L, int index);

View File

@ -63,6 +63,12 @@ typedef union {
float* f32;
} FieldPointer;
// Reads the Buffer at the given Lua stack index with luax_checktype, then passes the result of
// lovrBufferIsValid to lovrCheck along with the message explaining the single-frame restriction.
// Returns the Buffer pointer obtained from luax_checktype.
Buffer* luax_checkbuffer(lua_State* L, int index) {
  Buffer* candidate = luax_checktype(L, index, Buffer);
  bool usable = lovrBufferIsValid(candidate);
  lovrCheck(usable, "Buffers created with getBuffer can only be used for a single frame (unable to use this Buffer again because lovr.graphics.submit has been called since it was created)");
  return candidate;
}
void luax_readbufferfield(lua_State* L, int index, int type, void* data) {
FieldPointer p = { .raw = data };
if (lua_isuserdata(L, index)) {
@ -216,7 +222,7 @@ void luax_readbufferdata(lua_State* L, int index, Buffer* buffer, char* data) {
}
static int l_lovrBufferGetSize(lua_State* L) {
Buffer* buffer = luax_checktype(L, 1, Buffer);
Buffer* buffer = luax_checkbuffer(L, 1);
const BufferInfo* info = lovrBufferGetInfo(buffer);
uint32_t size = info->length * MAX(info->stride, 1);
lua_pushinteger(L, size);
@ -224,21 +230,21 @@ static int l_lovrBufferGetSize(lua_State* L) {
}
// Lua binding: pushes the `length` field of the Buffer's BufferInfo as an integer and returns 1
// (one Lua return value).
// NOTE(review): both the luax_checktype and luax_checkbuffer declarations appear below; this text
// looks like a diff rendered without +/- markers, so presumably only one exists in the real file.
static int l_lovrBufferGetLength(lua_State* L) {
Buffer* buffer = luax_checktype(L, 1, Buffer);
Buffer* buffer = luax_checkbuffer(L, 1);
const BufferInfo* info = lovrBufferGetInfo(buffer);
lua_pushinteger(L, info->length);
return 1;
}
// Lua binding: pushes the `stride` field of the Buffer's BufferInfo as an integer and returns 1
// (one Lua return value).
// NOTE(review): both the luax_checktype and luax_checkbuffer declarations appear below; this text
// looks like a diff rendered without +/- markers, so presumably only one exists in the real file.
static int l_lovrBufferGetStride(lua_State* L) {
Buffer* buffer = luax_checktype(L, 1, Buffer);
Buffer* buffer = luax_checkbuffer(L, 1);
const BufferInfo* info = lovrBufferGetInfo(buffer);
lua_pushinteger(L, info->stride);
return 1;
}
static int l_lovrBufferGetFormat(lua_State* L) {
Buffer* buffer = luax_checktype(L, 1, Buffer);
Buffer* buffer = luax_checkbuffer(L, 1);
const BufferInfo* info = lovrBufferGetInfo(buffer);
lua_createtable(L, info->fieldCount, 0);
for (uint32_t i = 0; i < info->fieldCount; i++) {
@ -256,7 +262,7 @@ static int l_lovrBufferGetFormat(lua_State* L) {
}
static int l_lovrBufferGetPointer(lua_State* L) {
Buffer* buffer = luax_checktype(L, 1, Buffer);
Buffer* buffer = luax_checkbuffer(L, 1);
if (!lovrBufferIsTemporary(buffer)) {
lua_pushnil(L);
return 1;
@ -267,20 +273,20 @@ static int l_lovrBufferGetPointer(lua_State* L) {
}
// Lua binding: pushes the result of lovrBufferIsTemporary for the Buffer at stack index 1 as a
// boolean and returns 1 (one Lua return value).
// NOTE(review): both the luax_checktype and luax_checkbuffer declarations appear below; this text
// looks like a diff rendered without +/- markers, so presumably only one exists in the real file.
static int l_lovrBufferIsTemporary(lua_State* L) {
Buffer* buffer = luax_checktype(L, 1, Buffer);
Buffer* buffer = luax_checkbuffer(L, 1);
bool temporary = lovrBufferIsTemporary(buffer);
lua_pushboolean(L, temporary);
return 1;
}
// Lua binding: forwards the value at stack index 2 to luax_readbufferdata for the Buffer at stack
// index 1, passing NULL as the `data` argument. Returns 0 (no Lua return values).
// NOTE(review): both the luax_checktype and luax_checkbuffer declarations appear below; this text
// looks like a diff rendered without +/- markers, so presumably only one exists in the real file.
static int l_lovrBufferSetData(lua_State* L) {
Buffer* buffer = luax_checktype(L, 1, Buffer);
Buffer* buffer = luax_checkbuffer(L, 1);
luax_readbufferdata(L, 2, buffer, NULL);
return 0;
}
static int l_lovrBufferClear(lua_State* L) {
Buffer* buffer = luax_checktype(L, 1, Buffer);
Buffer* buffer = luax_checkbuffer(L, 1);
const BufferInfo* info = lovrBufferGetInfo(buffer);
uint32_t index = luaL_optinteger(L, 2, 1);
uint32_t count = luaL_optinteger(L, 3, info->length - index + 1);

View File

@ -824,7 +824,7 @@ static int l_lovrPassDraw(lua_State* L) {
static int l_lovrPassMesh(lua_State* L) {
Pass* pass = luax_checktype(L, 1, Pass);
Buffer* vertices = !lua_toboolean(L, 2) ? NULL : luax_checktype(L, 2, Buffer);
Buffer* vertices = !lua_toboolean(L, 2) ? NULL : luax_checkbuffer(L, 2);
Buffer* indices = luax_totype(L, 3, Buffer);
Buffer* indirect = luax_totype(L, 4, Buffer);
if (indirect) {
@ -892,7 +892,7 @@ static int l_lovrPassCopy(lua_State* L) {
Pass* pass = luax_checktype(L, 1, Pass);
if (lua_istable(L, 2)) {
Buffer* buffer = luax_checktype(L, 3, Buffer);
Buffer* buffer = luax_checkbuffer(L, 3);
uint32_t srcIndex = luax_optu32(L, 4, 1) - 1;
uint32_t dstIndex = luax_optu32(L, 5, 1) - 1;
@ -914,7 +914,7 @@ static int l_lovrPassCopy(lua_State* L) {
Blob* blob = luax_totype(L, 2, Blob);
if (blob) {
Buffer* buffer = luax_checktype(L, 3, Buffer);
Buffer* buffer = luax_checkbuffer(L, 3);
uint32_t srcOffset = luax_optu32(L, 4, 0);
uint32_t dstOffset = luax_optu32(L, 5, 0);
const BufferInfo* info = lovrBufferGetInfo(buffer);
@ -930,7 +930,7 @@ static int l_lovrPassCopy(lua_State* L) {
Buffer* buffer = luax_totype(L, 2, Buffer);
if (buffer) {
Buffer* dst = luax_checktype(L, 3, Buffer);
Buffer* dst = luax_checkbuffer(L, 3);
uint32_t srcOffset = luax_optu32(L, 4, 0);
uint32_t dstOffset = luax_optu32(L, 5, 0);
const BufferInfo* srcInfo = lovrBufferGetInfo(buffer);
@ -988,7 +988,7 @@ static int l_lovrPassCopy(lua_State* L) {
Tally* tally = luax_totype(L, 2, Tally);
if (tally) {
Buffer* buffer = luax_checktype(L, 3, Buffer);
Buffer* buffer = luax_checkbuffer(L, 3);
uint32_t srcIndex = luax_optu32(L, 4, 0);
uint32_t dstOffset = luax_optu32(L, 5, 0);
uint32_t count = luax_optu32(L, 5, ~0u);

View File

@ -24,25 +24,9 @@
uint32_t os_vk_create_surface(void* instance, void** surface);
const char** os_vk_get_instance_extensions(uint32_t* count);
#define MAX_FRAME_MEMORY (1 << 30)
#define MAX_SHADER_RESOURCES 32
#define MATERIALS_PER_BLOCK 256
#define FLOAT_BITS(f) ((union { float f; uint32_t u; }) { f }).u
// Vertex record with three members, in order: position (3 floats), normal (3 floats), uv (2 floats).
typedef struct {
struct { float x, y, z; } position;
struct { float x, y, z; } normal;
struct { float u, v; } uv;
} ShapeVertex;
// Vertex record with five members, in order: position (3 floats), normal (3 floats), uv (2 floats),
// color (4 uint8_t components r, g, b, a), and tangent (3 floats).
typedef struct {
struct { float x, y, z; } position;
struct { float x, y, z; } normal;
struct { float u, v; } uv;
struct { uint8_t r, g, b, a; } color;
struct { float x, y, z; } tangent;
} ModelVertex;
typedef struct {
gpu_phase readPhase;
gpu_phase writePhase;
@ -58,6 +42,7 @@ struct Buffer {
gpu_buffer* gpu;
BufferInfo info;
uint64_t hash;
uint32_t tick;
Sync sync;
};
@ -264,6 +249,20 @@ typedef struct {
Font* font;
} Pipeline;
// Vertex record with three members, in order: position (3 floats), normal (3 floats), uv (2 floats).
typedef struct {
struct { float x, y, z; } position;
struct { float x, y, z; } normal;
struct { float u, v; } uv;
} ShapeVertex;
// Vertex record with five members, in order: position (3 floats), normal (3 floats), uv (2 floats),
// color (4 uint8_t components r, g, b, a), and tangent (3 floats).
typedef struct {
struct { float x, y, z; } position;
struct { float x, y, z; } normal;
struct { float u, v; } uv;
struct { uint8_t r, g, b, a; } color;
struct { float x, y, z; } tangent;
} ModelVertex;
enum {
SHAPE_PLANE,
SHAPE_BOX,
@ -355,6 +354,7 @@ typedef struct {
char* memory;
size_t cursor;
size_t length;
size_t limit;
} Allocator;
static struct {
@ -386,6 +386,9 @@ static struct {
Material* defaultMaterial;
size_t materialBlock;
arr_t(MaterialBlock) materialBlocks;
size_t scratchBufferIndex;
arr_t(Buffer*) scratchBuffers;
arr_t(gpu_buffer*) scratchBufferHandles;
map_t pipelineLookup;
arr_t(gpu_pipeline*) pipelines;
arr_t(Layout) layouts;
@ -462,13 +465,18 @@ bool lovrGraphicsInit(GraphicsConfig* config) {
// Temporary frame memory uses a large 1GB virtual memory allocation, committing pages as needed
state.allocator.length = 1 << 14;
state.allocator.memory = os_vm_init(MAX_FRAME_MEMORY);
state.allocator.limit = 1 << 30;
state.allocator.memory = os_vm_init(state.allocator.limit);
os_vm_commit(state.allocator.memory, state.allocator.length);
map_init(&state.pipelineLookup, 64);
arr_init(&state.pipelines, realloc);
arr_init(&state.layouts, realloc);
arr_init(&state.materialBlocks, realloc);
arr_init(&state.scratchBuffers, realloc);
arr_init(&state.scratchBufferHandles, realloc);
arr_reserve(&state.scratchBuffers, 8);
arr_reserve(&state.scratchBufferHandles, 8);
gpu_slot builtinSlots[] = {
{ 0, GPU_SLOT_UNIFORM_BUFFER, GPU_STAGE_ALL }, // Globals
@ -681,6 +689,12 @@ void lovrGraphicsDestroy() {
free(block->bundles);
}
arr_free(&state.materialBlocks);
for (size_t i = 0; i < state.scratchBuffers.length; i++) {
free(state.scratchBuffers.data[i]);
free(state.scratchBufferHandles.data[i]);
}
arr_free(&state.scratchBuffers);
arr_free(&state.scratchBufferHandles);
for (size_t i = 0; i < state.pipelines.length; i++) {
gpu_pipeline_destroy(state.pipelines.data[i]);
free(state.pipelines.data[i]);
@ -703,7 +717,7 @@ void lovrGraphicsDestroy() {
arr_free(&state.layouts);
gpu_destroy();
glslang_finalize_process();
os_vm_free(state.allocator.memory, MAX_FRAME_MEMORY);
os_vm_free(state.allocator.memory, state.allocator.limit);
memset(&state, 0, sizeof(state));
}
@ -1037,15 +1051,32 @@ Buffer* lovrGraphicsGetBuffer(BufferInfo* info, void** data) {
uint32_t size = info->length * info->stride;
lovrCheck(size > 0, "Buffer size can not be zero");
lovrCheck(size <= 1 << 30, "Max buffer size is 1GB");
const uint32_t BUFFERS_PER_CHUNK = 64;
if (state.scratchBufferIndex >= state.scratchBuffers.length * BUFFERS_PER_CHUNK) {
Buffer* buffers = malloc(BUFFERS_PER_CHUNK * sizeof(Buffer));
gpu_buffer* handles = malloc(BUFFERS_PER_CHUNK * gpu_sizeof_buffer());
lovrAssert(buffers && handles, "Out of memory");
for (uint32_t i = 0; i < BUFFERS_PER_CHUNK; i++) {
buffers[i].gpu = (gpu_buffer*) ((char*) handles + gpu_sizeof_buffer() * i);
}
arr_push(&state.scratchBuffers, buffers);
arr_push(&state.scratchBufferHandles, handles);
}
uint32_t index = state.scratchBufferIndex++;
Buffer* buffer = &state.scratchBuffers.data[index / BUFFERS_PER_CHUNK][index % BUFFERS_PER_CHUNK];
Buffer* buffer = tempAlloc(sizeof(Buffer) + gpu_sizeof_buffer());
buffer->ref = 1;
buffer->size = size;
buffer->gpu = (gpu_buffer*) (buffer + 1);
buffer->info = *info;
buffer->hash = hash64(info->fields, info->fieldCount * sizeof(BufferField));
beginFrame();
buffer->pointer = gpu_map(buffer->gpu, size, state.limits.uniformBufferAlign, GPU_MAP_WRITE);
buffer->tick = state.tick;
if (data) {
*data = buffer->pointer;
@ -1100,6 +1131,10 @@ bool lovrBufferIsTemporary(Buffer* buffer) {
return buffer->pointer != NULL;
}
// Reports whether a Buffer passes the validity check: a Buffer for which lovrBufferIsTemporary
// returns false always passes; otherwise it passes only when its recorded tick equals state.tick.
bool lovrBufferIsValid(Buffer* buffer) {
  if (!lovrBufferIsTemporary(buffer)) {
    return true;
  }
  return buffer->tick == state.tick;
}
void* lovrBufferMap(Buffer* buffer, uint32_t offset, uint32_t size) {
lovrAssert(buffer->pointer, "This function can only be called on temporary buffers");
return buffer->pointer + offset;
@ -1869,6 +1904,7 @@ void lovrShaderGetWorkgroupSize(Shader* shader, uint32_t size[3]) {
Material* lovrMaterialCreate(const MaterialInfo* info) {
MaterialBlock* block = &state.materialBlocks.data[state.materialBlock];
const uint32_t MATERIALS_PER_BLOCK = 256;
if (!block || block->head == ~0u || !gpu_is_complete(block->list[block->head].tick)) {
bool found = false;
@ -5431,7 +5467,7 @@ void lovrPassTock(Pass* pass, Tally* tally, uint32_t index) {
static void* tempAlloc(size_t size) {
while (state.allocator.cursor + size > state.allocator.length) {
lovrAssert(state.allocator.length << 1 <= MAX_FRAME_MEMORY, "Out of memory");
lovrAssert(state.allocator.length << 1 <= state.allocator.limit, "Out of memory");
os_vm_commit(state.allocator.memory + state.allocator.length, state.allocator.length);
state.allocator.length <<= 1;
}
@ -5462,6 +5498,7 @@ static void beginFrame(void) {
state.active = true;
state.tick = gpu_begin();
state.stream = gpu_stream_begin();
state.scratchBufferIndex = 0;
state.allocator.cursor = 0;
processReadbacks();
}

View File

@ -176,6 +176,7 @@ Buffer* lovrBufferCreate(const BufferInfo* info, void** data);
void lovrBufferDestroy(void* ref);
const BufferInfo* lovrBufferGetInfo(Buffer* buffer);
bool lovrBufferIsTemporary(Buffer* buffer);
bool lovrBufferIsValid(Buffer* buffer);
void* lovrBufferMap(Buffer* buffer, uint32_t offset, uint32_t size);
void lovrBufferClear(Buffer* buffer, uint32_t offset, uint32_t size);