From 42a924b0ee2808a2998a663ad962cb5b17f36a48 Mon Sep 17 00:00:00 2001 From: bjorn Date: Mon, 30 May 2022 15:36:31 -0700 Subject: [PATCH] lovrPassDraw; Pass:points; --- src/api/l_graphics_pass.c | 76 ++++++++ src/modules/graphics/graphics.c | 329 +++++++++++++++++++++++++++++++- src/modules/graphics/graphics.h | 1 + 3 files changed, 405 insertions(+), 1 deletion(-) diff --git a/src/api/l_graphics_pass.c b/src/api/l_graphics_pass.c index 15e82b70..77b42949 100644 --- a/src/api/l_graphics_pass.c +++ b/src/api/l_graphics_pass.c @@ -344,6 +344,80 @@ static int l_lovrPassSend(lua_State* L) { return luax_typeerror(L, 3, "Buffer, Texture, Sampler, or number/vector"); } +static uint32_t luax_getvertexcount(lua_State* L, int index) { + switch (lua_type(L, index)) { + case LUA_TNONE: + case LUA_TNIL: + return 0; + case LUA_TNUMBER: + return (lua_gettop(L) - index + 1) / 3; + case LUA_TTABLE: + lua_rawgeti(L, index, 1); + int innerType = lua_type(L, -1); + lua_pop(L, 1); + return luax_len(L, index) / (innerType == LUA_TNUMBER ? 3 : 1); + case LUA_TUSERDATA: + return lua_gettop(L) - index + 1; + default: + return luax_typeerror(L, index, "number, table, or vector"); + } +} + +static void luax_readvertices(lua_State* L, int index, float* vertices, uint32_t count) { + switch (lua_type(L, index)) { + case LUA_TNONE: + case LUA_TNIL: + default: + break; + case LUA_TNUMBER: + for (uint32_t i = 0; i < 3 * count; i++) { + *vertices++ = luax_tofloat(L, index + i); + } + break; + case LUA_TTABLE: + lua_rawgeti(L, index, 1); + int innerType = lua_type(L, -1); + lua_pop(L, 1); + if (innerType == LUA_TNUMBER) { + for (uint32_t i = 0; i < 3 * count; i++) { + lua_rawgeti(L, index, i + 1); + *vertices++ = luax_tofloat(L, -1); + lua_pop(L, 1); + } + } else if (innerType == LUA_TUSERDATA) { + for (uint32_t i = 0; i < count; i++) { + lua_rawgeti(L, index, i + 1); + vec3_init(vertices, luax_checkvector(L, -1, V_VEC3, NULL)); + lua_pop(L, 1); + vertices += 3; + } + } + break; + case LUA_TUSERDATA: + for (uint32_t i = 0; i < count; i++) { + vec3_init(vertices, luax_checkvector(L, index + i, V_VEC3, NULL)); + vertices += 3; + } + break; + } +} + +static int l_lovrPassPoints(lua_State* L) { + Pass* pass = luax_checktype(L, 1, Pass); + Buffer* buffer = luax_totype(L, 2, Buffer); + + if (buffer || !lua_toboolean(L, 2)) { + // + } else { + float* vertices; + uint32_t count = luax_getvertexcount(L, 2); + lovrPassPoints(pass, count, &vertices); + luax_readvertices(L, 2, vertices, count); + } + + return 0; +} + static int l_lovrPassClear(lua_State* L) { Pass* pass = luax_checktype(L, 1, Pass); @@ -519,6 +593,8 @@ const luaL_Reg lovrPass[] = { { "send", l_lovrPassSend }, + { "points", l_lovrPassPoints }, + { "clear", l_lovrPassClear }, { "copy", l_lovrPassCopy }, { "blit", l_lovrPassBlit }, diff --git a/src/modules/graphics/graphics.c b/src/modules/graphics/graphics.c index c7557b6d..7c5896be 100644 --- a/src/modules/graphics/graphics.c +++ b/src/modules/graphics/graphics.c @@ -95,10 +95,17 @@ typedef struct { typedef struct { float color[4]; Shader* shader; + uint64_t formatHash; gpu_pipeline_info info; bool dirty; } Pipeline; +typedef struct { + float transform[16]; + float cofactor[16]; + float color[4]; +} DrawData; + struct Pass { uint32_t ref; PassInfo info; @@ -109,12 +116,19 @@ struct Pass { Pipeline* pipeline; Pipeline pipelines[4]; uint32_t pipelineIndex; + char constants[256]; + bool constantsDirty; gpu_binding bindings[32]; uint32_t bindingMask; bool bindingsDirty; Camera* cameras; uint32_t cameraCount; bool cameraDirty; + DrawData* drawData; + uint32_t drawCount; + gpu_binding builtins[3]; + gpu_buffer* vertexBuffer; + gpu_buffer* indexBuffer; }; typedef enum { @@ -126,6 +140,32 @@ typedef enum { DEFAULT_FORMAT_COUNT } DefaultFormat; +typedef struct { + gpu_draw_mode mode; + DefaultShader shader; + float* transform; + struct { + Buffer* buffer; + DefaultFormat format; + const void* data; + void** pointer; + uint32_t count; + } vertex; + struct { + Buffer* buffer; + const void* data; + void** pointer; + uint32_t count; + uint32_t stride; + } index; + uint32_t start; + uint32_t count; + uint32_t instances; + uint32_t base; + Buffer* indirect; + uint32_t offset; +} Draw; + typedef struct { void* next; gpu_bundle_pool* gpu; @@ -225,6 +265,7 @@ bool lovrGraphicsInit(bool debug, bool vsync) { arr_init(&state.pipelines, realloc); arr_init(&state.layouts, realloc); + // Layout for builtin bindings gpu_slot builtins[] = { { 0, GPU_SLOT_UNIFORM_BUFFER, GPU_STAGE_ALL }, // Cameras { 1, GPU_SLOT_UNIFORM_BUFFER, GPU_STAGE_ALL }, // Draw data @@ -238,6 +279,7 @@ bool lovrGraphicsInit(bool debug, bool vsync) { state.allocator.memory = os_vm_init(MAX_FRAME_MEMORY); os_vm_commit(state.allocator.memory, state.allocator.length); + // Default resources BufferInfo defaultBufferInfo = { .length = 4096, .stride = 1, @@ -282,6 +324,20 @@ bool lovrGraphicsInit(bool debug, bool vsync) { memset(zeros, 0, defaultBufferInfo.length); + state.vertexFormats[VERTEX_POINT].gpu = (gpu_vertex_format) { + .bufferCount = 1, + .attributeCount = 1, + .bufferStrides[0] = 12, + .attributes[0] = { 0, 0, 0, GPU_TYPE_F32x3 } + }; + + for (uint32_t i = 0; i < DEFAULT_FORMAT_COUNT; i++) { + for (uint32_t j = 0; j < state.vertexFormats[i].gpu.attributeCount; j++) { + state.vertexFormats[i].mask |= (1 << state.vertexFormats[i].gpu.attributes[j].location); + } + state.vertexFormats[i].hash = hash64(&state.vertexFormats[i].gpu, sizeof(gpu_vertex_format)); + } + state.initialized = true; return true; } @@ -369,7 +425,7 @@ void lovrGraphicsGetLimits(GraphicsLimits* limits) { memcpy(limits->computeWorkgroupSize, state.limits.computeWorkgroupSize, 3 * sizeof(uint32_t)); limits->computeWorkgroupVolume = state.limits.computeWorkgroupVolume; limits->computeSharedMemory = state.limits.computeSharedMemory; - limits->shaderConstantSize = state.limits.pushConstantSize; + limits->shaderConstantSize = MIN(state.limits.pushConstantSize, 256); limits->indirectDrawCount = state.limits.indirectDrawCount; limits->instances = state.limits.instances; limits->anisotropy = state.limits.anisotropy; @@ -941,6 +997,8 @@ Shader* lovrGraphicsGetDefaultShader(DefaultShader type) { } state.defaultShaders[type] = lovrShaderCreate(&info); + info.stages[0]->data = NULL; + info.stages[1]->data = NULL; lovrRelease(info.stages[0], lovrBlobDestroy); lovrRelease(info.stages[1], lovrBlobDestroy); return state.defaultShaders[type]; @@ -1274,6 +1332,17 @@ Pass* lovrGraphicsGetPass(PassInfo* info) { pass->pipeline = &pass->pipelines[0]; memset(&pass->pipeline->info, 0, sizeof(pass->pipeline->info)); + + pass->pipeline->info.colorCount = colorTextureCount; + for (uint32_t i = 0; i < colorTextureCount; i++) { + pass->pipeline->info.color[i].format = canvas->textures[i]->info.format; + pass->pipeline->info.color[i].srgb = canvas->textures[i]->info.srgb; + } + + pass->pipeline->info.depth.format = canvas->depth.texture ? canvas->depth.texture->info.format : canvas->depth.format; + pass->pipeline->info.viewCount = main->depth; + pass->pipeline->info.multisample.count = canvas->samples; + pass->pipeline->info.depth.test = GPU_COMPARE_LEQUAL; pass->pipeline->info.depth.write = true; pass->pipeline->info.color[0].mask = 0xf; @@ -1287,13 +1356,35 @@ Pass* lovrGraphicsGetPass(PassInfo* info) { pass->pipeline->shader = NULL; pass->pipeline->dirty = true; + memset(pass->constants, 0, sizeof(pass->constants)); + pass->constantsDirty = true; + pass->bindingMask = 0; pass->bindingsDirty = true; pass->cameraCount = main->depth; pass->cameras = tempAlloc(pass->cameraCount * sizeof(Camera)); + + for (uint32_t i = 0; i < pass->cameraCount; i++) { + mat4_identity(pass->cameras[i].view); + float fov = 1.0f; + float aspect = (float) main->width / main->height; + mat4_perspective(pass->cameras[i].projection, .01f, 100.f, fov, aspect); + } pass->cameraDirty = true; + pass->drawCount = 0; + + gpu_buffer_binding cameras = { tempAlloc(gpu_sizeof_buffer()), 0, pass->cameraCount * sizeof(Camera) }; + gpu_buffer_binding draws = { tempAlloc(gpu_sizeof_buffer()), 0, 256 * sizeof(DrawData) }; + + pass->builtins[0] = (gpu_binding) { 0, GPU_SLOT_UNIFORM_BUFFER, .buffer = cameras }; + pass->builtins[1] = (gpu_binding) { 1, GPU_SLOT_UNIFORM_BUFFER, .buffer = draws }; + pass->builtins[2] = (gpu_binding) { 2, GPU_SLOT_SAMPLER, .sampler = state.defaultSampler->gpu }; + + pass->vertexBuffer = NULL; + pass->indexBuffer = NULL; + return pass; } @@ -1659,6 +1750,242 @@ void lovrPassSendSampler(Pass* pass, const char* name, size_t length, uint32_t s pass->bindingsDirty = true; } +static void flushPipeline(Pass* pass, Draw* draw, Shader* shader) { + Pipeline* pipeline = pass->pipeline; + + if (pipeline->info.drawMode != draw->mode) { + pipeline->info.drawMode = draw->mode; + pipeline->dirty = true; + } + + if (!pipeline->info.shader && pipeline->info.shader != shader->gpu) { + pipeline->info.shader = shader->gpu; + pipeline->info.flags = NULL; + pipeline->info.flagCount = 0; + pipeline->dirty = true; + } + + VertexFormat* format = draw->vertex.buffer ? &draw->vertex.buffer->format : &state.vertexFormats[draw->vertex.format]; + + if (format->hash != pipeline->formatHash) { + pipeline->info.vertex = format->gpu; + gpu_vertex_format* vertex = &pipeline->info.vertex; + uint32_t missingAttributes = shader->attributeMask & ~format->mask; + + if (missingAttributes) { + vertex->bufferCount++; + vertex->bufferStrides[1] = 0; + for (uint32_t i = 0; i < 32 && missingAttributes; i++) { + if (missingAttributes & (1 << i)) { // TODO clz + missingAttributes &= ~(1 << i); + vertex->attributes[vertex->attributeCount++] = (gpu_attribute) { + .buffer = 1, + .location = i, + .offset = 0, + .type = GPU_TYPE_F32x4 + }; + } + } + } + + pipeline->formatHash = format->hash; + pipeline->dirty = true; + } + + if (!pipeline->dirty) { + return; + } + + uint64_t hash = hash64(&pipeline->info, sizeof(pipeline->info)); + uint64_t index = map_get(&state.pipelineLookup, hash); + + if (index == MAP_NIL) { + gpu_pipeline* gpu = malloc(gpu_sizeof_pipeline()); + lovrAssert(gpu, "Out of memory"); + gpu_pipeline_init_graphics(gpu, &pipeline->info); + index = state.pipelines.length; + arr_push(&state.pipelines, gpu); + map_set(&state.pipelineLookup, hash, index); + } + + gpu_bind_pipeline(pass->stream, state.pipelines.data[index], false); + pipeline->dirty = false; +} + +static void flushConstants(Pass* pass, Shader* shader) { + if (pass->constantsDirty && shader->constantSize > 0) { + gpu_push_constants(pass->stream, shader->gpu, pass->constants, shader->constantSize); + pass->constantsDirty = false; + } +} + +static void flushBindings(Pass* pass, Shader* shader) { + if (!pass->bindingsDirty || shader->resourceCount == 0) { + return; + } + + gpu_binding* bindings = tempAlloc(shader->resourceCount * sizeof(gpu_binding)); + + for (uint32_t i = 0; i < shader->resourceCount; i++) { + bindings[i] = pass->bindings[shader->resources[i].binding]; + } + + gpu_bundle_info info = { + .layout = state.layouts.data[shader->layout].gpu, + .bindings = bindings, + .count = shader->resourceCount + }; + + gpu_bundle* bundle = getBundle(shader->layout); + gpu_bundle_write(&bundle, &info, 1); + gpu_bind_bundle(pass->stream, shader->gpu, 2, bundle, NULL, 0); +} + +static void flushBuiltins(Pass* pass, Draw* draw, Shader* shader) { + bool rebind = false; + + if (pass->cameraDirty) { + for (uint32_t i = 0; i < pass->cameraCount; i++) { + mat4_init(pass->cameras[i].viewProjection, pass->cameras[i].projection); + mat4_init(pass->cameras[i].inverseProjection, pass->cameras[i].projection); + mat4_mul(pass->cameras[i].viewProjection, pass->cameras[i].view); + mat4_invert(pass->cameras[i].inverseProjection); + } + + uint32_t size = pass->cameraCount * sizeof(Camera); + void* data = gpu_map(pass->builtins[0].buffer.object, size, state.limits.uniformBufferAlign, GPU_MAP_WRITE); + memcpy(data, pass->cameras, size); + pass->cameraDirty = false; + rebind = true; + } + + if (pass->drawCount % 256 == 0) { + uint32_t size = 256 * sizeof(DrawData); + pass->drawData = gpu_map(pass->builtins[1].buffer.object, size, state.limits.uniformBufferAlign, GPU_MAP_WRITE); + rebind = true; + } + + if (rebind) { + gpu_bundle_info bundleInfo = { + .layout = state.layouts.data[state.builtinLayout].gpu, + .bindings = pass->builtins, + .count = COUNTOF(pass->builtins) + }; + + gpu_bundle* bundle = getBundle(state.builtinLayout); + gpu_bundle_write(&bundle, &bundleInfo, 1); + gpu_bind_bundle(pass->stream, shader->gpu, 0, bundle, NULL, 0); + } + + float m[16]; + float* transform; + if (draw->transform) { + transform = mat4_mul(mat4_init(m, pass->transform), draw->transform); + } else { + transform = pass->transform; + } + + float cofactor[16]; + mat4_init(cofactor, transform); + mat4_cofactor(cofactor); + + memcpy(pass->drawData->transform, transform, 64); + memcpy(pass->drawData->cofactor, cofactor, 64); + memcpy(pass->drawData->color, pass->pipeline->color, 16); + pass->drawData++; +} + +static void flushBuffers(Pass* pass, Draw* draw) { + uint32_t vertexOffsets[2] = { 0, 0 }; + + if (!draw->vertex.buffer && draw->vertex.count > 0) { + uint32_t stride = state.vertexFormats[draw->vertex.format].gpu.bufferStrides[0]; + uint32_t size = draw->vertex.count * stride; + + gpu_buffer* scratchpad = tempAlloc(gpu_sizeof_buffer()); + void* pointer = gpu_map(scratchpad, size, stride, GPU_MAP_WRITE); + + if (draw->vertex.pointer) { + *draw->vertex.pointer = pointer; + } else { + memcpy(pointer, draw->vertex.data, size); + } + + gpu_buffer* buffers[2] = { scratchpad, state.defaultBuffer->gpu }; + gpu_bind_vertex_buffers(pass->stream, buffers, vertexOffsets, 0, 2); + pass->vertexBuffer = NULL; + } else if (draw->vertex.buffer && draw->vertex.buffer->gpu != pass->vertexBuffer) { + lovrCheck(draw->vertex.buffer->info.stride <= state.limits.vertexBufferStride, "Vertex buffer stride exceeds vertexBufferStride limit"); + gpu_buffer* buffers[2] = { draw->vertex.buffer->gpu, state.defaultBuffer->gpu }; + gpu_bind_vertex_buffers(pass->stream, buffers, vertexOffsets, 0, 2); + pass->vertexBuffer = draw->vertex.buffer->gpu; + } + + if (!draw->index.buffer && draw->index.count > 0) { + uint32_t stride = draw->index.stride ? draw->index.stride : sizeof(uint16_t); + uint32_t size = draw->index.count * stride; + + gpu_buffer* scratchpad = tempAlloc(gpu_sizeof_buffer()); + void* pointer = gpu_map(scratchpad, size, stride, GPU_MAP_WRITE); + + if (draw->index.pointer) { + *draw->index.pointer = pointer; + } else { + memcpy(pointer, draw->index.data, size); + } + + gpu_index_type type = stride == 4 ? GPU_INDEX_U32 : GPU_INDEX_U16; + gpu_bind_index_buffer(pass->stream, scratchpad, 0, type); + pass->indexBuffer = NULL; + } else if (draw->index.buffer && draw->index.buffer->gpu != pass->indexBuffer) { + gpu_index_type type = draw->index.buffer->info.stride == 4 ? GPU_INDEX_U32 : GPU_INDEX_U16; + gpu_bind_index_buffer(pass->stream, draw->index.buffer->gpu, 0, type); + pass->indexBuffer = draw->index.buffer->gpu; + } +} + +static void lovrPassDraw(Pass* pass, Draw* draw) { + lovrCheck(pass->info.type == PASS_RENDER, "This function can only be called on a render pass"); + Shader* shader = pass->pipeline->shader ? pass->pipeline->shader : lovrGraphicsGetDefaultShader(draw->shader); + + flushPipeline(pass, draw, shader); + flushConstants(pass, shader); + flushBindings(pass, shader); + flushBuiltins(pass, draw, shader); + flushBuffers(pass, draw); + + bool indexed = draw->index.buffer || draw->index.count > 0; + uint32_t defaultCount = draw->index.count > 0 ? draw->index.count : draw->vertex.count; + uint32_t count = draw->count > 0 ? draw->count : defaultCount; + uint32_t instances = MAX(draw->instances, 1); + uint32_t id = pass->drawCount & 0xff; + + if (draw->indirect) { + if (indexed) { + gpu_draw_indirect_indexed(pass->stream, draw->indirect->gpu, draw->offset, count); + } else { + gpu_draw_indirect(pass->stream, draw->indirect->gpu, draw->offset, count); + } + } else { + if (indexed) { + gpu_draw_indexed(pass->stream, count, instances, draw->start, draw->base, id); + } else { + gpu_draw(pass->stream, count, instances, draw->start, id); + } + } + + pass->drawCount++; +} + +void lovrPassPoints(Pass* pass, uint32_t count, float** vertices) { + lovrPassDraw(pass, &(Draw) { + .mode = GPU_DRAW_POINTS, + .vertex.format = VERTEX_POINT, + .vertex.pointer = (void**) vertices, + .vertex.count = count + }); +} + void lovrPassClearBuffer(Pass* pass, Buffer* buffer, uint32_t offset, uint32_t extent) { if (extent == 0) return; if (extent == ~0u) extent = buffer->size - offset; diff --git a/src/modules/graphics/graphics.h b/src/modules/graphics/graphics.h index c4d1a36b..d4f7dc7b 100644 --- a/src/modules/graphics/graphics.h +++ b/src/modules/graphics/graphics.h @@ -382,6 +382,7 @@ void lovrPassSetWireframe(Pass* pass, bool wireframe); void lovrPassSendBuffer(Pass* pass, const char* name, size_t length, uint32_t slot, Buffer* buffer, uint32_t offset, uint32_t extent); void lovrPassSendTexture(Pass* pass, const char* name, size_t length, uint32_t slot, Texture* texture); void lovrPassSendSampler(Pass* pass, const char* name, size_t length, uint32_t slot, Sampler* sampler); +void lovrPassPoints(Pass* pass, uint32_t count, float** vertices); void lovrPassClearBuffer(Pass* pass, Buffer* buffer, uint32_t offset, uint32_t extent); void lovrPassClearTexture(Pass* pass, Texture* texture, float value[4], uint32_t layer, uint32_t layerCount, uint32_t level, uint32_t levelCount); void lovrPassCopyDataToBuffer(Pass* pass, void* data, Buffer* buffer, uint32_t offset, uint32_t size);