Optimize blendshapes

To initialize the vertices to their default state, it's much faster to
branch in the compute shader than to copy the raw vertex buffer into the
vertex buffer beforehand.
This commit is contained in:
bjorn 2023-06-16 19:18:48 -07:00
parent 639a9d4aca
commit 4ec065757d
2 changed files with 11 additions and 19 deletions

View File

@ -26,16 +26,17 @@ struct BlendVertex {
float tx, ty, tz;
};
layout(set = 0, binding = 0) buffer restrict Vertices { ModelVertex vertices[]; };
layout(set = 0, binding = 1) buffer restrict readonly BlendVertices { BlendVertex blendVertex[]; };
layout(set = 0, binding = 2) uniform Weights { vec4 weights[16]; };
layout(set = 0, binding = 0) buffer restrict RawVertices { ModelVertex rawVertices[]; };
layout(set = 0, binding = 1) buffer restrict Vertices { ModelVertex vertices[]; };
layout(set = 0, binding = 2) buffer restrict readonly BlendVertices { BlendVertex blendVertex[]; };
layout(set = 0, binding = 3) uniform Weights { vec4 weights[16]; };
void lovrmain() {
if (GlobalThreadID.x >= vertexCount) return;
uint vertexIndex = baseVertex + GlobalThreadID.x;
uint blendVertexIndex = baseBlendVertex + GlobalThreadID.x;
ModelVertex vertex = vertices[vertexIndex];
ModelVertex vertex = baseBlendVertex == 0 ? rawVertices[vertexIndex] : vertices[vertexIndex];
for (uint i = 0; i < blendShapeCount; i++, blendVertexIndex += vertexCount) {
float weight = weights[i / 4][i % 4];

View File

@ -4185,26 +4185,17 @@ static void lovrModelAnimateVertices(Model* model) {
beginFrame();
if (blend) {
gpu_buffer* src = model->rawVertexBuffer->gpu;
gpu_buffer* dst = model->vertexBuffer->gpu;
gpu_copy_buffers(state.stream, src, dst, 0, 0, data->dynamicVertexCount * sizeof(ModelVertex));
gpu_sync(state.stream, &(gpu_barrier) {
.prev = GPU_PHASE_TRANSFER,
.next = GPU_PHASE_SHADER_COMPUTE,
.flush = GPU_CACHE_TRANSFER_WRITE,
.clear = GPU_CACHE_STORAGE_READ | GPU_CACHE_STORAGE_WRITE
}, 1);
Shader* shader = lovrGraphicsGetDefaultShader(SHADER_BLENDER);
gpu_layout* layout = state.layouts.data[shader->layout].gpu;
uint32_t vertexCount = model->info.data->dynamicVertexCount;
uint32_t vertexCount = data->dynamicVertexCount;
uint32_t blendBufferCursor = 0;
uint32_t chunkSize = 64;
gpu_binding bindings[] = {
{ 0, GPU_SLOT_STORAGE_BUFFER, .buffer = { model->vertexBuffer->gpu, 0, vertexCount * sizeof(ModelVertex) } },
{ 1, GPU_SLOT_STORAGE_BUFFER, .buffer = { model->blendBuffer->gpu, 0, model->blendBuffer->info.size } },
{ 2, GPU_SLOT_UNIFORM_BUFFER, .buffer = { NULL, 0, chunkSize * sizeof(float) } }
{ 0, GPU_SLOT_STORAGE_BUFFER, .buffer = { model->rawVertexBuffer->gpu, 0, vertexCount * sizeof(ModelVertex) } },
{ 1, GPU_SLOT_STORAGE_BUFFER, .buffer = { model->vertexBuffer->gpu, 0, vertexCount * sizeof(ModelVertex) } },
{ 2, GPU_SLOT_STORAGE_BUFFER, .buffer = { model->blendBuffer->gpu, 0, model->blendBuffer->info.size } },
{ 3, GPU_SLOT_UNIFORM_BUFFER, .buffer = { NULL, 0, chunkSize * sizeof(float) } }
};
gpu_compute_begin(state.stream);
@ -4218,7 +4209,7 @@ static void lovrModelAnimateVertices(Model* model) {
MappedBuffer mapped = mapBuffer(&state.streamBuffers, chunkSize * sizeof(float), state.limits.uniformBufferAlign);
memcpy(mapped.pointer, model->blendShapeWeights + group->index + j, count * sizeof(float));
bindings[2].buffer = (gpu_buffer_binding) { mapped.buffer, mapped.offset, mapped.extent };
bindings[3].buffer = (gpu_buffer_binding) { mapped.buffer, mapped.offset, mapped.extent };
gpu_bundle* bundle = getBundle(shader->layout);
gpu_bundle_info bundleInfo = { layout, bindings, COUNTOF(bindings) };