Optimize blendshapes;

To initialize the vertices to their default state, it's much faster to branch in the compute shader (reading from the raw vertex buffer when baseBlendVertex is 0) than to copy the raw vertices into the vertex buffer beforehand.
2023-06-16 19:18:48 -07:00 · 2023-06-16 19:18:48 -07:00 · 4ec065757d
parent 639a9d4aca
commit 4ec065757d
2 changed files with 11 additions and 19 deletions
--- a/etc/shaders/blender.comp
+++ b/etc/shaders/blender.comp
@@ -26,16 +26,17 @@ struct BlendVertex {
  float tx, ty, tz;
 };

-layout(set = 0, binding = 0) buffer restrict Vertices { ModelVertex vertices[]; };
-layout(set = 0, binding = 1) buffer restrict readonly BlendVertices { BlendVertex blendVertex[]; };
-layout(set = 0, binding = 2) uniform Weights { vec4 weights[16]; };
+layout(set = 0, binding = 0) buffer restrict RawVertices { ModelVertex rawVertices[]; };
+layout(set = 0, binding = 1) buffer restrict Vertices { ModelVertex vertices[]; };
+layout(set = 0, binding = 2) buffer restrict readonly BlendVertices { BlendVertex blendVertex[]; };
+layout(set = 0, binding = 3) uniform Weights { vec4 weights[16]; };

 void lovrmain() {
  if (GlobalThreadID.x >= vertexCount) return;
  uint vertexIndex = baseVertex + GlobalThreadID.x;
  uint blendVertexIndex = baseBlendVertex + GlobalThreadID.x;

-  ModelVertex vertex = vertices[vertexIndex];
+  ModelVertex vertex = baseBlendVertex == 0 ? rawVertices[vertexIndex] : vertices[vertexIndex];

  for (uint i = 0; i < blendShapeCount; i++, blendVertexIndex += vertexCount) {
    float weight = weights[i / 4][i % 4];
--- a/src/modules/graphics/graphics.c
+++ b/src/modules/graphics/graphics.c
@@ -4185,26 +4185,17 @@ static void lovrModelAnimateVertices(Model* model) {
  beginFrame();

  if (blend) {
-    gpu_buffer* src = model->rawVertexBuffer->gpu;
-    gpu_buffer* dst = model->vertexBuffer->gpu;
-    gpu_copy_buffers(state.stream, src, dst, 0, 0, data->dynamicVertexCount * sizeof(ModelVertex));
-    gpu_sync(state.stream, &(gpu_barrier) {
-      .prev = GPU_PHASE_TRANSFER,
-      .next = GPU_PHASE_SHADER_COMPUTE,
-      .flush = GPU_CACHE_TRANSFER_WRITE,
-      .clear = GPU_CACHE_STORAGE_READ | GPU_CACHE_STORAGE_WRITE
-    }, 1);
-
    Shader* shader = lovrGraphicsGetDefaultShader(SHADER_BLENDER);
    gpu_layout* layout = state.layouts.data[shader->layout].gpu;
-    uint32_t vertexCount = model->info.data->dynamicVertexCount;
+    uint32_t vertexCount = data->dynamicVertexCount;
    uint32_t blendBufferCursor = 0;
    uint32_t chunkSize = 64;

    gpu_binding bindings[] = {
-      { 0, GPU_SLOT_STORAGE_BUFFER, .buffer = { model->vertexBuffer->gpu, 0, vertexCount * sizeof(ModelVertex) } },
-      { 1, GPU_SLOT_STORAGE_BUFFER, .buffer = { model->blendBuffer->gpu, 0, model->blendBuffer->info.size } },
-      { 2, GPU_SLOT_UNIFORM_BUFFER, .buffer = { NULL, 0, chunkSize * sizeof(float) } }
+      { 0, GPU_SLOT_STORAGE_BUFFER, .buffer = { model->rawVertexBuffer->gpu, 0, vertexCount * sizeof(ModelVertex) } },
+      { 1, GPU_SLOT_STORAGE_BUFFER, .buffer = { model->vertexBuffer->gpu, 0, vertexCount * sizeof(ModelVertex) } },
+      { 2, GPU_SLOT_STORAGE_BUFFER, .buffer = { model->blendBuffer->gpu, 0, model->blendBuffer->info.size } },
+      { 3, GPU_SLOT_UNIFORM_BUFFER, .buffer = { NULL, 0, chunkSize * sizeof(float) } }
    };

    gpu_compute_begin(state.stream);
@@ -4218,7 +4209,7 @@ static void lovrModelAnimateVertices(Model* model) {

        MappedBuffer mapped = mapBuffer(&state.streamBuffers, chunkSize * sizeof(float), state.limits.uniformBufferAlign);
        memcpy(mapped.pointer, model->blendShapeWeights + group->index + j, count * sizeof(float));
-        bindings[2].buffer = (gpu_buffer_binding) { mapped.buffer, mapped.offset, mapped.extent };
+        bindings[3].buffer = (gpu_buffer_binding) { mapped.buffer, mapped.offset, mapped.extent };

        gpu_bundle* bundle = getBundle(shader->layout);
        gpu_bundle_info bundleInfo = { layout, bindings, COUNTOF(bindings) };