Compare commits

...

7 Commits

Author SHA1 Message Date
bjorn ec380e0cfd rm pass uniformSize;
It's no longer necessary.
2024-02-25 14:58:11 -08:00
bjorn fa8ea6732b rm pass binding mask;
It's no longer necessary.
2024-02-25 14:57:27 -08:00
bjorn 466a052ded Cleanup; 2024-02-24 15:45:30 -08:00
bjorn 2fe5ba8f3b Ensure all binding fields are fully initialized; 2024-02-24 15:45:10 -08:00
bjorn ae19b7aad3 Fix resource binding numbers; 2024-02-24 15:33:09 -08:00
bjorn 652a074677 Update resource invalidation when switching shaders;
Previously, when switching shaders, resource bindings would be preserved
for resources with matching slots/types.  This doesn't make sense in a
world where binding numbers are internal.  Instead, match up resources
by name/type.

Additionally, rewire all the uniforms by name/size so uniforms with the
same name get preserved (name/type would be too hard for e.g. structs).
This seems like it would be horribly slow and may need to be optimized,
optional, or removed.

I didn't test any of this lol, but I will, I promise.
2024-02-24 14:34:29 -08:00
bjorn bd83ad6eb4 Cleanup; 2024-02-24 11:49:11 -08:00
5 changed files with 256 additions and 232 deletions
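The strategy in 652a074677 comes down to a name-keyed merge: a binding survives a shader switch only if the new shader has a resource with the same name hash and type, anything unmatched falls back to a default resource, and uniform values are copied over when name and layout agree. A minimal sketch of the idea in isolation, using simplified stand-in types (ShaderRes, Uniform, rebind, and rewireUniforms are illustrative, not the engine's actual structures):

#include <stdint.h>
#include <string.h>

typedef struct { uint32_t hash, type, binding; } ShaderRes; // stand-in for ShaderResource
typedef struct { uint32_t hash, offset, stride, length; } Uniform; // stand-in for DataField

// A binding carries over only when name hash and type both match;
// everything else gets the default resource.
static void rebind(const ShaderRes* prev, uint32_t prevCount, const void** prevSlots,
                   const ShaderRes* next, uint32_t nextCount, const void** nextSlots,
                   const void* fallback) {
  for (uint32_t i = 0; i < nextCount; i++) {
    nextSlots[next[i].binding] = fallback;
    for (uint32_t j = 0; j < prevCount; j++) {
      if (prev[j].hash == next[i].hash && prev[j].type == next[i].type) {
        nextSlots[next[i].binding] = prevSlots[prev[j].binding];
        break;
      }
    }
  }
}

// Uniform values are preserved by name/size rather than name/type, since
// comparing struct types across shaders would be impractical.
static void rewireUniforms(const Uniform* prev, uint32_t prevCount, const char* prevData,
                           const Uniform* next, uint32_t nextCount, char* nextData) {
  for (uint32_t i = 0; i < nextCount; i++) {
    for (uint32_t j = 0; j < prevCount; j++) {
      if (next[i].hash == prev[j].hash && next[i].stride == prev[j].stride &&
          next[i].length == prev[j].length) {
        uint32_t count = next[i].length ? next[i].length : 1;
        memcpy(nextData + next[i].offset, prevData + prev[j].offset,
               (size_t) next[i].stride * count);
        break;
      }
    }
  }
}

The resource merge is bounded by MAX_SHADER_RESOURCES (32) per side; the uniform rewire is the part the commit message flags as potentially slow.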

View File

@@ -1072,10 +1072,12 @@ static int l_lovrGraphicsNewShader(lua_State* L) {
Shader* shader = lovrShaderCreate(&info);
luax_pushtype(L, Shader, shader);
lovrRelease(shader, lovrShaderDestroy);
if (shouldFree[0]) free((void*) source[0].code);
if (shouldFree[1]) free((void*) source[1].code);
if (source[0].code != compiled[0].code) free((void*) compiled[0].code);
if (source[1].code != compiled[1].code) free((void*) compiled[1].code);
for (uint32_t i = 0; i < info.stageCount; i++) {
if (shouldFree[i]) free((void*) source[i].code);
if (source[i].code != compiled[i].code) free((void*) compiled[i].code);
}
arr_free(&flags);
return 1;
}
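The pointer comparison kept in the new loop is worth a note: compiled[i].code is only a separate allocation when the compile step actually produced new code, and when the caller hands over SPIR-V directly the compiler can return the input buffer unchanged, so the two pointers alias and a second free would be a double free. The same cleanup as above, restated with explanatory comments:

for (uint32_t i = 0; i < info.stageCount; i++) {
  // The source buffer is ours to free only if we allocated it
  if (shouldFree[i]) free((void*) source[i].code);
  // The compiler output is a distinct allocation unless compilation was a no-op
  if (source[i].code != compiled[i].code) free((void*) compiled[i].code);
}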

View File

@@ -245,16 +245,16 @@ void gpu_layout_destroy(gpu_layout* layout);
// Shader
typedef struct {
uint32_t stage;
const void* code;
size_t length;
} gpu_shader_stage;
} gpu_shader_source;
typedef struct {
gpu_shader_stage vertex;
gpu_shader_stage fragment;
gpu_shader_stage compute;
gpu_layout* layouts[4];
uint32_t stageCount;
gpu_shader_source* stages;
uint32_t pushConstantSize;
gpu_layout* layouts[4];
const char* label;
} gpu_shader_info;
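Caller-side, the change replaces the fixed vertex/fragment/compute fields with a count and an array, so stages can be supplied uniformly. A hypothetical call site (the helper, its arguments, and the label are illustrative; only the gpu_* types, the GPU_STAGE_* constants, and gpu_shader_init come from this API):

// Hypothetical: build a graphics shader from two SPIR-V blobs with the new API.
static bool createGraphicsShader(gpu_shader* shader,
    const void* vsCode, size_t vsLength,
    const void* fsCode, size_t fsLength,
    gpu_layout* builtins, gpu_layout* material,
    gpu_layout* resources, gpu_layout* uniforms) {
  gpu_shader_source stages[] = {
    { GPU_STAGE_VERTEX, vsCode, vsLength },
    { GPU_STAGE_FRAGMENT, fsCode, fsLength }
  };
  gpu_shader_info info = {
    .stageCount = 2,
    .stages = stages,
    .pushConstantSize = 0,
    .layouts = { builtins, material, resources, uniforms },
    .label = "myShader"
  };
  return gpu_shader_init(shader, &info);
}

The Vulkan backend below consumes this shape directly: it ORs the stage bits together first, rejecting unknown stages before any modules are created, then makes one VkShaderModule per entry.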

View File

@@ -988,31 +988,28 @@ void gpu_layout_destroy(gpu_layout* layout) {
// Shader
bool gpu_shader_init(gpu_shader* shader, gpu_shader_info* info) {
struct { VkShaderStageFlags flags; gpu_shader_stage* source; } stages[] = {
{ VK_SHADER_STAGE_VERTEX_BIT, &info->vertex },
{ VK_SHADER_STAGE_FRAGMENT_BIT, &info->fragment },
{ VK_SHADER_STAGE_COMPUTE_BIT, &info->compute }
};
uint32_t stageCount = 0;
VkShaderStageFlags stageFlags = 0;
for (uint32_t i = 0; i < COUNTOF(stages); i++) {
if (!stages[i].source->code) continue;
for (uint32_t i = 0; i < info->stageCount; i++) {
switch (info->stages[i].stage) {
case GPU_STAGE_VERTEX: stageFlags |= VK_SHADER_STAGE_VERTEX_BIT; break;
case GPU_STAGE_FRAGMENT: stageFlags |= VK_SHADER_STAGE_FRAGMENT_BIT; break;
case GPU_STAGE_COMPUTE: stageFlags |= VK_SHADER_STAGE_COMPUTE_BIT; break;
default: return false;
}
}
for (uint32_t i = 0; i < info->stageCount; i++) {
VkShaderModuleCreateInfo moduleInfo = {
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
.codeSize = stages[i].source->length,
.pCode = stages[i].source->code
.codeSize = info->stages[i].length,
.pCode = info->stages[i].code
};
VK(vkCreateShaderModule(state.device, &moduleInfo, NULL, &shader->handles[stageCount]), "Failed to load shader") {
VK(vkCreateShaderModule(state.device, &moduleInfo, NULL, &shader->handles[i]), "Failed to load shader") {
return false;
}
nickname(shader->handles[i], VK_OBJECT_TYPE_SHADER_MODULE, info->label);
stageFlags |= stages[i].flags;
stageCount++;
}
VkDescriptorSetLayout layouts[4];

View File

@@ -30,7 +30,7 @@
#define PIPELINE_STACK_SIZE 4
#define MAX_SHADER_RESOURCES 32
#define MAX_CUSTOM_ATTRIBUTES 10
#define LAYOUT_BUILTIN 0
#define LAYOUT_BUILTINS 0
#define LAYOUT_MATERIAL 1
#define LAYOUT_UNIFORMS 2
#define FLOAT_BITS(f) ((union { float f; uint32_t u; }) { f }).u
@@ -525,9 +525,7 @@ struct Pass {
uint32_t transformIndex;
uint32_t pipelineIndex;
gpu_binding* bindings;
uint32_t bindingMask;
void* uniforms;
uint32_t uniformSize;
uint32_t computeCount;
Compute* computes;
uint32_t drawCount;
@@ -698,7 +696,7 @@ bool lovrGraphicsInit(GraphicsConfig* config) {
};
size_t builtinLayout = getLayout(builtinSlots, COUNTOF(builtinSlots));
if (builtinLayout != LAYOUT_BUILTIN) lovrUnreachable();
if (builtinLayout != LAYOUT_BUILTINS) lovrUnreachable();
gpu_slot materialSlots[] = {
{ 0, GPU_SLOT_UNIFORM_BUFFER, GPU_STAGE_GRAPHICS }, // Data
@@ -1293,7 +1291,7 @@ static void recordRenderPass(Pass* pass, gpu_stream* stream) {
data->color[3] = draw->color[3];
}
gpu_bundle* builtinBundle = getBundle(LAYOUT_BUILTIN, builtins, COUNTOF(builtins));
gpu_bundle* builtinBundle = getBundle(LAYOUT_BUILTINS, builtins, COUNTOF(builtins));
// Pipelines
@@ -2820,7 +2818,8 @@ Shader* lovrGraphicsGetDefaultShader(DefaultShader type) {
},
.stageCount = 1,
.flags = &(ShaderFlag) { NULL, 0, state.device.subgroupSize },
.flagCount = 1
.flagCount = 1,
.isDefault = true
});
default:
return state.defaultShaders[type] = lovrShaderCreate(&(ShaderInfo) {
@@ -2829,7 +2828,8 @@ Shader* lovrGraphicsGetDefaultShader(DefaultShader type) {
lovrGraphicsGetDefaultShaderSource(type, STAGE_VERTEX),
lovrGraphicsGetDefaultShaderSource(type, STAGE_FRAGMENT)
},
.stageCount = 2
.stageCount = 2,
.isDefault = true
});
}
}
@@ -2837,9 +2837,11 @@ Shader* lovrGraphicsGetDefaultShader(DefaultShader type) {
Shader* lovrShaderCreate(const ShaderInfo* info) {
Shader* shader = calloc(1, sizeof(Shader) + gpu_sizeof_shader());
lovrAssert(shader, "Out of memory");
shader->ref = 1;
shader->gpu = (gpu_shader*) (shader + 1);
shader->info = *info;
size_t stack = tempPush(&state.allocator);
// Validate stage combinations
for (uint32_t i = 0; i < info->stageCount; i++) {
shader->stageMask |= (1 << info->stages[i].stage);
}
@@ -2850,7 +2852,9 @@ Shader* lovrShaderCreate(const ShaderInfo* info) {
lovrCheck(shader->stageMask == FLAG_COMPUTE, "Compute shaders can only have a compute stage");
}
// Copy the source, because we perform edits on the SPIR-V and the input might be readonly memory
size_t stack = tempPush(&state.allocator);
// Copy the source to temp memory (we perform edits on the SPIR-V and the input might be readonly)
void* source[2];
for (uint32_t i = 0; i < info->stageCount; i++) {
source[i] = tempAlloc(&state.allocator, info->stages[i].size);
@@ -2891,25 +2895,31 @@ Shader* lovrShaderCreate(const ShaderInfo* info) {
}
}
// Allocate
gpu_slot* slots = tempAlloc(&state.allocator, maxResources * sizeof(gpu_slot));
// Allocate memory
shader->resources = malloc(maxResources * sizeof(ShaderResource));
shader->fields = malloc(maxFields * sizeof(DataField));
shader->names = malloc(maxChars);
shader->flags = malloc(maxSpecConstants * sizeof(gpu_shader_flag));
shader->flagLookup = malloc(maxSpecConstants * sizeof(uint32_t));
shader->names = malloc(maxChars);
lovrAssert(shader->resources && shader->fields && shader->names, "Out of memory");
lovrAssert(shader->flags && shader->flagLookup, "Out of memory");
lovrAssert(shader->resources, "Out of memory");
lovrAssert(shader->fields, "Out of memory");
lovrAssert(shader->names, "Out of memory");
lovrAssert(shader->flags, "Out of memory");
lovrAssert(shader->flagLookup, "Out of memory");
// Stage-specific metadata
// Workgroup size
if (info->type == SHADER_COMPUTE) {
memcpy(shader->workgroupSize, spv[0].workgroupSize, 3 * sizeof(uint32_t));
lovrCheck(shader->workgroupSize[0] <= state.limits.workgroupSize[0], "Shader workgroup size exceeds the 'workgroupSize' limit");
lovrCheck(shader->workgroupSize[1] <= state.limits.workgroupSize[1], "Shader workgroup size exceeds the 'workgroupSize' limit");
lovrCheck(shader->workgroupSize[2] <= state.limits.workgroupSize[2], "Shader workgroup size exceeds the 'workgroupSize' limit");
uint32_t totalWorkgroupSize = shader->workgroupSize[0] * shader->workgroupSize[1] * shader->workgroupSize[2];
uint32_t* workgroupSize = spv[0].workgroupSize;
uint32_t totalWorkgroupSize = workgroupSize[0] * workgroupSize[1] * workgroupSize[2];
lovrCheck(workgroupSize[0] <= state.limits.workgroupSize[0], "Shader workgroup size exceeds the 'workgroupSize' limit");
lovrCheck(workgroupSize[1] <= state.limits.workgroupSize[1], "Shader workgroup size exceeds the 'workgroupSize' limit");
lovrCheck(workgroupSize[2] <= state.limits.workgroupSize[2], "Shader workgroup size exceeds the 'workgroupSize' limit");
lovrCheck(totalWorkgroupSize <= state.limits.totalWorkgroupSize, "Shader workgroup size exceeds the 'totalWorkgroupSize' limit");
} else if (spv[0].attributeCount > 0) {
memcpy(shader->workgroupSize, workgroupSize, 3 * sizeof(uint32_t));
}
// Vertex attributes
if (info->type == SHADER_GRAPHICS && spv[0].attributeCount > 0) {
shader->attributeCount = spv[0].attributeCount;
shader->attributes = malloc(shader->attributeCount * sizeof(ShaderAttribute));
lovrAssert(shader->attributes, "Out of memory");
@@ -2922,71 +2932,18 @@ Shader* lovrShaderCreate(const ShaderInfo* info) {
uint32_t resourceSet = info->type == SHADER_COMPUTE ? 0 : 2;
uint32_t uniformSet = info->type == SHADER_COMPUTE ? 1 : 3;
uint32_t lastResourceCount = 0;
// Resources
for (uint32_t s = 0; s < info->stageCount; s++, lastResourceCount = shader->resourceCount) {
for (uint32_t s = 0, lastResourceCount = 0; s < info->stageCount; s++, lastResourceCount = shader->resourceCount) {
ShaderStage stage = info->stages[s].stage;
for (uint32_t i = 0; i < spv[s].resourceCount; i++) {
spv_resource* resource = &spv[s].resources[i];
// It's safe to cast away const because we are operating on a copy of the input
uint32_t* set = (uint32_t*) resource->set;
uint32_t* binding = (uint32_t*) resource->binding;
if (!set || !binding) {
continue;
}
if (!(*set == resourceSet || (*set == 0 && *binding > LAST_BUILTIN_BINDING))) {
continue;
}
lovrCheck(resource->arraySize == 0, "Arrays of resources in shaders are not currently supported");
lovrCheck(resource->type != SPV_COMBINED_TEXTURE_SAMPLER, "Shader variable '%s' is a%s, which is not supported%s", resource->name, " combined texture sampler", " (use e.g. texture2D instead of sampler2D)");
lovrCheck(resource->type != SPV_UNIFORM_TEXEL_BUFFER, "Shader variable '%s' is a%s, which is not supported%s", resource->name, " uniform texel buffer", "");
lovrCheck(resource->type != SPV_STORAGE_TEXEL_BUFFER, "Shader variable '%s' is a%s, which is not supported%s", resource->name, " storage texel buffer", "");
lovrCheck(resource->type != SPV_INPUT_ATTACHMENT, "Shader variable '%s' is a%s, which is not supported%s", resource->name, "n input attachment", "");
static const gpu_slot_type resourceTypes[] = {
[SPV_UNIFORM_BUFFER] = GPU_SLOT_UNIFORM_BUFFER,
[SPV_STORAGE_BUFFER] = GPU_SLOT_STORAGE_BUFFER,
[SPV_SAMPLED_TEXTURE] = GPU_SLOT_SAMPLED_TEXTURE,
[SPV_STORAGE_TEXTURE] = GPU_SLOT_STORAGE_TEXTURE,
[SPV_SAMPLER] = GPU_SLOT_SAMPLER
};
gpu_phase stageMap[] = {
[STAGE_VERTEX] = GPU_STAGE_VERTEX,
[STAGE_FRAGMENT] = GPU_STAGE_FRAGMENT,
[STAGE_COMPUTE] = GPU_STAGE_COMPUTE
};
gpu_phase stagePhase[] = {
[STAGE_VERTEX] = GPU_PHASE_SHADER_VERTEX,
[STAGE_FRAGMENT] = GPU_PHASE_SHADER_FRAGMENT,
[STAGE_COMPUTE] = GPU_PHASE_SHADER_COMPUTE
};
uint32_t hash = (uint32_t) hash64(resource->name, strlen(resource->name));
bool skip = false;
// Merge resources between shader stages by name
for (uint32_t j = 0; j < lastResourceCount; j++) {
ShaderResource* other = &shader->resources[j];
if (other->hash == hash) {
lovrCheck(other->type == resourceTypes[resource->type], "Shader variable '%s' is declared in multiple shader stages with different types", resource->name);
slots[j].stages |= stageMap[stage];
shader->resources[j].phase |= stagePhase[stage];
*set = resourceSet;
*binding = shader->resources[j].binding;
skip = true;
break;
}
}
if (skip) {
continue;
}
// glslang outputs gl_DefaultUniformBlock, there's also the Constants macro which defines a DefaultUniformBlock UBO
if (!strcmp(resource->name, "gl_DefaultUniformBlock") || !strcmp(resource->name, "DefaultUniformBlock")) {
spv_field* block = resource->bufferFields;
shader->uniformSize = block->elementSize;
@@ -2997,64 +2954,109 @@ Shader* lovrShaderCreate(const ShaderInfo* info) {
continue;
}
uint32_t index = shader->resourceCount++;
lovrAssert(index < MAX_SHADER_RESOURCES, "Shader resource count exceeds resourcesPerShader limit (%d)", MAX_SHADER_RESOURCES);
if (*set != resourceSet) {
*set = resourceSet;
*binding = index;
// Skip builtin resources
if (info->type == SHADER_GRAPHICS && ((*set == 0 && *binding <= LAST_BUILTIN_BINDING) || *set == 1)) {
continue;
}
slots[index] = (gpu_slot) {
.number = *binding,
.type = resourceTypes[resource->type],
.stages = stageMap[stage]
static const gpu_slot_type types[] = {
[SPV_UNIFORM_BUFFER] = GPU_SLOT_UNIFORM_BUFFER,
[SPV_STORAGE_BUFFER] = GPU_SLOT_STORAGE_BUFFER,
[SPV_SAMPLED_TEXTURE] = GPU_SLOT_SAMPLED_TEXTURE,
[SPV_STORAGE_TEXTURE] = GPU_SLOT_STORAGE_TEXTURE,
[SPV_SAMPLER] = GPU_SLOT_SAMPLER
};
shader->resources[index] = (ShaderResource) {
.hash = hash,
.binding = *binding,
.type = resourceTypes[resource->type],
.phase = stagePhase[stage]
gpu_phase phases[] = {
[STAGE_VERTEX] = GPU_PHASE_SHADER_VERTEX,
[STAGE_FRAGMENT] = GPU_PHASE_SHADER_FRAGMENT,
[STAGE_COMPUTE] = GPU_PHASE_SHADER_COMPUTE
};
if (resource->bufferFields) {
spv_field* field = &resource->bufferFields[0];
gpu_slot_type type = types[resource->type];
gpu_phase phase = phases[stage];
// Unwrap the container struct if it just contains a single struct or array of structs
if (field->fieldCount == 1 && field->totalFieldCount > 1) {
field = &field->fields[0];
} else if (field->totalFieldCount == 1 && field->fields[0].arrayLength > 0) {
// Arrays of non-aggregates get converted to an array of single-element structs to better
// match the way buffer formats work. Note that we edit the spv_field, because DataFields
// get initialized later and so any edits to them would get overwritten.
spv_field* child = &field->fields[0];
field->arrayLength = child->arrayLength;
field->arrayStride = child->arrayStride;
field->elementSize = child->elementSize;
field->type = child->type; // This allows the field to be used as both AoS and single-field array
child->arrayLength = 0;
child->arrayStride = 0;
// Merge resources between shader stages, by name
bool merged = false;
uint32_t hash = (uint32_t) hash64(resource->name, strlen(resource->name));
for (uint32_t j = 0; j < lastResourceCount; j++) {
ShaderResource* other = &shader->resources[j];
if (other->hash == hash) {
lovrCheck(other->type == type, "Shader variable '%s' is declared in multiple shader stages with different types", resource->name);
*set = resourceSet;
*binding = shader->resources[j].binding;
shader->resources[j].phase |= phase;
merged = true;
break;
}
}
shader->resources[index].fieldCount = field->totalFieldCount + 1;
shader->resources[index].format = shader->fields + ((s == 1 ? spv[0].fieldCount : 0) + (field - spv[s].fields));
if (merged) {
continue;
}
uint32_t index = shader->resourceCount++;
lovrCheck(index < MAX_SHADER_RESOURCES, "Shader resource count exceeds resourcesPerShader limit (%d)", MAX_SHADER_RESOURCES);
lovrCheck(resource->type != SPV_COMBINED_TEXTURE_SAMPLER, "Shader variable '%s' is a%s, which is not supported%s", resource->name, " combined texture sampler", " (use e.g. texture2D instead of sampler2D)");
lovrCheck(resource->type != SPV_UNIFORM_TEXEL_BUFFER, "Shader variable '%s' is a%s, which is not supported%s", resource->name, " uniform texel buffer", "");
lovrCheck(resource->type != SPV_STORAGE_TEXEL_BUFFER, "Shader variable '%s' is a%s, which is not supported%s", resource->name, " storage texel buffer", "");
lovrCheck(resource->type != SPV_INPUT_ATTACHMENT, "Shader variable '%s' is a%s, which is not supported%s", resource->name, "n input attachment", "");
lovrCheck(resource->arraySize == 0, "Arrays of resources in shaders are not currently supported");
// Move resources into set #2 and give them auto-incremented binding numbers starting at zero
// Compute shaders don't need remapping since everything's in set #0 and there are no builtins
if (!info->isDefault && info->type == SHADER_GRAPHICS && *set == 0 && *binding > LAST_BUILTIN_BINDING) {
*set = resourceSet;
*binding = index;
}
bool buffer = resource->type == SPV_UNIFORM_BUFFER || resource->type == SPV_STORAGE_BUFFER;
bool texture = resource->type == SPV_SAMPLED_TEXTURE || resource->type == SPV_STORAGE_TEXTURE;
bool sampler = resource->type == SPV_SAMPLER;
bool storage = resource->type == SPV_STORAGE_BUFFER || resource->type == SPV_STORAGE_TEXTURE;
shader->bufferMask |= (buffer << *binding);
shader->textureMask |= (texture << *binding);
shader->samplerMask |= (sampler << *binding);
shader->storageMask |= (storage << *binding);
shader->bufferMask |= (buffer << index);
shader->textureMask |= (texture << index);
shader->samplerMask |= (sampler << index);
shader->storageMask |= (storage << index);
gpu_cache cache;
if (storage) {
shader->resources[index].cache = stage == STAGE_COMPUTE ? GPU_CACHE_STORAGE_WRITE : GPU_CACHE_STORAGE_READ;
cache = info->type == SHADER_COMPUTE ? GPU_CACHE_STORAGE_WRITE : GPU_CACHE_STORAGE_READ;
} else {
shader->resources[index].cache = texture ? GPU_CACHE_TEXTURE : GPU_CACHE_UNIFORM;
cache = texture ? GPU_CACHE_TEXTURE : GPU_CACHE_UNIFORM;
}
shader->resources[index] = (ShaderResource) {
.hash = hash,
.binding = *binding,
.type = type,
.phase = phase,
.cache = cache
};
if (buffer && resource->bufferFields) {
spv_field* field = &resource->bufferFields[0];
// The following conversions take place, for convenience and to better match Buffer formats:
// - Struct containing either single struct or single array of structs gets unwrapped
// - Struct containing single array of non-structs gets converted to array of single-field structs
if (field->fieldCount == 1 && field->totalFieldCount > 1) {
field = &field->fields[0];
} else if (field->totalFieldCount == 1 && field->fields[0].arrayLength > 0) {
spv_field* child = &field->fields[0];
field->arrayLength = child->arrayLength;
field->arrayStride = child->arrayStride;
field->elementSize = child->elementSize;
field->type = child->type;
child->arrayLength = 0;
child->arrayStride = 0;
}
shader->resources[index].fieldCount = field->totalFieldCount + 1;
shader->resources[index].format = shader->fields + ((s == 1 ? spv[0].fieldCount : 0) + (field - spv[s].fields));
}
}
}
@@ -3099,9 +3101,7 @@ Shader* lovrShaderCreate(const ShaderInfo* info) {
.length = field->arrayLength,
.stride = field->arrayLength > 0 ? field->arrayStride : field->elementSize, // Use stride as element size for non-arrays
.fieldCount = field->fieldCount,
.fields = field->fields ?
shader->fields + base + (field->fields - spv[s].fields) :
NULL
.fields = field->fields ? shader->fields + base + (field->fields - spv[s].fields) : NULL
};
if (field->name) {
@@ -3163,40 +3163,60 @@ Shader* lovrShaderCreate(const ShaderInfo* info) {
}
}
// Push constants
uint32_t pushConstantSize = 0;
for (uint32_t i = 0; i < info->stageCount; i++) {
if (spv[i].pushConstants) {
pushConstantSize = MAX(pushConstantSize, spv[i].pushConstants->elementSize);
}
// Layout
gpu_slot* slots = tempAlloc(&state.allocator, shader->resourceCount * sizeof(gpu_slot));
for (uint32_t i = 0; i < shader->resourceCount; i++) {
ShaderResource* resource = &shader->resources[i];
slots[i] = (gpu_slot) {
.number = resource->binding,
.type = resource->type,
.stages =
((resource->phase & GPU_PHASE_SHADER_VERTEX) ? GPU_STAGE_VERTEX : 0) |
((resource->phase & GPU_PHASE_SHADER_FRAGMENT) ? GPU_STAGE_FRAGMENT : 0) |
((resource->phase & GPU_PHASE_SHADER_COMPUTE) ? GPU_STAGE_COMPUTE : 0)
};
}
shader->ref = 1;
shader->gpu = (gpu_shader*) (shader + 1);
shader->info = *info;
shader->layout = getLayout(slots, shader->resourceCount);
gpu_shader_info gpu = {
.pushConstantSize = pushConstantSize,
.stageCount = info->stageCount,
.stages = tempAlloc(&state.allocator, info->stageCount * sizeof(gpu_shader_source)),
.label = info->label
};
for (uint32_t i = 0; i < info->stageCount; i++) {
switch (info->stages[i].stage) {
case STAGE_VERTEX: gpu.vertex = (gpu_shader_stage) { .code = source[i], .length = info->stages[i].size }; break;
case STAGE_FRAGMENT: gpu.fragment = (gpu_shader_stage) { .code = source[i], .length = info->stages[i].size }; break;
case STAGE_COMPUTE: gpu.compute = (gpu_shader_stage) { .code = source[i], .length = info->stages[i].size }; break;
default: break;
const uint32_t stageMap[] = {
[STAGE_VERTEX] = GPU_STAGE_VERTEX,
[STAGE_FRAGMENT] = GPU_STAGE_FRAGMENT,
[STAGE_COMPUTE] = GPU_STAGE_COMPUTE
};
gpu.stages[i] = (gpu_shader_source) {
.stage = stageMap[info->stages[i].stage],
.code = source[i],
.length = info->stages[i].size
};
}
for (uint32_t i = 0; i < info->stageCount; i++) {
if (spv[i].pushConstants) {
gpu.pushConstantSize = MAX(gpu.pushConstantSize, spv[i].pushConstants->elementSize);
}
}
if (info->type == SHADER_GRAPHICS) {
gpu.layouts[0] = state.layouts.data[LAYOUT_BUILTIN].gpu;
gpu.layouts[1] = state.layouts.data[LAYOUT_MATERIAL].gpu;
}
gpu_layout* resourceLayout = state.layouts.data[shader->layout].gpu;
gpu_layout* uniformsLayout = shader->uniformSize > 0 ? state.layouts.data[LAYOUT_UNIFORMS].gpu : NULL;
gpu.layouts[resourceSet] = state.layouts.data[shader->layout].gpu;
if (shader->uniformSize > 0) gpu.layouts[uniformSet] = state.layouts.data[LAYOUT_UNIFORMS].gpu;
if (info->type == SHADER_GRAPHICS) {
gpu.layouts[0] = state.layouts.data[LAYOUT_BUILTINS].gpu;
gpu.layouts[1] = state.layouts.data[LAYOUT_MATERIAL].gpu;
gpu.layouts[2] = resourceLayout;
gpu.layouts[3] = uniformsLayout;
} else {
gpu.layouts[0] = resourceLayout;
gpu.layouts[1] = uniformsLayout;
}
gpu_shader_init(shader->gpu, &gpu);
lovrShaderInit(shader);
@@ -5230,7 +5250,6 @@ void lovrPassReset(Pass* pass) {
pass->pipeline = lovrPassAllocate(pass, PIPELINE_STACK_SIZE * sizeof(Pipeline));
pass->bindings = lovrPassAllocate(pass, 32 * sizeof(gpu_binding));
pass->uniforms = NULL;
pass->uniformSize = 0;
pass->computeCount = 0;
pass->computes = NULL;
pass->drawCount = 0;
@@ -5279,7 +5298,6 @@ void lovrPassReset(Pass* pass) {
memset(pass->scissor, 0, sizeof(pass->scissor));
pass->sampler = NULL;
pass->bindingMask = 0;
}
const PassStats* lovrPassGetStats(Pass* pass) {
@@ -5654,88 +5672,96 @@ void lovrPassSetSampler(Pass* pass, Sampler* sampler) {
}
void lovrPassSetShader(Pass* pass, Shader* shader) {
Shader* previous = pass->pipeline->shader;
if (shader == previous) return;
Shader* old = pass->pipeline->shader;
bool fromCompute = previous && previous->info.type == SHADER_COMPUTE;
bool toCompute = shader && shader->info.type == SHADER_COMPUTE;
if (fromCompute ^ toCompute) {
pass->bindingMask = 0;
if (shader == old) {
return;
}
// Clear any bindings for resources that share the same slot but have different types
if (shader) {
if (previous) {
for (uint32_t i = 0, j = 0; i < previous->resourceCount && j < shader->resourceCount;) {
if (previous->resources[i].binding < shader->resources[j].binding) {
i++;
} else if (previous->resources[i].binding > shader->resources[j].binding) {
j++;
} else {
if (previous->resources[i].type != shader->resources[j].type) {
pass->bindingMask &= ~(1u << shader->resources[j].binding);
gpu_binding bindings[32];
// Ensure there's a valid binding for every resource in the new shader. If the old shader had a
// binding with the same name and type, then use that, otherwise use a "default" resource.
for (uint32_t i = 0; i < shader->resourceCount; i++) {
ShaderResource* resource = &shader->resources[i];
bool useDefault = true;
if (old) {
ShaderResource* other = old->resources;
for (uint32_t j = 0; j < old->resourceCount; j++, other++) {
if (other->hash == resource->hash && other->type == resource->type) {
bindings[resource->binding] = pass->bindings[other->binding];
useDefault = false;
break;
}
i++;
j++;
}
}
if (useDefault) {
switch (resource->type) {
case GPU_SLOT_UNIFORM_BUFFER:
case GPU_SLOT_STORAGE_BUFFER:
bindings[i].buffer.object = state.defaultBuffer->gpu;
bindings[i].buffer.offset = state.defaultBuffer->base;
bindings[i].buffer.extent = state.defaultBuffer->info.size;
break;
case GPU_SLOT_SAMPLED_TEXTURE:
case GPU_SLOT_STORAGE_TEXTURE:
bindings[i].texture = state.defaultTexture->gpu;
break;
case GPU_SLOT_SAMPLER:
bindings[i].sampler = state.defaultSamplers[FILTER_LINEAR]->gpu;
break;
default: break;
}
}
}
uint32_t shaderSlots = (shader->bufferMask | shader->textureMask | shader->samplerMask);
uint32_t missingResources = shaderSlots & ~pass->bindingMask;
memcpy(pass->bindings, bindings, shader->resourceCount * sizeof(gpu_binding));
pass->flags |= DIRTY_BINDINGS;
// Assign default bindings to any slots used by the shader that are missing resources
if (missingResources) {
for (uint32_t i = 0; i < 32; i++) { // TODO biterationtrinsics
uint32_t bit = (1u << i);
// Uniform data is preserved for uniforms with the same name/size (this might be slow...)
if (shader->uniformCount > 0) {
void* uniforms = lovrPassAllocate(pass, shader->uniformSize);
if (~missingResources & bit) {
continue;
if (old && old->uniformCount > 0) {
for (uint32_t i = 0; i < shader->uniformCount; i++) {
DataField* uniform = &shader->uniforms[i];
DataField* other = old->uniforms;
for (uint32_t j = 0; j < old->uniformCount; j++, other++) {
if (uniform->hash == other->hash && uniform->stride == other->stride && uniform->length == other->length) {
void* src = (char*) pass->uniforms + other->offset;
void* dst = (char*) uniforms + uniform->offset;
size_t size = uniform->stride * MAX(uniform->length, 1);
memcpy(dst, src, size);
}
}
}
pass->bindings[i].number = i;
if (shader->bufferMask & bit) {
pass->bindings[i].buffer.object = state.defaultBuffer->gpu;
pass->bindings[i].buffer.offset = state.defaultBuffer->base;
pass->bindings[i].buffer.extent = state.defaultBuffer->info.size;
} else if (shader->textureMask & bit) {
pass->bindings[i].texture = state.defaultTexture->gpu;
} else if (shader->samplerMask & bit) {
pass->bindings[i].sampler = state.defaultSamplers[FILTER_LINEAR]->gpu;
}
pass->bindingMask |= bit;
} else {
memset(uniforms, 0, shader->uniformSize);
}
pass->flags |= DIRTY_BINDINGS;
pass->uniforms = uniforms;
pass->flags |= DIRTY_UNIFORMS;
} else {
pass->flags &= ~DIRTY_UNIFORMS;
}
// Custom vertex attributes must be reset: their locations may differ even if the names match
if (shader->hasCustomAttributes) {
pass->pipeline->lastVertexBuffer = NULL;
}
pass->pipeline->info.shader = shader->gpu;
pass->pipeline->info.flags = shader->flags;
pass->pipeline->info.flagCount = shader->overrideCount;
lovrRetain(shader);
}
lovrRetain(shader);
lovrRelease(previous, lovrShaderDestroy);
lovrRelease(old, lovrShaderDestroy);
pass->pipeline->shader = shader;
pass->pipeline->dirty = true;
// If the shader changes, all the attribute names need to be wired up again, because attributes
// with the same name might have different locations. But if the shader only uses built-in
// attributes (which is common), things will remain stable.
if ((shader && shader->hasCustomAttributes) || (previous && previous->hasCustomAttributes)) {
pass->pipeline->lastVertexBuffer = NULL;
}
if (shader && shader->uniformSize > pass->uniformSize) {
void* uniforms = lovrPassAllocate(pass, shader->uniformSize);
if (pass->uniforms) memcpy(uniforms, pass->uniforms, pass->uniformSize);
pass->uniformSize = shader->uniformSize;
pass->uniforms = uniforms;
pass->flags |= DIRTY_UNIFORMS;
}
}
void lovrPassSetStencilTest(Pass* pass, CompareMode test, uint8_t value, uint8_t mask) {
@@ -5823,7 +5849,6 @@ void lovrPassSendBuffer(Pass* pass, const char* name, size_t length, Buffer* buf
pass->bindings[slot].buffer.object = buffer->gpu;
pass->bindings[slot].buffer.offset = buffer->base + offset;
pass->bindings[slot].buffer.extent = extent;
pass->bindingMask |= (1u << slot);
pass->flags |= DIRTY_BINDINGS;
}
@@ -5845,7 +5870,6 @@ void lovrPassSendTexture(Pass* pass, const char* name, size_t length, Texture* t
trackTexture(pass, texture, resource->phase, resource->cache);
pass->bindings[slot].texture = view;
pass->bindingMask |= (1u << slot);
pass->flags |= DIRTY_BINDINGS;
}
@@ -5858,7 +5882,6 @@ void lovrPassSendSampler(Pass* pass, const char* name, size_t length, Sampler* s
lovrCheck(shader->samplerMask & (1u << slot), "Trying to send a Sampler to '%s', but the active Shader doesn't have a Sampler in that slot", name);
pass->bindings[slot].sampler = sampler->gpu;
pass->bindingMask |= (1u << slot);
pass->flags |= DIRTY_BINDINGS;
}
@@ -5885,7 +5908,6 @@ void lovrPassSendData(Pass* pass, const char* name, size_t length, void** data,
uint32_t size = resource->format->stride * MAX(resource->format->length, 1);
BufferView view = lovrPassGetBuffer(pass, size, state.limits.uniformBufferAlign);
pass->bindings[slot].buffer = (gpu_buffer_binding) { view.buffer, view.offset, view.extent };
pass->bindingMask |= (1u << slot);
pass->flags |= DIRTY_BINDINGS;
*data = view.pointer;
@@ -6059,6 +6081,8 @@ static gpu_bundle_info* lovrPassResolveBindings(Pass* pass, Shader* shader, gpu_
for (uint32_t i = 0; i < bundle->count; i++) {
bundle->bindings[i] = pass->bindings[shader->resources[i].binding];
bundle->bindings[i].type = shader->resources[i].type;
bundle->bindings[i].number = shader->resources[i].binding;
bundle->bindings[i].count = 0;
}
pass->flags &= ~DIRTY_BINDINGS;
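One consequence of commit ae19b7aad3, spelled out: binding numbers are now internal. For a non-default graphics shader, resources declared in set #0 above the builtin range are moved to set #2 and renumbered densely in declaration order, and the buffer/texture/sampler/storage masks are keyed by that dense resource index rather than whatever binding the author wrote. A hypothetical before/after (the declarations and original binding numbers are illustrative, assuming they sit above LAST_BUILTIN_BINDING):

// Declared resource                  SPIR-V (set, binding)   after remapping (set, binding)
// texture2D hypotheticalTex;         (0, 14)                 (2, 0)
// uniform Params { ... } params;     (0, 15)                 (2, 1)
//
// bufferMask, textureMask, samplerMask, and storageMask shift by the dense
// index (bits 0, 1, ...), so sparse author-chosen binding numbers no longer
// feed the 32-bit mask shifts directly.

Since lovrPassSetShader now guarantees a valid (possibly default) binding for every resource, the bindingMask bookkeeping removed throughout this file is indeed redundant.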

View File

@@ -323,6 +323,7 @@ typedef struct {
ShaderFlag* flags;
uint32_t flagCount;
const char* label;
bool isDefault;
} ShaderInfo;
typedef void* ShaderIncluder(const char* filename, size_t* bytesRead);