Compare commits

...

7 Commits

Author SHA1 Message Date
bjorn ec380e0cfd rm pass uniformSize;
It's no longer necessary.
2024-02-25 14:58:11 -08:00
bjorn fa8ea6732b rm pass binding mask;
It's no longer necessary.
2024-02-25 14:57:27 -08:00
bjorn 466a052ded Cleanup; 2024-02-24 15:45:30 -08:00
bjorn 2fe5ba8f3b Ensure all binding fields are fully initialized; 2024-02-24 15:45:10 -08:00
bjorn ae19b7aad3 Fix resource binding numbers; 2024-02-24 15:33:09 -08:00
bjorn 652a074677 Update resource invalidation when switching shaders;
Previously, when switching shaders, resource bindings would be preserved
for resources with matching slots/types.  This doesn't make sense in a
world where binding numbers are internal.  Instead, match up resources
by name/type.

Additionally, rewire all the uniforms by name/size so uniforms with the
same name and size get preserved (name/type would be too hard for e.g. structs).
This seems like it would be horribly slow and may need to be optimized,
optional, or removed.

I didn't test any of this lol, but I will, I promise.
2024-02-24 14:34:29 -08:00
bjorn bd83ad6eb4 Cleanup; 2024-02-24 11:49:11 -08:00
5 changed files with 256 additions and 232 deletions

View File

@ -1072,10 +1072,12 @@ static int l_lovrGraphicsNewShader(lua_State* L) {
Shader* shader = lovrShaderCreate(&info); Shader* shader = lovrShaderCreate(&info);
luax_pushtype(L, Shader, shader); luax_pushtype(L, Shader, shader);
lovrRelease(shader, lovrShaderDestroy); lovrRelease(shader, lovrShaderDestroy);
if (shouldFree[0]) free((void*) source[0].code);
if (shouldFree[1]) free((void*) source[1].code); for (uint32_t i = 0; i < info.stageCount; i++) {
if (source[0].code != compiled[0].code) free((void*) compiled[0].code); if (shouldFree[i]) free((void*) source[i].code);
if (source[1].code != compiled[1].code) free((void*) compiled[1].code); if (source[i].code != compiled[i].code) free((void*) compiled[i].code);
}
arr_free(&flags); arr_free(&flags);
return 1; return 1;
} }

View File

@ -245,16 +245,16 @@ void gpu_layout_destroy(gpu_layout* layout);
// Shader // Shader
typedef struct { typedef struct {
uint32_t stage;
const void* code; const void* code;
size_t length; size_t length;
} gpu_shader_stage; } gpu_shader_source;
typedef struct { typedef struct {
gpu_shader_stage vertex; uint32_t stageCount;
gpu_shader_stage fragment; gpu_shader_source* stages;
gpu_shader_stage compute;
gpu_layout* layouts[4];
uint32_t pushConstantSize; uint32_t pushConstantSize;
gpu_layout* layouts[4];
const char* label; const char* label;
} gpu_shader_info; } gpu_shader_info;

View File

@ -988,31 +988,28 @@ void gpu_layout_destroy(gpu_layout* layout) {
// Shader // Shader
bool gpu_shader_init(gpu_shader* shader, gpu_shader_info* info) { bool gpu_shader_init(gpu_shader* shader, gpu_shader_info* info) {
struct { VkShaderStageFlags flags; gpu_shader_stage* source; } stages[] = {
{ VK_SHADER_STAGE_VERTEX_BIT, &info->vertex },
{ VK_SHADER_STAGE_FRAGMENT_BIT, &info->fragment },
{ VK_SHADER_STAGE_COMPUTE_BIT, &info->compute }
};
uint32_t stageCount = 0;
VkShaderStageFlags stageFlags = 0; VkShaderStageFlags stageFlags = 0;
for (uint32_t i = 0; i < COUNTOF(stages); i++) { for (uint32_t i = 0; i < info->stageCount; i++) {
if (!stages[i].source->code) continue; switch (info->stages[i].stage) {
case GPU_STAGE_VERTEX: stageFlags |= VK_SHADER_STAGE_VERTEX_BIT; break;
case GPU_STAGE_FRAGMENT: stageFlags |= VK_SHADER_STAGE_FRAGMENT_BIT; break;
case GPU_STAGE_COMPUTE: stageFlags |= VK_SHADER_STAGE_COMPUTE_BIT; break;
default: return false;
}
}
for (uint32_t i = 0; i < info->stageCount; i++) {
VkShaderModuleCreateInfo moduleInfo = { VkShaderModuleCreateInfo moduleInfo = {
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
.codeSize = stages[i].source->length, .codeSize = info->stages[i].length,
.pCode = stages[i].source->code .pCode = info->stages[i].code
}; };
VK(vkCreateShaderModule(state.device, &moduleInfo, NULL, &shader->handles[stageCount]), "Failed to load shader") { VK(vkCreateShaderModule(state.device, &moduleInfo, NULL, &shader->handles[i]), "Failed to load shader") {
return false; return false;
} }
nickname(shader->handles[i], VK_OBJECT_TYPE_SHADER_MODULE, info->label); nickname(shader->handles[i], VK_OBJECT_TYPE_SHADER_MODULE, info->label);
stageFlags |= stages[i].flags;
stageCount++;
} }
VkDescriptorSetLayout layouts[4]; VkDescriptorSetLayout layouts[4];

View File

@ -30,7 +30,7 @@
#define PIPELINE_STACK_SIZE 4 #define PIPELINE_STACK_SIZE 4
#define MAX_SHADER_RESOURCES 32 #define MAX_SHADER_RESOURCES 32
#define MAX_CUSTOM_ATTRIBUTES 10 #define MAX_CUSTOM_ATTRIBUTES 10
#define LAYOUT_BUILTIN 0 #define LAYOUT_BUILTINS 0
#define LAYOUT_MATERIAL 1 #define LAYOUT_MATERIAL 1
#define LAYOUT_UNIFORMS 2 #define LAYOUT_UNIFORMS 2
#define FLOAT_BITS(f) ((union { float f; uint32_t u; }) { f }).u #define FLOAT_BITS(f) ((union { float f; uint32_t u; }) { f }).u
@ -525,9 +525,7 @@ struct Pass {
uint32_t transformIndex; uint32_t transformIndex;
uint32_t pipelineIndex; uint32_t pipelineIndex;
gpu_binding* bindings; gpu_binding* bindings;
uint32_t bindingMask;
void* uniforms; void* uniforms;
uint32_t uniformSize;
uint32_t computeCount; uint32_t computeCount;
Compute* computes; Compute* computes;
uint32_t drawCount; uint32_t drawCount;
@ -698,7 +696,7 @@ bool lovrGraphicsInit(GraphicsConfig* config) {
}; };
size_t builtinLayout = getLayout(builtinSlots, COUNTOF(builtinSlots)); size_t builtinLayout = getLayout(builtinSlots, COUNTOF(builtinSlots));
if (builtinLayout != LAYOUT_BUILTIN) lovrUnreachable(); if (builtinLayout != LAYOUT_BUILTINS) lovrUnreachable();
gpu_slot materialSlots[] = { gpu_slot materialSlots[] = {
{ 0, GPU_SLOT_UNIFORM_BUFFER, GPU_STAGE_GRAPHICS }, // Data { 0, GPU_SLOT_UNIFORM_BUFFER, GPU_STAGE_GRAPHICS }, // Data
@ -1293,7 +1291,7 @@ static void recordRenderPass(Pass* pass, gpu_stream* stream) {
data->color[3] = draw->color[3]; data->color[3] = draw->color[3];
} }
gpu_bundle* builtinBundle = getBundle(LAYOUT_BUILTIN, builtins, COUNTOF(builtins)); gpu_bundle* builtinBundle = getBundle(LAYOUT_BUILTINS, builtins, COUNTOF(builtins));
// Pipelines // Pipelines
@ -2820,7 +2818,8 @@ Shader* lovrGraphicsGetDefaultShader(DefaultShader type) {
}, },
.stageCount = 1, .stageCount = 1,
.flags = &(ShaderFlag) { NULL, 0, state.device.subgroupSize }, .flags = &(ShaderFlag) { NULL, 0, state.device.subgroupSize },
.flagCount = 1 .flagCount = 1,
.isDefault = true
}); });
default: default:
return state.defaultShaders[type] = lovrShaderCreate(&(ShaderInfo) { return state.defaultShaders[type] = lovrShaderCreate(&(ShaderInfo) {
@ -2829,7 +2828,8 @@ Shader* lovrGraphicsGetDefaultShader(DefaultShader type) {
lovrGraphicsGetDefaultShaderSource(type, STAGE_VERTEX), lovrGraphicsGetDefaultShaderSource(type, STAGE_VERTEX),
lovrGraphicsGetDefaultShaderSource(type, STAGE_FRAGMENT) lovrGraphicsGetDefaultShaderSource(type, STAGE_FRAGMENT)
}, },
.stageCount = 2 .stageCount = 2,
.isDefault = true
}); });
} }
} }
@ -2837,9 +2837,11 @@ Shader* lovrGraphicsGetDefaultShader(DefaultShader type) {
Shader* lovrShaderCreate(const ShaderInfo* info) { Shader* lovrShaderCreate(const ShaderInfo* info) {
Shader* shader = calloc(1, sizeof(Shader) + gpu_sizeof_shader()); Shader* shader = calloc(1, sizeof(Shader) + gpu_sizeof_shader());
lovrAssert(shader, "Out of memory"); lovrAssert(shader, "Out of memory");
shader->ref = 1;
shader->gpu = (gpu_shader*) (shader + 1);
shader->info = *info;
size_t stack = tempPush(&state.allocator); // Validate stage combinations
for (uint32_t i = 0; i < info->stageCount; i++) { for (uint32_t i = 0; i < info->stageCount; i++) {
shader->stageMask |= (1 << info->stages[i].stage); shader->stageMask |= (1 << info->stages[i].stage);
} }
@ -2850,7 +2852,9 @@ Shader* lovrShaderCreate(const ShaderInfo* info) {
lovrCheck(shader->stageMask == FLAG_COMPUTE, "Compute shaders can only have a compute stage"); lovrCheck(shader->stageMask == FLAG_COMPUTE, "Compute shaders can only have a compute stage");
} }
// Copy the source, because we perform edits on the SPIR-V and the input might be readonly memory size_t stack = tempPush(&state.allocator);
// Copy the source to temp memory (we perform edits on the SPIR-V and the input might be readonly)
void* source[2]; void* source[2];
for (uint32_t i = 0; i < info->stageCount; i++) { for (uint32_t i = 0; i < info->stageCount; i++) {
source[i] = tempAlloc(&state.allocator, info->stages[i].size); source[i] = tempAlloc(&state.allocator, info->stages[i].size);
@ -2891,25 +2895,31 @@ Shader* lovrShaderCreate(const ShaderInfo* info) {
} }
} }
// Allocate // Allocate memory
gpu_slot* slots = tempAlloc(&state.allocator, maxResources * sizeof(gpu_slot));
shader->resources = malloc(maxResources * sizeof(ShaderResource)); shader->resources = malloc(maxResources * sizeof(ShaderResource));
shader->fields = malloc(maxFields * sizeof(DataField)); shader->fields = malloc(maxFields * sizeof(DataField));
shader->names = malloc(maxChars);
shader->flags = malloc(maxSpecConstants * sizeof(gpu_shader_flag)); shader->flags = malloc(maxSpecConstants * sizeof(gpu_shader_flag));
shader->flagLookup = malloc(maxSpecConstants * sizeof(uint32_t)); shader->flagLookup = malloc(maxSpecConstants * sizeof(uint32_t));
shader->names = malloc(maxChars); lovrAssert(shader->resources, "Out of memory");
lovrAssert(shader->resources && shader->fields && shader->names, "Out of memory"); lovrAssert(shader->fields, "Out of memory");
lovrAssert(shader->flags && shader->flagLookup, "Out of memory"); lovrAssert(shader->names, "Out of memory");
lovrAssert(shader->flags, "Out of memory");
lovrAssert(shader->flagLookup, "Out of memory");
// Stage-specific metadata // Workgroup size
if (info->type == SHADER_COMPUTE) { if (info->type == SHADER_COMPUTE) {
memcpy(shader->workgroupSize, spv[0].workgroupSize, 3 * sizeof(uint32_t)); uint32_t* workgroupSize = spv[0].workgroupSize;
lovrCheck(shader->workgroupSize[0] <= state.limits.workgroupSize[0], "Shader workgroup size exceeds the 'workgroupSize' limit"); uint32_t totalWorkgroupSize = workgroupSize[0] * workgroupSize[1] * workgroupSize[2];
lovrCheck(shader->workgroupSize[1] <= state.limits.workgroupSize[1], "Shader workgroup size exceeds the 'workgroupSize' limit"); lovrCheck(workgroupSize[0] <= state.limits.workgroupSize[0], "Shader workgroup size exceeds the 'workgroupSize' limit");
lovrCheck(shader->workgroupSize[2] <= state.limits.workgroupSize[2], "Shader workgroup size exceeds the 'workgroupSize' limit"); lovrCheck(workgroupSize[1] <= state.limits.workgroupSize[1], "Shader workgroup size exceeds the 'workgroupSize' limit");
uint32_t totalWorkgroupSize = shader->workgroupSize[0] * shader->workgroupSize[1] * shader->workgroupSize[2]; lovrCheck(workgroupSize[2] <= state.limits.workgroupSize[2], "Shader workgroup size exceeds the 'workgroupSize' limit");
lovrCheck(totalWorkgroupSize <= state.limits.totalWorkgroupSize, "Shader workgroup size exceeds the 'totalWorkgroupSize' limit"); lovrCheck(totalWorkgroupSize <= state.limits.totalWorkgroupSize, "Shader workgroup size exceeds the 'totalWorkgroupSize' limit");
} else if (spv[0].attributeCount > 0) { memcpy(shader->workgroupSize, workgroupSize, 3 * sizeof(uint32_t));
}
// Vertex attributes
if (info->type == SHADER_GRAPHICS && spv[0].attributeCount > 0) {
shader->attributeCount = spv[0].attributeCount; shader->attributeCount = spv[0].attributeCount;
shader->attributes = malloc(shader->attributeCount * sizeof(ShaderAttribute)); shader->attributes = malloc(shader->attributeCount * sizeof(ShaderAttribute));
lovrAssert(shader->attributes, "Out of memory"); lovrAssert(shader->attributes, "Out of memory");
@ -2922,71 +2932,18 @@ Shader* lovrShaderCreate(const ShaderInfo* info) {
uint32_t resourceSet = info->type == SHADER_COMPUTE ? 0 : 2; uint32_t resourceSet = info->type == SHADER_COMPUTE ? 0 : 2;
uint32_t uniformSet = info->type == SHADER_COMPUTE ? 1 : 3; uint32_t uniformSet = info->type == SHADER_COMPUTE ? 1 : 3;
uint32_t lastResourceCount = 0;
// Resources // Resources
for (uint32_t s = 0; s < info->stageCount; s++, lastResourceCount = shader->resourceCount) { for (uint32_t s = 0, lastResourceCount = 0; s < info->stageCount; s++, lastResourceCount = shader->resourceCount) {
ShaderStage stage = info->stages[s].stage; ShaderStage stage = info->stages[s].stage;
for (uint32_t i = 0; i < spv[s].resourceCount; i++) { for (uint32_t i = 0; i < spv[s].resourceCount; i++) {
spv_resource* resource = &spv[s].resources[i]; spv_resource* resource = &spv[s].resources[i];
// It's safe to cast away const because we are operating on a copy of the input
uint32_t* set = (uint32_t*) resource->set; uint32_t* set = (uint32_t*) resource->set;
uint32_t* binding = (uint32_t*) resource->binding; uint32_t* binding = (uint32_t*) resource->binding;
if (!set || !binding) { // glslang outputs gl_DefaultUniformBlock, there's also the Constants macro which defines a DefaultUniformBlock UBO
continue;
}
if (!(*set == resourceSet || (*set == 0 && *binding > LAST_BUILTIN_BINDING))) {
continue;
}
lovrCheck(resource->arraySize == 0, "Arrays of resources in shaders are not currently supported");
lovrCheck(resource->type != SPV_COMBINED_TEXTURE_SAMPLER, "Shader variable '%s' is a%s, which is not supported%s", resource->name, " combined texture sampler", " (use e.g. texture2D instead of sampler2D)");
lovrCheck(resource->type != SPV_UNIFORM_TEXEL_BUFFER, "Shader variable '%s' is a%s, which is not supported%s", resource->name, " uniform texel buffer", "");
lovrCheck(resource->type != SPV_STORAGE_TEXEL_BUFFER, "Shader variable '%s' is a%s, which is not supported%s", resource->name, " storage texel buffer", "");
lovrCheck(resource->type != SPV_INPUT_ATTACHMENT, "Shader variable '%s' is a%s, which is not supported%s", resource->name, "n input attachment", "");
static const gpu_slot_type resourceTypes[] = {
[SPV_UNIFORM_BUFFER] = GPU_SLOT_UNIFORM_BUFFER,
[SPV_STORAGE_BUFFER] = GPU_SLOT_STORAGE_BUFFER,
[SPV_SAMPLED_TEXTURE] = GPU_SLOT_SAMPLED_TEXTURE,
[SPV_STORAGE_TEXTURE] = GPU_SLOT_STORAGE_TEXTURE,
[SPV_SAMPLER] = GPU_SLOT_SAMPLER
};
gpu_phase stageMap[] = {
[STAGE_VERTEX] = GPU_STAGE_VERTEX,
[STAGE_FRAGMENT] = GPU_STAGE_FRAGMENT,
[STAGE_COMPUTE] = GPU_STAGE_COMPUTE
};
gpu_phase stagePhase[] = {
[STAGE_VERTEX] = GPU_PHASE_SHADER_VERTEX,
[STAGE_FRAGMENT] = GPU_PHASE_SHADER_FRAGMENT,
[STAGE_COMPUTE] = GPU_PHASE_SHADER_COMPUTE
};
uint32_t hash = (uint32_t) hash64(resource->name, strlen(resource->name));
bool skip = false;
// Merge resources between shader stages by name
for (uint32_t j = 0; j < lastResourceCount; j++) {
ShaderResource* other = &shader->resources[j];
if (other->hash == hash) {
lovrCheck(other->type == resourceTypes[resource->type], "Shader variable '%s' is declared in multiple shader stages with different types", resource->name);
slots[j].stages |= stageMap[stage];
shader->resources[j].phase |= stagePhase[stage];
*set = resourceSet;
*binding = shader->resources[j].binding;
skip = true;
break;
}
}
if (skip) {
continue;
}
if (!strcmp(resource->name, "gl_DefaultUniformBlock") || !strcmp(resource->name, "DefaultUniformBlock")) { if (!strcmp(resource->name, "gl_DefaultUniformBlock") || !strcmp(resource->name, "DefaultUniformBlock")) {
spv_field* block = resource->bufferFields; spv_field* block = resource->bufferFields;
shader->uniformSize = block->elementSize; shader->uniformSize = block->elementSize;
@ -2997,64 +2954,109 @@ Shader* lovrShaderCreate(const ShaderInfo* info) {
continue; continue;
} }
uint32_t index = shader->resourceCount++; // Skip builtin resources
if (info->type == SHADER_GRAPHICS && ((*set == 0 && *binding <= LAST_BUILTIN_BINDING) || *set == 1)) {
lovrAssert(index < MAX_SHADER_RESOURCES, "Shader resource count exceeds resourcesPerShader limit (%d)", MAX_SHADER_RESOURCES); continue;
if (*set != resourceSet) {
*set = resourceSet;
*binding = index;
} }
slots[index] = (gpu_slot) { static const gpu_slot_type types[] = {
.number = *binding, [SPV_UNIFORM_BUFFER] = GPU_SLOT_UNIFORM_BUFFER,
.type = resourceTypes[resource->type], [SPV_STORAGE_BUFFER] = GPU_SLOT_STORAGE_BUFFER,
.stages = stageMap[stage] [SPV_SAMPLED_TEXTURE] = GPU_SLOT_SAMPLED_TEXTURE,
[SPV_STORAGE_TEXTURE] = GPU_SLOT_STORAGE_TEXTURE,
[SPV_SAMPLER] = GPU_SLOT_SAMPLER
}; };
shader->resources[index] = (ShaderResource) { gpu_phase phases[] = {
.hash = hash, [STAGE_VERTEX] = GPU_PHASE_SHADER_VERTEX,
.binding = *binding, [STAGE_FRAGMENT] = GPU_PHASE_SHADER_FRAGMENT,
.type = resourceTypes[resource->type], [STAGE_COMPUTE] = GPU_PHASE_SHADER_COMPUTE
.phase = stagePhase[stage]
}; };
if (resource->bufferFields) { gpu_slot_type type = types[resource->type];
spv_field* field = &resource->bufferFields[0]; gpu_phase phase = phases[stage];
// Unwrap the container struct if it just contains a single struct or array of structs // Merge resources between shader stages, by name
if (field->fieldCount == 1 && field->totalFieldCount > 1) { bool merged = false;
field = &field->fields[0]; uint32_t hash = (uint32_t) hash64(resource->name, strlen(resource->name));
} else if (field->totalFieldCount == 1 && field->fields[0].arrayLength > 0) { for (uint32_t j = 0; j < lastResourceCount; j++) {
// Arrays of non-aggregates get converted to an array of single-element structs to better ShaderResource* other = &shader->resources[j];
// match the way buffer formats work. Note that we edit the spv_field, because DataFields if (other->hash == hash) {
// get initialized later and so any edits to them would get overwritten. lovrCheck(other->type == type, "Shader variable '%s' is declared in multiple shader stages with different types", resource->name);
spv_field* child = &field->fields[0]; *set = resourceSet;
field->arrayLength = child->arrayLength; *binding = shader->resources[j].binding;
field->arrayStride = child->arrayStride; shader->resources[j].phase |= phase;
field->elementSize = child->elementSize; merged = true;
field->type = child->type; // This allows the field to be used as both AoS and single-field array break;
child->arrayLength = 0;
child->arrayStride = 0;
} }
}
shader->resources[index].fieldCount = field->totalFieldCount + 1; if (merged) {
shader->resources[index].format = shader->fields + ((s == 1 ? spv[0].fieldCount : 0) + (field - spv[s].fields)); continue;
}
uint32_t index = shader->resourceCount++;
lovrCheck(index < MAX_SHADER_RESOURCES, "Shader resource count exceeds resourcesPerShader limit (%d)", MAX_SHADER_RESOURCES);
lovrCheck(resource->type != SPV_COMBINED_TEXTURE_SAMPLER, "Shader variable '%s' is a%s, which is not supported%s", resource->name, " combined texture sampler", " (use e.g. texture2D instead of sampler2D)");
lovrCheck(resource->type != SPV_UNIFORM_TEXEL_BUFFER, "Shader variable '%s' is a%s, which is not supported%s", resource->name, " uniform texel buffer", "");
lovrCheck(resource->type != SPV_STORAGE_TEXEL_BUFFER, "Shader variable '%s' is a%s, which is not supported%s", resource->name, " storage texel buffer", "");
lovrCheck(resource->type != SPV_INPUT_ATTACHMENT, "Shader variable '%s' is a%s, which is not supported%s", resource->name, "n input attachment", "");
lovrCheck(resource->arraySize == 0, "Arrays of resources in shaders are not currently supported");
// Move resources into set #2 and give them auto-incremented binding numbers starting at zero
// Compute shaders don't need remapping since everything's in set #0 and there are no builtins
if (!info->isDefault && info->type == SHADER_GRAPHICS && *set == 0 && *binding > LAST_BUILTIN_BINDING) {
*set = resourceSet;
*binding = index;
} }
bool buffer = resource->type == SPV_UNIFORM_BUFFER || resource->type == SPV_STORAGE_BUFFER; bool buffer = resource->type == SPV_UNIFORM_BUFFER || resource->type == SPV_STORAGE_BUFFER;
bool texture = resource->type == SPV_SAMPLED_TEXTURE || resource->type == SPV_STORAGE_TEXTURE; bool texture = resource->type == SPV_SAMPLED_TEXTURE || resource->type == SPV_STORAGE_TEXTURE;
bool sampler = resource->type == SPV_SAMPLER; bool sampler = resource->type == SPV_SAMPLER;
bool storage = resource->type == SPV_STORAGE_BUFFER || resource->type == SPV_STORAGE_TEXTURE; bool storage = resource->type == SPV_STORAGE_BUFFER || resource->type == SPV_STORAGE_TEXTURE;
shader->bufferMask |= (buffer << *binding);
shader->textureMask |= (texture << *binding); shader->bufferMask |= (buffer << index);
shader->samplerMask |= (sampler << *binding); shader->textureMask |= (texture << index);
shader->storageMask |= (storage << *binding); shader->samplerMask |= (sampler << index);
shader->storageMask |= (storage << index);
gpu_cache cache;
if (storage) { if (storage) {
shader->resources[index].cache = stage == STAGE_COMPUTE ? GPU_CACHE_STORAGE_WRITE : GPU_CACHE_STORAGE_READ; cache = info->type == SHADER_COMPUTE ? GPU_CACHE_STORAGE_WRITE : GPU_CACHE_STORAGE_READ;
} else { } else {
shader->resources[index].cache = texture ? GPU_CACHE_TEXTURE : GPU_CACHE_UNIFORM; cache = texture ? GPU_CACHE_TEXTURE : GPU_CACHE_UNIFORM;
}
shader->resources[index] = (ShaderResource) {
.hash = hash,
.binding = *binding,
.type = type,
.phase = phase,
.cache = cache
};
if (buffer && resource->bufferFields) {
spv_field* field = &resource->bufferFields[0];
// The following conversions take place, for convenience and to better match Buffer formats:
// - Struct containing either single struct or single array of structs gets unwrapped
// - Struct containing single array of non-structs gets converted to array of single-field structs
if (field->fieldCount == 1 && field->totalFieldCount > 1) {
field = &field->fields[0];
} else if (field->totalFieldCount == 1 && field->fields[0].arrayLength > 0) {
spv_field* child = &field->fields[0];
field->arrayLength = child->arrayLength;
field->arrayStride = child->arrayStride;
field->elementSize = child->elementSize;
field->type = child->type;
child->arrayLength = 0;
child->arrayStride = 0;
}
shader->resources[index].fieldCount = field->totalFieldCount + 1;
shader->resources[index].format = shader->fields + ((s == 1 ? spv[0].fieldCount : 0) + (field - spv[s].fields));
} }
} }
} }
@ -3099,9 +3101,7 @@ Shader* lovrShaderCreate(const ShaderInfo* info) {
.length = field->arrayLength, .length = field->arrayLength,
.stride = field->arrayLength > 0 ? field->arrayStride : field->elementSize, // Use stride as element size for non-arrays .stride = field->arrayLength > 0 ? field->arrayStride : field->elementSize, // Use stride as element size for non-arrays
.fieldCount = field->fieldCount, .fieldCount = field->fieldCount,
.fields = field->fields ? .fields = field->fields ? shader->fields + base + (field->fields - spv[s].fields) : NULL
shader->fields + base + (field->fields - spv[s].fields) :
NULL
}; };
if (field->name) { if (field->name) {
@ -3163,40 +3163,60 @@ Shader* lovrShaderCreate(const ShaderInfo* info) {
} }
} }
// Push constants // Layout
uint32_t pushConstantSize = 0; gpu_slot* slots = tempAlloc(&state.allocator, shader->resourceCount * sizeof(gpu_slot));
for (uint32_t i = 0; i < info->stageCount; i++) { for (uint32_t i = 0; i < shader->resourceCount; i++) {
if (spv[i].pushConstants) { ShaderResource* resource = &shader->resources[i];
pushConstantSize = MAX(pushConstantSize, spv[i].pushConstants->elementSize); slots[i] = (gpu_slot) {
} .number = resource->binding,
.type = resource->type,
.stages =
((resource->phase & GPU_PHASE_SHADER_VERTEX) ? GPU_STAGE_VERTEX : 0) |
((resource->phase & GPU_PHASE_SHADER_FRAGMENT) ? GPU_STAGE_FRAGMENT : 0) |
((resource->phase & GPU_PHASE_SHADER_COMPUTE) ? GPU_STAGE_COMPUTE : 0)
};
} }
shader->ref = 1;
shader->gpu = (gpu_shader*) (shader + 1);
shader->info = *info;
shader->layout = getLayout(slots, shader->resourceCount); shader->layout = getLayout(slots, shader->resourceCount);
gpu_shader_info gpu = { gpu_shader_info gpu = {
.pushConstantSize = pushConstantSize, .stageCount = info->stageCount,
.stages = tempAlloc(&state.allocator, info->stageCount * sizeof(gpu_shader_source)),
.label = info->label .label = info->label
}; };
for (uint32_t i = 0; i < info->stageCount; i++) { for (uint32_t i = 0; i < info->stageCount; i++) {
switch (info->stages[i].stage) { const uint32_t stageMap[] = {
case STAGE_VERTEX: gpu.vertex = (gpu_shader_stage) { .code = source[i], .length = info->stages[i].size }; break; [STAGE_VERTEX] = GPU_STAGE_VERTEX,
case STAGE_FRAGMENT: gpu.fragment = (gpu_shader_stage) { .code = source[i], .length = info->stages[i].size }; break; [STAGE_FRAGMENT] = GPU_STAGE_FRAGMENT,
case STAGE_COMPUTE: gpu.compute = (gpu_shader_stage) { .code = source[i], .length = info->stages[i].size }; break; [STAGE_COMPUTE] = GPU_STAGE_COMPUTE
default: break; };
gpu.stages[i] = (gpu_shader_source) {
.stage = stageMap[info->stages[i].stage],
.code = source[i],
.length = info->stages[i].size
};
}
for (uint32_t i = 0; i < info->stageCount; i++) {
if (spv[i].pushConstants) {
gpu.pushConstantSize = MAX(gpu.pushConstantSize, spv[i].pushConstants->elementSize);
} }
} }
if (info->type == SHADER_GRAPHICS) { gpu_layout* resourceLayout = state.layouts.data[shader->layout].gpu;
gpu.layouts[0] = state.layouts.data[LAYOUT_BUILTIN].gpu; gpu_layout* uniformsLayout = shader->uniformSize > 0 ? state.layouts.data[LAYOUT_UNIFORMS].gpu : NULL;
gpu.layouts[1] = state.layouts.data[LAYOUT_MATERIAL].gpu;
}
gpu.layouts[resourceSet] = state.layouts.data[shader->layout].gpu; if (info->type == SHADER_GRAPHICS) {
if (shader->uniformSize > 0) gpu.layouts[uniformSet] = state.layouts.data[LAYOUT_UNIFORMS].gpu; gpu.layouts[0] = state.layouts.data[LAYOUT_BUILTINS].gpu;
gpu.layouts[1] = state.layouts.data[LAYOUT_MATERIAL].gpu;
gpu.layouts[2] = resourceLayout;
gpu.layouts[3] = uniformsLayout;
} else {
gpu.layouts[0] = resourceLayout;
gpu.layouts[1] = uniformsLayout;
}
gpu_shader_init(shader->gpu, &gpu); gpu_shader_init(shader->gpu, &gpu);
lovrShaderInit(shader); lovrShaderInit(shader);
@ -5230,7 +5250,6 @@ void lovrPassReset(Pass* pass) {
pass->pipeline = lovrPassAllocate(pass, PIPELINE_STACK_SIZE * sizeof(Pipeline)); pass->pipeline = lovrPassAllocate(pass, PIPELINE_STACK_SIZE * sizeof(Pipeline));
pass->bindings = lovrPassAllocate(pass, 32 * sizeof(gpu_binding)); pass->bindings = lovrPassAllocate(pass, 32 * sizeof(gpu_binding));
pass->uniforms = NULL; pass->uniforms = NULL;
pass->uniformSize = 0;
pass->computeCount = 0; pass->computeCount = 0;
pass->computes = NULL; pass->computes = NULL;
pass->drawCount = 0; pass->drawCount = 0;
@ -5279,7 +5298,6 @@ void lovrPassReset(Pass* pass) {
memset(pass->scissor, 0, sizeof(pass->scissor)); memset(pass->scissor, 0, sizeof(pass->scissor));
pass->sampler = NULL; pass->sampler = NULL;
pass->bindingMask = 0;
} }
const PassStats* lovrPassGetStats(Pass* pass) { const PassStats* lovrPassGetStats(Pass* pass) {
@ -5654,88 +5672,96 @@ void lovrPassSetSampler(Pass* pass, Sampler* sampler) {
} }
void lovrPassSetShader(Pass* pass, Shader* shader) { void lovrPassSetShader(Pass* pass, Shader* shader) {
Shader* previous = pass->pipeline->shader; Shader* old = pass->pipeline->shader;
if (shader == previous) return;
bool fromCompute = previous && previous->info.type == SHADER_COMPUTE; if (shader == old) {
bool toCompute = shader && shader->info.type == SHADER_COMPUTE; return;
if (fromCompute ^ toCompute) {
pass->bindingMask = 0;
} }
// Clear any bindings for resources that share the same slot but have different types
if (shader) { if (shader) {
if (previous) { gpu_binding bindings[32];
for (uint32_t i = 0, j = 0; i < previous->resourceCount && j < shader->resourceCount;) {
if (previous->resources[i].binding < shader->resources[j].binding) { // Ensure there's a valid binding for every resource in the new shader. If the old shader had a
i++; // binding with the same name and type, then use that, otherwise use a "default" resource.
} else if (previous->resources[i].binding > shader->resources[j].binding) { for (uint32_t i = 0; i < shader->resourceCount; i++) {
j++; ShaderResource* resource = &shader->resources[i];
} else { bool useDefault = true;
if (previous->resources[i].type != shader->resources[j].type) {
pass->bindingMask &= ~(1u << shader->resources[j].binding); if (old) {
ShaderResource* other = old->resources;
for (uint32_t j = 0; j < old->resourceCount; j++, other++) {
if (other->hash == resource->hash && other->type == resource->type) {
bindings[resource->binding] = pass->bindings[other->binding];
useDefault = false;
break;
} }
i++; }
j++; }
if (useDefault) {
switch (resource->type) {
case GPU_SLOT_UNIFORM_BUFFER:
case GPU_SLOT_STORAGE_BUFFER:
bindings[i].buffer.object = state.defaultBuffer->gpu;
bindings[i].buffer.offset = state.defaultBuffer->base;
bindings[i].buffer.extent = state.defaultBuffer->info.size;
break;
case GPU_SLOT_SAMPLED_TEXTURE:
case GPU_SLOT_STORAGE_TEXTURE:
bindings[i].texture = state.defaultTexture->gpu;
break;
case GPU_SLOT_SAMPLER:
bindings[i].sampler = state.defaultSamplers[FILTER_LINEAR]->gpu;
break;
default: break;
} }
} }
} }
uint32_t shaderSlots = (shader->bufferMask | shader->textureMask | shader->samplerMask); memcpy(pass->bindings, bindings, shader->resourceCount * sizeof(gpu_binding));
uint32_t missingResources = shaderSlots & ~pass->bindingMask; pass->flags |= DIRTY_BINDINGS;
// Assign default bindings to any slots used by the shader that are missing resources // Uniform data is preserved for uniforms with the same name/size (this might be slow...)
if (missingResources) { if (shader->uniformCount > 0) {
for (uint32_t i = 0; i < 32; i++) { // TODO biterationtrinsics void* uniforms = lovrPassAllocate(pass, shader->uniformSize);
uint32_t bit = (1u << i);
if (~missingResources & bit) { if (old && old->uniformCount > 0) {
continue; for (uint32_t i = 0; i < shader->uniformCount; i++) {
DataField* uniform = &shader->uniforms[i];
DataField* other = old->uniforms;
for (uint32_t j = 0; j < old->uniformCount; j++, other++) {
if (uniform->hash == other->hash && uniform->stride == other->stride && uniform->length == other->length) {
void* src = (char*) pass->uniforms + other->offset;
void* dst = (char*) uniforms + uniform->offset;
size_t size = uniform->stride * MAX(uniform->length, 1);
memcpy(dst, src, size);
}
}
} }
} else {
pass->bindings[i].number = i; memset(uniforms, 0, shader->uniformSize);
if (shader->bufferMask & bit) {
pass->bindings[i].buffer.object = state.defaultBuffer->gpu;
pass->bindings[i].buffer.offset = state.defaultBuffer->base;
pass->bindings[i].buffer.extent = state.defaultBuffer->info.size;
} else if (shader->textureMask & bit) {
pass->bindings[i].texture = state.defaultTexture->gpu;
} else if (shader->samplerMask & bit) {
pass->bindings[i].sampler = state.defaultSamplers[FILTER_LINEAR]->gpu;
}
pass->bindingMask |= bit;
} }
pass->flags |= DIRTY_BINDINGS; pass->uniforms = uniforms;
pass->flags |= DIRTY_UNIFORMS;
} else {
pass->flags &= ~DIRTY_UNIFORMS;
}
// Custom vertex attributes must be reset: their locations may differ even if the names match
if (shader->hasCustomAttributes) {
pass->pipeline->lastVertexBuffer = NULL;
} }
pass->pipeline->info.shader = shader->gpu; pass->pipeline->info.shader = shader->gpu;
pass->pipeline->info.flags = shader->flags; pass->pipeline->info.flags = shader->flags;
pass->pipeline->info.flagCount = shader->overrideCount; pass->pipeline->info.flagCount = shader->overrideCount;
lovrRetain(shader);
} }
lovrRetain(shader); lovrRelease(old, lovrShaderDestroy);
lovrRelease(previous, lovrShaderDestroy);
pass->pipeline->shader = shader; pass->pipeline->shader = shader;
pass->pipeline->dirty = true; pass->pipeline->dirty = true;
// If the shader changes, all the attribute names need to be wired up again, because attributes
// with the same name might have different locations. But if the shader only uses built-in
// attributes (which is common), things will remain stable.
if ((shader && shader->hasCustomAttributes) || (previous && previous->hasCustomAttributes)) {
pass->pipeline->lastVertexBuffer = NULL;
}
if (shader && shader->uniformSize > pass->uniformSize) {
void* uniforms = lovrPassAllocate(pass, shader->uniformSize);
if (pass->uniforms) memcpy(uniforms, pass->uniforms, pass->uniformSize);
pass->uniformSize = shader->uniformSize;
pass->uniforms = uniforms;
pass->flags |= DIRTY_UNIFORMS;
}
} }
void lovrPassSetStencilTest(Pass* pass, CompareMode test, uint8_t value, uint8_t mask) { void lovrPassSetStencilTest(Pass* pass, CompareMode test, uint8_t value, uint8_t mask) {
@ -5823,7 +5849,6 @@ void lovrPassSendBuffer(Pass* pass, const char* name, size_t length, Buffer* buf
pass->bindings[slot].buffer.object = buffer->gpu; pass->bindings[slot].buffer.object = buffer->gpu;
pass->bindings[slot].buffer.offset = buffer->base + offset; pass->bindings[slot].buffer.offset = buffer->base + offset;
pass->bindings[slot].buffer.extent = extent; pass->bindings[slot].buffer.extent = extent;
pass->bindingMask |= (1u << slot);
pass->flags |= DIRTY_BINDINGS; pass->flags |= DIRTY_BINDINGS;
} }
@ -5845,7 +5870,6 @@ void lovrPassSendTexture(Pass* pass, const char* name, size_t length, Texture* t
trackTexture(pass, texture, resource->phase, resource->cache); trackTexture(pass, texture, resource->phase, resource->cache);
pass->bindings[slot].texture = view; pass->bindings[slot].texture = view;
pass->bindingMask |= (1u << slot);
pass->flags |= DIRTY_BINDINGS; pass->flags |= DIRTY_BINDINGS;
} }
@ -5858,7 +5882,6 @@ void lovrPassSendSampler(Pass* pass, const char* name, size_t length, Sampler* s
lovrCheck(shader->samplerMask & (1u << slot), "Trying to send a Sampler to '%s', but the active Shader doesn't have a Sampler in that slot", name); lovrCheck(shader->samplerMask & (1u << slot), "Trying to send a Sampler to '%s', but the active Shader doesn't have a Sampler in that slot", name);
pass->bindings[slot].sampler = sampler->gpu; pass->bindings[slot].sampler = sampler->gpu;
pass->bindingMask |= (1u << slot);
pass->flags |= DIRTY_BINDINGS; pass->flags |= DIRTY_BINDINGS;
} }
@ -5885,7 +5908,6 @@ void lovrPassSendData(Pass* pass, const char* name, size_t length, void** data,
uint32_t size = resource->format->stride * MAX(resource->format->length, 1); uint32_t size = resource->format->stride * MAX(resource->format->length, 1);
BufferView view = lovrPassGetBuffer(pass, size, state.limits.uniformBufferAlign); BufferView view = lovrPassGetBuffer(pass, size, state.limits.uniformBufferAlign);
pass->bindings[slot].buffer = (gpu_buffer_binding) { view.buffer, view.offset, view.extent }; pass->bindings[slot].buffer = (gpu_buffer_binding) { view.buffer, view.offset, view.extent };
pass->bindingMask |= (1u << slot);
pass->flags |= DIRTY_BINDINGS; pass->flags |= DIRTY_BINDINGS;
*data = view.pointer; *data = view.pointer;
@ -6059,6 +6081,8 @@ static gpu_bundle_info* lovrPassResolveBindings(Pass* pass, Shader* shader, gpu_
for (uint32_t i = 0; i < bundle->count; i++) { for (uint32_t i = 0; i < bundle->count; i++) {
bundle->bindings[i] = pass->bindings[shader->resources[i].binding]; bundle->bindings[i] = pass->bindings[shader->resources[i].binding];
bundle->bindings[i].type = shader->resources[i].type; bundle->bindings[i].type = shader->resources[i].type;
bundle->bindings[i].number = shader->resources[i].binding;
bundle->bindings[i].count = 0;
} }
pass->flags &= ~DIRTY_BINDINGS; pass->flags &= ~DIRTY_BINDINGS;

View File

@ -323,6 +323,7 @@ typedef struct {
ShaderFlag* flags; ShaderFlag* flags;
uint32_t flagCount; uint32_t flagCount;
const char* label; const char* label;
bool isDefault;
} ShaderInfo; } ShaderInfo;
typedef void* ShaderIncluder(const char* filename, size_t* bytesRead); typedef void* ShaderIncluder(const char* filename, size_t* bytesRead);