Compare commits

...

7 Commits

Author SHA1 Message Date
bjorn ec380e0cfd rm pass uniformSize;
It's no longer necessary.
2024-02-25 14:58:11 -08:00
bjorn fa8ea6732b rm pass binding mask;
It's no longer necessary.
2024-02-25 14:57:27 -08:00
bjorn 466a052ded Cleanup; 2024-02-24 15:45:30 -08:00
bjorn 2fe5ba8f3b Ensure all binding fields are fully initialized; 2024-02-24 15:45:10 -08:00
bjorn ae19b7aad3 Fix resource binding numbers; 2024-02-24 15:33:09 -08:00
bjorn 652a074677 Update resource invalidation when switching shaders;
Previously, when switching shaders, resource bindings would be preserved
for resources with matching slots/types.  This doesn't make sense in a
world where binding numbers are internal.  Instead, match up resources
by name/type.

Additionally, rewire all the uniforms by name/size so uniforms with the
same name get preserved (name/type would be too hard for e.g. structs).
This seems like it would be horribly slow and may need to be optimized,
optional, or removed.

I didn't test any of this lol, but I will, I promise.
2024-02-24 14:34:29 -08:00
bjorn bd83ad6eb4 Cleanup; 2024-02-24 11:49:11 -08:00
5 changed files with 256 additions and 232 deletions
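The strategy in 652a074677 comes down to a name-keyed merge: a binding survives a shader switch only if the new shader has a resource with the same name hash and type, anything unmatched falls back to a default resource, and uniform values are copied over when name and layout agree. A minimal sketch of the idea in isolation, using simplified stand-in types (ShaderRes, Uniform, rebind, and rewireUniforms are illustrative, not the engine's actual structures):

#include <stdint.h>
#include <string.h>

typedef struct { uint32_t hash, type, binding; } ShaderRes; // stand-in for ShaderResource
typedef struct { uint32_t hash, offset, stride, length; } Uniform; // stand-in for DataField

// A binding carries over only when name hash and type both match;
// everything else gets the default resource.
static void rebind(const ShaderRes* prev, uint32_t prevCount, const void** prevSlots,
                   const ShaderRes* next, uint32_t nextCount, const void** nextSlots,
                   const void* fallback) {
  for (uint32_t i = 0; i < nextCount; i++) {
    nextSlots[next[i].binding] = fallback;
    for (uint32_t j = 0; j < prevCount; j++) {
      if (prev[j].hash == next[i].hash && prev[j].type == next[i].type) {
        nextSlots[next[i].binding] = prevSlots[prev[j].binding];
        break;
      }
    }
  }
}

// Uniform values are preserved by name/size rather than name/type, since
// comparing struct types across shaders would be impractical.
static void rewireUniforms(const Uniform* prev, uint32_t prevCount, const char* prevData,
                           const Uniform* next, uint32_t nextCount, char* nextData) {
  for (uint32_t i = 0; i < nextCount; i++) {
    for (uint32_t j = 0; j < prevCount; j++) {
      if (next[i].hash == prev[j].hash && next[i].stride == prev[j].stride &&
          next[i].length == prev[j].length) {
        uint32_t count = next[i].length ? next[i].length : 1;
        memcpy(nextData + next[i].offset, prevData + prev[j].offset,
               (size_t) next[i].stride * count);
        break;
      }
    }
  }
}

The resource merge is bounded by MAX_SHADER_RESOURCES (32) per side; the uniform rewire is the part the commit message flags as potentially slow.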

View File

@@ -1072,10 +1072,12 @@ static int l_lovrGraphicsNewShader(lua_State* L) {
Shader* shader = lovrShaderCreate(&info);
luax_pushtype(L, Shader, shader);
lovrRelease(shader, lovrShaderDestroy);
if (shouldFree[0]) free((void*) source[0].code);
if (shouldFree[1]) free((void*) source[1].code);
if (source[0].code != compiled[0].code) free((void*) compiled[0].code);
if (source[1].code != compiled[1].code) free((void*) compiled[1].code);
for (uint32_t i = 0; i < info.stageCount; i++) {
if (shouldFree[i]) free((void*) source[i].code);
if (source[i].code != compiled[i].code) free((void*) compiled[i].code);
}
arr_free(&flags);
return 1;
}
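The pointer comparison kept in the new loop is worth a note: compiled[i].code is only a separate allocation when the compile step actually produced new code, and when the caller hands over SPIR-V directly the compiler can return the input buffer unchanged, so the two pointers alias and a second free would be a double free. The same cleanup as above, restated with explanatory comments:

for (uint32_t i = 0; i < info.stageCount; i++) {
  // The source buffer is ours to free only if we allocated it
  if (shouldFree[i]) free((void*) source[i].code);
  // The compiler output is a distinct allocation unless compilation was a no-op
  if (source[i].code != compiled[i].code) free((void*) compiled[i].code);
}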

View File

@@ -245,16 +245,16 @@ void gpu_layout_destroy(gpu_layout* layout);
// Shader
typedef struct {
uint32_t stage;
const void* code;
size_t length;
} gpu_shader_stage;
} gpu_shader_source;
typedef struct {
gpu_shader_stage vertex;
gpu_shader_stage fragment;
gpu_shader_stage compute;
gpu_layout* layouts[4];
uint32_t stageCount;
gpu_shader_source* stages;
uint32_t pushConstantSize;
gpu_layout* layouts[4];
const char* label;
} gpu_shader_info;
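Caller-side, the change replaces the fixed vertex/fragment/compute fields with a count and an array, so stages can be supplied uniformly. A hypothetical call site (the helper, its arguments, and the label are illustrative; only the gpu_* types, the GPU_STAGE_* constants, and gpu_shader_init come from this API):

// Hypothetical: build a graphics shader from two SPIR-V blobs with the new API.
static bool createGraphicsShader(gpu_shader* shader,
    const void* vsCode, size_t vsLength,
    const void* fsCode, size_t fsLength,
    gpu_layout* builtins, gpu_layout* material,
    gpu_layout* resources, gpu_layout* uniforms) {
  gpu_shader_source stages[] = {
    { GPU_STAGE_VERTEX, vsCode, vsLength },
    { GPU_STAGE_FRAGMENT, fsCode, fsLength }
  };
  gpu_shader_info info = {
    .stageCount = 2,
    .stages = stages,
    .pushConstantSize = 0,
    .layouts = { builtins, material, resources, uniforms },
    .label = "myShader"
  };
  return gpu_shader_init(shader, &info);
}

The Vulkan backend below consumes this shape directly: it ORs the stage bits together first, rejecting unknown stages before any modules are created, then makes one VkShaderModule per entry.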

View File

@@ -988,31 +988,28 @@ void gpu_layout_destroy(gpu_layout* layout) {
// Shader
bool gpu_shader_init(gpu_shader* shader, gpu_shader_info* info) {
struct { VkShaderStageFlags flags; gpu_shader_stage* source; } stages[] = {
{ VK_SHADER_STAGE_VERTEX_BIT, &info->vertex },
{ VK_SHADER_STAGE_FRAGMENT_BIT, &info->fragment },
{ VK_SHADER_STAGE_COMPUTE_BIT, &info->compute }
};
uint32_t stageCount = 0;
VkShaderStageFlags stageFlags = 0;
for (uint32_t i = 0; i < COUNTOF(stages); i++) {
if (!stages[i].source->code) continue;
for (uint32_t i = 0; i < info->stageCount; i++) {
switch (info->stages[i].stage) {
case GPU_STAGE_VERTEX: stageFlags |= VK_SHADER_STAGE_VERTEX_BIT; break;
case GPU_STAGE_FRAGMENT: stageFlags |= VK_SHADER_STAGE_FRAGMENT_BIT; break;
case GPU_STAGE_COMPUTE: stageFlags |= VK_SHADER_STAGE_COMPUTE_BIT; break;
default: return false;
}
}
for (uint32_t i = 0; i < info->stageCount; i++) {
VkShaderModuleCreateInfo moduleInfo = {
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
.codeSize = stages[i].source->length,
.pCode = stages[i].source->code
.codeSize = info->stages[i].length,
.pCode = info->stages[i].code
};
VK(vkCreateShaderModule(state.device, &moduleInfo, NULL, &shader->handles[stageCount]), "Failed to load shader") {
VK(vkCreateShaderModule(state.device, &moduleInfo, NULL, &shader->handles[i]), "Failed to load shader") {
return false;
}
nickname(shader->handles[i], VK_OBJECT_TYPE_SHADER_MODULE, info->label);
stageFlags |= stages[i].flags;
stageCount++;
}
VkDescriptorSetLayout layouts[4];

View File

@@ -30,7 +30,7 @@
#define PIPELINE_STACK_SIZE 4
#define MAX_SHADER_RESOURCES 32
#define MAX_CUSTOM_ATTRIBUTES 10
#define LAYOUT_BUILTIN 0
#define LAYOUT_BUILTINS 0
#define LAYOUT_MATERIAL 1
#define LAYOUT_UNIFORMS 2
#define FLOAT_BITS(f) ((union { float f; uint32_t u; }) { f }).u
@@ -525,9 +525,7 @@ struct Pass {
uint32_t transformIndex;
uint32_t pipelineIndex;
gpu_binding* bindings;
uint32_t bindingMask;
void* uniforms;
uint32_t uniformSize;
uint32_t computeCount;
Compute* computes;
uint32_t drawCount;
@@ -698,7 +696,7 @@ bool lovrGraphicsInit(GraphicsConfig* config) {
};
size_t builtinLayout = getLayout(builtinSlots, COUNTOF(builtinSlots));
if (builtinLayout != LAYOUT_BUILTIN) lovrUnreachable();
if (builtinLayout != LAYOUT_BUILTINS) lovrUnreachable();
gpu_slot materialSlots[] = {
{ 0, GPU_SLOT_UNIFORM_BUFFER, GPU_STAGE_GRAPHICS }, // Data
@@ -1293,7 +1291,7 @@ static void recordRenderPass(Pass* pass, gpu_stream* stream) {
data->color[3] = draw->color[3];
}
gpu_bundle* builtinBundle = getBundle(LAYOUT_BUILTIN, builtins, COUNTOF(builtins));
gpu_bundle* builtinBundle = getBundle(LAYOUT_BUILTINS, builtins, COUNTOF(builtins));
// Pipelines
@@ -2820,7 +2818,8 @@ Shader* lovrGraphicsGetDefaultShader(DefaultShader type) {
},
.stageCount = 1,
.flags = &(ShaderFlag) { NULL, 0, state.device.subgroupSize },
.flagCount = 1
.flagCount = 1,
.isDefault = true
});
default:
return state.defaultShaders[type] = lovrShaderCreate(&(ShaderInfo) {
@@ -2829,7 +2828,8 @@ Shader* lovrGraphicsGetDefaultShader(DefaultShader type) {
lovrGraphicsGetDefaultShaderSource(type, STAGE_VERTEX),
lovrGraphicsGetDefaultShaderSource(type, STAGE_FRAGMENT)
},
.stageCount = 2
.stageCount = 2,
.isDefault = true
});
}
}
@@ -2837,9 +2837,11 @@ Shader* lovrGraphicsGetDefaultShader(DefaultShader type) {
Shader* lovrShaderCreate(const ShaderInfo* info) {
Shader* shader = calloc(1, sizeof(Shader) + gpu_sizeof_shader());
lovrAssert(shader, "Out of memory");
shader->ref = 1;
shader->gpu = (gpu_shader*) (shader + 1);
shader->info = *info;
size_t stack = tempPush(&state.allocator);
// Validate stage combinations
for (uint32_t i = 0; i < info->stageCount; i++) {
shader->stageMask |= (1 << info->stages[i].stage);
}
@@ -2850,7 +2852,9 @@ Shader* lovrShaderCreate(const ShaderInfo* info) {
lovrCheck(shader->stageMask == FLAG_COMPUTE, "Compute shaders can only have a compute stage");
}
// Copy the source, because we perform edits on the SPIR-V and the input might be readonly memory
size_t stack = tempPush(&state.allocator);
// Copy the source to temp memory (we perform edits on the SPIR-V and the input might be readonly)
void* source[2];
for (uint32_t i = 0; i < info->stageCount; i++) {
source[i] = tempAlloc(&state.allocator, info->stages[i].size);
@@ -2891,25 +2895,31 @@ Shader* lovrShaderCreate(const ShaderInfo* info) {
}
}
// Allocate
gpu_slot* slots = tempAlloc(&state.allocator, maxResources * sizeof(gpu_slot));
// Allocate memory
shader->resources = malloc(maxResources * sizeof(ShaderResource));
shader->fields = malloc(maxFields * sizeof(DataField));
shader->names = malloc(maxChars);
shader->flags = malloc(maxSpecConstants * sizeof(gpu_shader_flag));
shader->flagLookup = malloc(maxSpecConstants * sizeof(uint32_t));
shader->names = malloc(maxChars);
lovrAssert(shader->resources && shader->fields && shader->names, "Out of memory");
lovrAssert(shader->flags && shader->flagLookup, "Out of memory");
lovrAssert(shader->resources, "Out of memory");
lovrAssert(shader->fields, "Out of memory");
lovrAssert(shader->names, "Out of memory");
lovrAssert(shader->flags, "Out of memory");
lovrAssert(shader->flagLookup, "Out of memory");
// Stage-specific metadata
// Workgroup size
if (info->type == SHADER_COMPUTE) {
memcpy(shader->workgroupSize, spv[0].workgroupSize, 3 * sizeof(uint32_t));
lovrCheck(shader->workgroupSize[0] <= state.limits.workgroupSize[0], "Shader workgroup size exceeds the 'workgroupSize' limit");
lovrCheck(shader->workgroupSize[1] <= state.limits.workgroupSize[1], "Shader workgroup size exceeds the 'workgroupSize' limit");
lovrCheck(shader->workgroupSize[2] <= state.limits.workgroupSize[2], "Shader workgroup size exceeds the 'workgroupSize' limit");
uint32_t totalWorkgroupSize = shader->workgroupSize[0] * shader->workgroupSize[1] * shader->workgroupSize[2];
uint32_t* workgroupSize = spv[0].workgroupSize;
uint32_t totalWorkgroupSize = workgroupSize[0] * workgroupSize[1] * workgroupSize[2];
lovrCheck(workgroupSize[0] <= state.limits.workgroupSize[0], "Shader workgroup size exceeds the 'workgroupSize' limit");
lovrCheck(workgroupSize[1] <= state.limits.workgroupSize[1], "Shader workgroup size exceeds the 'workgroupSize' limit");
lovrCheck(workgroupSize[2] <= state.limits.workgroupSize[2], "Shader workgroup size exceeds the 'workgroupSize' limit");
lovrCheck(totalWorkgroupSize <= state.limits.totalWorkgroupSize, "Shader workgroup size exceeds the 'totalWorkgroupSize' limit");
} else if (spv[0].attributeCount > 0) {
memcpy(shader->workgroupSize, workgroupSize, 3 * sizeof(uint32_t));
}
// Vertex attributes
if (info->type == SHADER_GRAPHICS && spv[0].attributeCount > 0) {
shader->attributeCount = spv[0].attributeCount;
shader->attributes = malloc(shader->attributeCount * sizeof(ShaderAttribute));
lovrAssert(shader->attributes, "Out of memory");
@@ -2922,71 +2932,18 @@ Shader* lovrShaderCreate(const ShaderInfo* info) {
uint32_t resourceSet = info->type == SHADER_COMPUTE ? 0 : 2;
uint32_t uniformSet = info->type == SHADER_COMPUTE ? 1 : 3;
uint32_t lastResourceCount = 0;
// Resources
for (uint32_t s = 0; s < info->stageCount; s++, lastResourceCount = shader->resourceCount) {
for (uint32_t s = 0, lastResourceCount = 0; s < info->stageCount; s++, lastResourceCount = shader->resourceCount) {
ShaderStage stage = info->stages[s].stage;
for (uint32_t i = 0; i < spv[s].resourceCount; i++) {
spv_resource* resource = &spv[s].resources[i];
// It's safe to cast away const because we are operating on a copy of the input
uint32_t* set = (uint32_t*) resource->set;
uint32_t* binding = (uint32_t*) resource->binding;
if (!set || !binding) {
continue;
}
if (!(*set == resourceSet || (*set == 0 && *binding > LAST_BUILTIN_BINDING))) {
continue;
}
lovrCheck(resource->arraySize == 0, "Arrays of resources in shaders are not currently supported");
lovrCheck(resource->type != SPV_COMBINED_TEXTURE_SAMPLER, "Shader variable '%s' is a%s, which is not supported%s", resource->name, " combined texture sampler", " (use e.g. texture2D instead of sampler2D)");
lovrCheck(resource->type != SPV_UNIFORM_TEXEL_BUFFER, "Shader variable '%s' is a%s, which is not supported%s", resource->name, " uniform texel buffer", "");
lovrCheck(resource->type != SPV_STORAGE_TEXEL_BUFFER, "Shader variable '%s' is a%s, which is not supported%s", resource->name, " storage texel buffer", "");
lovrCheck(resource->type != SPV_INPUT_ATTACHMENT, "Shader variable '%s' is a%s, which is not supported%s", resource->name, "n input attachment", "");
static const gpu_slot_type resourceTypes[] = {
[SPV_UNIFORM_BUFFER] = GPU_SLOT_UNIFORM_BUFFER,
[SPV_STORAGE_BUFFER] = GPU_SLOT_STORAGE_BUFFER,
[SPV_SAMPLED_TEXTURE] = GPU_SLOT_SAMPLED_TEXTURE,
[SPV_STORAGE_TEXTURE] = GPU_SLOT_STORAGE_TEXTURE,
[SPV_SAMPLER] = GPU_SLOT_SAMPLER
};
gpu_phase stageMap[] = {
[STAGE_VERTEX] = GPU_STAGE_VERTEX,
[STAGE_FRAGMENT] = GPU_STAGE_FRAGMENT,
[STAGE_COMPUTE] = GPU_STAGE_COMPUTE
};
gpu_phase stagePhase[] = {
[STAGE_VERTEX] = GPU_PHASE_SHADER_VERTEX,
[STAGE_FRAGMENT] = GPU_PHASE_SHADER_FRAGMENT,
[STAGE_COMPUTE] = GPU_PHASE_SHADER_COMPUTE
};
uint32_t hash = (uint32_t) hash64(resource->name, strlen(resource->name));
bool skip = false;
// Merge resources between shader stages by name
for (uint32_t j = 0; j < lastResourceCount; j++) {
ShaderResource* other = &shader->resources[j];
if (other->hash == hash) {
lovrCheck(other->type == resourceTypes[resource->type], "Shader variable '%s' is declared in multiple shader stages with different types", resource->name);
slots[j].stages |= stageMap[stage];
shader->resources[j].phase |= stagePhase[stage];
*set = resourceSet;
*binding = shader->resources[j].binding;
skip = true;
break;
}
}
if (skip) {
continue;
}
// glslang outputs gl_DefaultUniformBlock, there's also the Constants macro which defines a DefaultUniformBlock UBO
if (!strcmp(resource->name, "gl_DefaultUniformBlock") || !strcmp(resource->name, "DefaultUniformBlock")) {
spv_field* block = resource->bufferFields;
shader->uniformSize = block->elementSize;
@@ -2997,64 +2954,109 @@ Shader* lovrShaderCreate(const ShaderInfo* info) {
continue;
}
uint32_t index = shader->resourceCount++;
lovrAssert(index < MAX_SHADER_RESOURCES, "Shader resource count exceeds resourcesPerShader limit (%d)", MAX_SHADER_RESOURCES);
if (*set != resourceSet) {
*set = resourceSet;
*binding = index;
// Skip builtin resources
if (info->type == SHADER_GRAPHICS && ((*set == 0 && *binding <= LAST_BUILTIN_BINDING) || *set == 1)) {
continue;
}
slots[index] = (gpu_slot) {
.number = *binding,
.type = resourceTypes[resource->type],
.stages = stageMap[stage]
static const gpu_slot_type types[] = {
[SPV_UNIFORM_BUFFER] = GPU_SLOT_UNIFORM_BUFFER,
[SPV_STORAGE_BUFFER] = GPU_SLOT_STORAGE_BUFFER,
[SPV_SAMPLED_TEXTURE] = GPU_SLOT_SAMPLED_TEXTURE,
[SPV_STORAGE_TEXTURE] = GPU_SLOT_STORAGE_TEXTURE,
[SPV_SAMPLER] = GPU_SLOT_SAMPLER
};
shader->resources[index] = (ShaderResource) {
.hash = hash,
.binding = *binding,
.type = resourceTypes[resource->type],
.phase = stagePhase[stage]
gpu_phase phases[] = {
[STAGE_VERTEX] = GPU_PHASE_SHADER_VERTEX,
[STAGE_FRAGMENT] = GPU_PHASE_SHADER_FRAGMENT,
[STAGE_COMPUTE] = GPU_PHASE_SHADER_COMPUTE
};
if (resource->bufferFields) {
spv_field* field = &resource->bufferFields[0];
gpu_slot_type type = types[resource->type];
gpu_phase phase = phases[stage];
// Unwrap the container struct if it just contains a single struct or array of structs
if (field->fieldCount == 1 && field->totalFieldCount > 1) {
field = &field->fields[0];
} else if (field->totalFieldCount == 1 && field->fields[0].arrayLength > 0) {
// Arrays of non-aggregates get converted to an array of single-element structs to better
// match the way buffer formats work. Note that we edit the spv_field, because DataFields
// get initialized later and so any edits to them would get overwritten.
spv_field* child = &field->fields[0];
field->arrayLength = child->arrayLength;
field->arrayStride = child->arrayStride;
field->elementSize = child->elementSize;
field->type = child->type; // This allows the field to be used as both AoS and single-field array
child->arrayLength = 0;
child->arrayStride = 0;
// Merge resources between shader stages, by name
bool merged = false;
uint32_t hash = (uint32_t) hash64(resource->name, strlen(resource->name));
for (uint32_t j = 0; j < lastResourceCount; j++) {
ShaderResource* other = &shader->resources[j];
if (other->hash == hash) {
lovrCheck(other->type == type, "Shader variable '%s' is declared in multiple shader stages with different types", resource->name);
*set = resourceSet;
*binding = shader->resources[j].binding;
shader->resources[j].phase |= phase;
merged = true;
break;
}
}
shader->resources[index].fieldCount = field->totalFieldCount + 1;
shader->resources[index].format = shader->fields + ((s == 1 ? spv[0].fieldCount : 0) + (field - spv[s].fields));
if (merged) {
continue;
}
uint32_t index = shader->resourceCount++;
lovrCheck(index < MAX_SHADER_RESOURCES, "Shader resource count exceeds resourcesPerShader limit (%d)", MAX_SHADER_RESOURCES);
lovrCheck(resource->type != SPV_COMBINED_TEXTURE_SAMPLER, "Shader variable '%s' is a%s, which is not supported%s", resource->name, " combined texture sampler", " (use e.g. texture2D instead of sampler2D)");
lovrCheck(resource->type != SPV_UNIFORM_TEXEL_BUFFER, "Shader variable '%s' is a%s, which is not supported%s", resource->name, " uniform texel buffer", "");
lovrCheck(resource->type != SPV_STORAGE_TEXEL_BUFFER, "Shader variable '%s' is a%s, which is not supported%s", resource->name, " storage texel buffer", "");
lovrCheck(resource->type != SPV_INPUT_ATTACHMENT, "Shader variable '%s' is a%s, which is not supported%s", resource->name, "n input attachment", "");
lovrCheck(resource->arraySize == 0, "Arrays of resources in shaders are not currently supported");
// Move resources into set #2 and give them auto-incremented binding numbers starting at zero
// Compute shaders don't need remapping since everything's in set #0 and there are no builtins
if (!info->isDefault && info->type == SHADER_GRAPHICS && *set == 0 && *binding > LAST_BUILTIN_BINDING) {
*set = resourceSet;
*binding = index;
}
bool buffer = resource->type == SPV_UNIFORM_BUFFER || resource->type == SPV_STORAGE_BUFFER;
bool texture = resource->type == SPV_SAMPLED_TEXTURE || resource->type == SPV_STORAGE_TEXTURE;
bool sampler = resource->type == SPV_SAMPLER;
bool storage = resource->type == SPV_STORAGE_BUFFER || resource->type == SPV_STORAGE_TEXTURE;
shader->bufferMask |= (buffer << *binding);
shader->textureMask |= (texture << *binding);
shader->samplerMask |= (sampler << *binding);
shader->storageMask |= (storage << *binding);
shader->bufferMask |= (buffer << index);
shader->textureMask |= (texture << index);
shader->samplerMask |= (sampler << index);
shader->storageMask |= (storage << index);
gpu_cache cache;
if (storage) {
shader->resources[index].cache = stage == STAGE_COMPUTE ? GPU_CACHE_STORAGE_WRITE : GPU_CACHE_STORAGE_READ;
cache = info->type == SHADER_COMPUTE ? GPU_CACHE_STORAGE_WRITE : GPU_CACHE_STORAGE_READ;
} else {
shader->resources[index].cache = texture ? GPU_CACHE_TEXTURE : GPU_CACHE_UNIFORM;
cache = texture ? GPU_CACHE_TEXTURE : GPU_CACHE_UNIFORM;
}
shader->resources[index] = (ShaderResource) {
.hash = hash,
.binding = *binding,
.type = type,
.phase = phase,
.cache = cache
};
if (buffer && resource->bufferFields) {
spv_field* field = &resource->bufferFields[0];
// The following conversions take place, for convenience and to better match Buffer formats:
// - Struct containing either single struct or single array of structs gets unwrapped
// - Struct containing single array of non-structs gets converted to array of single-field structs
if (field->fieldCount == 1 && field->totalFieldCount > 1) {
field = &field->fields[0];
} else if (field->totalFieldCount == 1 && field->fields[0].arrayLength > 0) {
spv_field* child = &field->fields[0];
field->arrayLength = child->arrayLength;
field->arrayStride = child->arrayStride;
field->elementSize = child->elementSize;
field->type = child->type;
child->arrayLength = 0;
child->arrayStride = 0;
}
shader->resources[index].fieldCount = field->totalFieldCount + 1;
shader->resources[index].format = shader->fields + ((s == 1 ? spv[0].fieldCount : 0) + (field - spv[s].fields));
}
}
}
@@ -3099,9 +3101,7 @@ Shader* lovrShaderCreate(const ShaderInfo* info) {
.length = field->arrayLength,
.stride = field->arrayLength > 0 ? field->arrayStride : field->elementSize, // Use stride as element size for non-arrays
.fieldCount = field->fieldCount,
.fields = field->fields ?
shader->fields + base + (field->fields - spv[s].fields) :
NULL
.fields = field->fields ? shader->fields + base + (field->fields - spv[s].fields) : NULL
};
if (field->name) {
@@ -3163,40 +3163,60 @@ Shader* lovrShaderCreate(const ShaderInfo* info) {
}
}
// Push constants
uint32_t pushConstantSize = 0;
for (uint32_t i = 0; i < info->stageCount; i++) {
if (spv[i].pushConstants) {
pushConstantSize = MAX(pushConstantSize, spv[i].pushConstants->elementSize);
}
// Layout
gpu_slot* slots = tempAlloc(&state.allocator, shader->resourceCount * sizeof(gpu_slot));
for (uint32_t i = 0; i < shader->resourceCount; i++) {
ShaderResource* resource = &shader->resources[i];
slots[i] = (gpu_slot) {
.number = resource->binding,
.type = resource->type,
.stages =
((resource->phase & GPU_PHASE_SHADER_VERTEX) ? GPU_STAGE_VERTEX : 0) |
((resource->phase & GPU_PHASE_SHADER_FRAGMENT) ? GPU_STAGE_FRAGMENT : 0) |
((resource->phase & GPU_PHASE_SHADER_COMPUTE) ? GPU_STAGE_COMPUTE : 0)
};
}
shader->ref = 1;
shader->gpu = (gpu_shader*) (shader + 1);
shader->info = *info;
shader->layout = getLayout(slots, shader->resourceCount);
gpu_shader_info gpu = {
.pushConstantSize = pushConstantSize,
.stageCount = info->stageCount,
.stages = tempAlloc(&state.allocator, info->stageCount * sizeof(gpu_shader_source)),
.label = info->label
};
for (uint32_t i = 0; i < info->stageCount; i++) {
switch (info->stages[i].stage) {
case STAGE_VERTEX: gpu.vertex = (gpu_shader_stage) { .code = source[i], .length = info->stages[i].size }; break;
case STAGE_FRAGMENT: gpu.fragment = (gpu_shader_stage) { .code = source[i], .length = info->stages[i].size }; break;
case STAGE_COMPUTE: gpu.compute = (gpu_shader_stage) { .code = source[i], .length = info->stages[i].size }; break;
default: break;
const uint32_t stageMap[] = {
[STAGE_VERTEX] = GPU_STAGE_VERTEX,
[STAGE_FRAGMENT] = GPU_STAGE_FRAGMENT,
[STAGE_COMPUTE] = GPU_STAGE_COMPUTE
};
gpu.stages[i] = (gpu_shader_source) {
.stage = stageMap[info->stages[i].stage],
.code = source[i],
.length = info->stages[i].size
};
}
for (uint32_t i = 0; i < info->stageCount; i++) {
if (spv[i].pushConstants) {
gpu.pushConstantSize = MAX(gpu.pushConstantSize, spv[i].pushConstants->elementSize);
}
}
if (info->type == SHADER_GRAPHICS) {
gpu.layouts[0] = state.layouts.data[LAYOUT_BUILTIN].gpu;
gpu.layouts[1] = state.layouts.data[LAYOUT_MATERIAL].gpu;
}
gpu_layout* resourceLayout = state.layouts.data[shader->layout].gpu;
gpu_layout* uniformsLayout = shader->uniformSize > 0 ? state.layouts.data[LAYOUT_UNIFORMS].gpu : NULL;
gpu.layouts[resourceSet] = state.layouts.data[shader->layout].gpu;
if (shader->uniformSize > 0) gpu.layouts[uniformSet] = state.layouts.data[LAYOUT_UNIFORMS].gpu;
if (info->type == SHADER_GRAPHICS) {
gpu.layouts[0] = state.layouts.data[LAYOUT_BUILTINS].gpu;
gpu.layouts[1] = state.layouts.data[LAYOUT_MATERIAL].gpu;
gpu.layouts[2] = resourceLayout;
gpu.layouts[3] = uniformsLayout;
} else {
gpu.layouts[0] = resourceLayout;
gpu.layouts[1] = uniformsLayout;
}
gpu_shader_init(shader->gpu, &gpu);
lovrShaderInit(shader);
@@ -5230,7 +5250,6 @@ void lovrPassReset(Pass* pass) {
pass->pipeline = lovrPassAllocate(pass, PIPELINE_STACK_SIZE * sizeof(Pipeline));
pass->bindings = lovrPassAllocate(pass, 32 * sizeof(gpu_binding));
pass->uniforms = NULL;
pass->uniformSize = 0;
pass->computeCount = 0;
pass->computes = NULL;
pass->drawCount = 0;
@@ -5279,7 +5298,6 @@ void lovrPassReset(Pass* pass) {
memset(pass->scissor, 0, sizeof(pass->scissor));
pass->sampler = NULL;
pass->bindingMask = 0;
}
const PassStats* lovrPassGetStats(Pass* pass) {
@@ -5654,88 +5672,96 @@ void lovrPassSetSampler(Pass* pass, Sampler* sampler) {
}
void lovrPassSetShader(Pass* pass, Shader* shader) {
Shader* previous = pass->pipeline->shader;
if (shader == previous) return;
Shader* old = pass->pipeline->shader;
bool fromCompute = previous && previous->info.type == SHADER_COMPUTE;
bool toCompute = shader && shader->info.type == SHADER_COMPUTE;
if (fromCompute ^ toCompute) {
pass->bindingMask = 0;
if (shader == old) {
return;
}
// Clear any bindings for resources that share the same slot but have different types
if (shader) {
if (previous) {
for (uint32_t i = 0, j = 0; i < previous->resourceCount && j < shader->resourceCount;) {
if (previous->resources[i].binding < shader->resources[j].binding) {
i++;
} else if (previous->resources[i].binding > shader->resources[j].binding) {
j++;
} else {
if (previous->resources[i].type != shader->resources[j].type) {
pass->bindingMask &= ~(1u << shader->resources[j].binding);
gpu_binding bindings[32];
// Ensure there's a valid binding for every resource in the new shader. If the old shader had a
// binding with the same name and type, then use that, otherwise use a "default" resource.
for (uint32_t i = 0; i < shader->resourceCount; i++) {
ShaderResource* resource = &shader->resources[i];
bool useDefault = true;
if (old) {
ShaderResource* other = old->resources;
for (uint32_t j = 0; j < old->resourceCount; j++, other++) {
if (other->hash == resource->hash && other->type == resource->type) {
bindings[resource->binding] = pass->bindings[other->binding];
useDefault = false;
break;
}
i++;
j++;
}
}
if (useDefault) {
switch (resource->type) {
case GPU_SLOT_UNIFORM_BUFFER:
case GPU_SLOT_STORAGE_BUFFER:
bindings[i].buffer.object = state.defaultBuffer->gpu;
bindings[i].buffer.offset = state.defaultBuffer->base;
bindings[i].buffer.extent = state.defaultBuffer->info.size;
break;
case GPU_SLOT_SAMPLED_TEXTURE:
case GPU_SLOT_STORAGE_TEXTURE:
bindings[i].texture = state.defaultTexture->gpu;
break;
case GPU_SLOT_SAMPLER:
bindings[i].sampler = state.defaultSamplers[FILTER_LINEAR]->gpu;
break;
default: break;
}
}
}
uint32_t shaderSlots = (shader->bufferMask | shader->textureMask | shader->samplerMask);
uint32_t missingResources = shaderSlots & ~pass->bindingMask;
memcpy(pass->bindings, bindings, shader->resourceCount * sizeof(gpu_binding));
pass->flags |= DIRTY_BINDINGS;
// Assign default bindings to any slots used by the shader that are missing resources
if (missingResources) {
for (uint32_t i = 0; i < 32; i++) { // TODO biterationtrinsics
uint32_t bit = (1u << i);
// Uniform data is preserved for uniforms with the same name/size (this might be slow...)
if (shader->uniformCount > 0) {
void* uniforms = lovrPassAllocate(pass, shader->uniformSize);
if (~missingResources & bit) {
continue;
if (old && old->uniformCount > 0) {
for (uint32_t i = 0; i < shader->uniformCount; i++) {
DataField* uniform = &shader->uniforms[i];
DataField* other = old->uniforms;
for (uint32_t j = 0; j < old->uniformCount; j++, other++) {
if (uniform->hash == other->hash && uniform->stride == other->stride && uniform->length == other->length) {
void* src = (char*) pass->uniforms + other->offset;
void* dst = (char*) uniforms + uniform->offset;
size_t size = uniform->stride * MAX(uniform->length, 1);
memcpy(dst, src, size);
}
}
}
pass->bindings[i].number = i;
if (shader->bufferMask & bit) {
pass->bindings[i].buffer.object = state.defaultBuffer->gpu;
pass->bindings[i].buffer.offset = state.defaultBuffer->base;
pass->bindings[i].buffer.extent = state.defaultBuffer->info.size;
} else if (shader->textureMask & bit) {
pass->bindings[i].texture = state.defaultTexture->gpu;
} else if (shader->samplerMask & bit) {
pass->bindings[i].sampler = state.defaultSamplers[FILTER_LINEAR]->gpu;
}
pass->bindingMask |= bit;
} else {
memset(uniforms, 0, shader->uniformSize);
}
pass->flags |= DIRTY_BINDINGS;
pass->uniforms = uniforms;
pass->flags |= DIRTY_UNIFORMS;
} else {
pass->flags &= ~DIRTY_UNIFORMS;
}
// Custom vertex attributes must be reset: their locations may differ even if the names match
if (shader->hasCustomAttributes) {
pass->pipeline->lastVertexBuffer = NULL;
}
pass->pipeline->info.shader = shader->gpu;
pass->pipeline->info.flags = shader->flags;
pass->pipeline->info.flagCount = shader->overrideCount;
lovrRetain(shader);
}
lovrRetain(shader);
lovrRelease(previous, lovrShaderDestroy);
lovrRelease(old, lovrShaderDestroy);
pass->pipeline->shader = shader;
pass->pipeline->dirty = true;
// If the shader changes, all the attribute names need to be wired up again, because attributes
// with the same name might have different locations. But if the shader only uses built-in
// attributes (which is common), things will remain stable.
if ((shader && shader->hasCustomAttributes) || (previous && previous->hasCustomAttributes)) {
pass->pipeline->lastVertexBuffer = NULL;
}
if (shader && shader->uniformSize > pass->uniformSize) {
void* uniforms = lovrPassAllocate(pass, shader->uniformSize);
if (pass->uniforms) memcpy(uniforms, pass->uniforms, pass->uniformSize);
pass->uniformSize = shader->uniformSize;
pass->uniforms = uniforms;
pass->flags |= DIRTY_UNIFORMS;
}
}
void lovrPassSetStencilTest(Pass* pass, CompareMode test, uint8_t value, uint8_t mask) {
@@ -5823,7 +5849,6 @@ void lovrPassSendBuffer(Pass* pass, const char* name, size_t length, Buffer* buf
pass->bindings[slot].buffer.object = buffer->gpu;
pass->bindings[slot].buffer.offset = buffer->base + offset;
pass->bindings[slot].buffer.extent = extent;
pass->bindingMask |= (1u << slot);
pass->flags |= DIRTY_BINDINGS;
}
@@ -5845,7 +5870,6 @@ void lovrPassSendTexture(Pass* pass, const char* name, size_t length, Texture* t
trackTexture(pass, texture, resource->phase, resource->cache);
pass->bindings[slot].texture = view;
pass->bindingMask |= (1u << slot);
pass->flags |= DIRTY_BINDINGS;
}
@@ -5858,7 +5882,6 @@ void lovrPassSendSampler(Pass* pass, const char* name, size_t length, Sampler* s
lovrCheck(shader->samplerMask & (1u << slot), "Trying to send a Sampler to '%s', but the active Shader doesn't have a Sampler in that slot", name);
pass->bindings[slot].sampler = sampler->gpu;
pass->bindingMask |= (1u << slot);
pass->flags |= DIRTY_BINDINGS;
}
@@ -5885,7 +5908,6 @@ void lovrPassSendData(Pass* pass, const char* name, size_t length, void** data,
uint32_t size = resource->format->stride * MAX(resource->format->length, 1);
BufferView view = lovrPassGetBuffer(pass, size, state.limits.uniformBufferAlign);
pass->bindings[slot].buffer = (gpu_buffer_binding) { view.buffer, view.offset, view.extent };
pass->bindingMask |= (1u << slot);
pass->flags |= DIRTY_BINDINGS;
*data = view.pointer;
@@ -6059,6 +6081,8 @@ static gpu_bundle_info* lovrPassResolveBindings(Pass* pass, Shader* shader, gpu_
for (uint32_t i = 0; i < bundle->count; i++) {
bundle->bindings[i] = pass->bindings[shader->resources[i].binding];
bundle->bindings[i].type = shader->resources[i].type;
bundle->bindings[i].number = shader->resources[i].binding;
bundle->bindings[i].count = 0;
}
pass->flags &= ~DIRTY_BINDINGS;
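One consequence of commit ae19b7aad3, spelled out: binding numbers are now internal. For a non-default graphics shader, resources declared in set #0 above the builtin range are moved to set #2 and renumbered densely in declaration order, and the buffer/texture/sampler/storage masks are keyed by that dense resource index rather than whatever binding the author wrote. A hypothetical before/after (the declarations and original binding numbers are illustrative, assuming they sit above LAST_BUILTIN_BINDING):

// Declared resource                  SPIR-V (set, binding)   after remapping (set, binding)
// texture2D hypotheticalTex;         (0, 14)                 (2, 0)
// uniform Params { ... } params;     (0, 15)                 (2, 1)
//
// bufferMask, textureMask, samplerMask, and storageMask shift by the dense
// index (bits 0, 1, ...), so sparse author-chosen binding numbers no longer
// feed the 32-bit mask shifts directly.

Since lovrPassSetShader now guarantees a valid (possibly default) binding for every resource, the bindingMask bookkeeping removed throughout this file is indeed redundant.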

View File

@@ -323,6 +323,7 @@ typedef struct {
ShaderFlag* flags;
uint32_t flagCount;
const char* label;
bool isDefault;
} ShaderInfo;
typedef void* ShaderIncluder(const char* filename, size_t* bytesRead);