diff --git a/etc/shaders/lovr.glsl b/etc/shaders/lovr.glsl index 370f3b5b..ba0e7748 100644 --- a/etc/shaders/lovr.glsl +++ b/etc/shaders/lovr.glsl @@ -28,14 +28,13 @@ struct Camera { }; struct Draw { - mat4 transform; - mat3 normalMatrix; + mat4x3 transform; vec4 color; }; layout(set = 0, binding = 0) uniform Globals { vec2 Resolution; float Time; }; layout(set = 0, binding = 1) uniform CameraBuffer { Camera Cameras[6]; }; -layout(set = 0, binding = 2) uniform DrawBuffer { Draw Draws[256]; }; +layout(set = 0, binding = 2) uniform DrawBuffer { layout(row_major) Draw Draws[256]; }; layout(set = 0, binding = 3) uniform sampler Sampler; layout(set = 1, binding = 0) uniform MaterialBuffer { @@ -154,8 +153,8 @@ layout(location = 14) in vec3 Tangent; #ifdef GL_VERTEX_SHADER #define DrawID gl_BaseInstance -#define Transform Draws[DrawID].transform -#define NormalMatrix Draws[DrawID].normalMatrix +#define Transform mat4(Draws[DrawID].transform) +#define NormalMatrix (cofactor3(Draws[DrawID].transform)) #define PassColor Draws[DrawID].color #define ClipFromLocal (ViewProjection * Transform) #define ClipFromWorld (ViewProjection) @@ -167,6 +166,20 @@ layout(location = 14) in vec3 Tangent; #define WorldFromView (inverse(View)) #define WorldFromClip (inverse(ViewProjection)) #define DefaultPosition (ClipFromLocal * VertexPosition) + +mat3 cofactor3(mat4x3 m) { + return mat3(vec3( + (m[1][1] * m[2][2] - m[2][1] * m[1][2]), + -(m[1][0] * m[2][2] - m[2][0] * m[1][2]), + (m[1][0] * m[2][1] - m[2][0] * m[1][1])), vec3( + -(m[0][1] * m[2][2] - m[2][1] * m[0][2]), + (m[0][0] * m[2][2] - m[2][0] * m[0][2]), + -(m[0][0] * m[2][1] - m[2][0] * m[0][1])), vec3( + (m[0][1] * m[1][2] - m[1][1] * m[0][2]), + -(m[0][0] * m[1][2] - m[1][0] * m[0][2]), + (m[0][0] * m[1][1] - m[1][0] * m[0][1]) + )); +} #endif #ifdef GL_FRAGMENT_SHADER diff --git a/src/core/maf.h b/src/core/maf.h index da52a931..0949d4ee 100644 --- a/src/core/maf.h +++ b/src/core/maf.h @@ -616,30 +616,6 @@ MAF mat4 mat4_invert(mat4 m) { return m; } -MAF mat4 mat4_cofactor(mat4 m) { - float m00 = m[0], m04 = m[4], m08 = m[8], m12 = m[12]; - float m01 = m[1], m05 = m[5], m09 = m[9], m13 = m[13]; - float m02 = m[2], m06 = m[6], m10 = m[10], m14 = m[14]; - float m03 = m[3], m07 = m[7], m11 = m[11], m15 = m[15]; - m[0] = (m05 * (m10 * m15 - m11 * m14) - m09 * (m06 * m15 - m07 * m14) + m13 * (m06 * m11 - m07 * m10)); - m[1] = -(m04 * (m10 * m15 - m11 * m14) - m08 * (m06 * m15 - m07 * m14) + m12 * (m06 * m11 - m07 * m10)); - m[2] = (m04 * (m09 * m15 - m11 * m13) - m08 * (m05 * m15 - m07 * m13) + m12 * (m05 * m11 - m07 * m09)); - m[3] = -(m04 * (m09 * m14 - m10 * m13) - m08 * (m05 * m14 - m06 * m13) + m12 * (m05 * m10 - m06 * m09)); - m[4] = -(m01 * (m10 * m15 - m11 * m14) - m09 * (m02 * m15 - m03 * m14) + m13 * (m02 * m11 - m03 * m10)); - m[5] = (m00 * (m10 * m15 - m11 * m14) - m08 * (m02 * m15 - m03 * m14) + m12 * (m02 * m11 - m03 * m10)); - m[6] = -(m00 * (m09 * m15 - m11 * m13) - m08 * (m01 * m15 - m03 * m13) + m12 * (m01 * m11 - m03 * m09)); - m[7] = (m00 * (m09 * m14 - m10 * m13) - m08 * (m01 * m14 - m02 * m13) + m12 * (m01 * m10 - m02 * m09)); - m[8] = (m01 * (m06 * m15 - m07 * m14) - m05 * (m02 * m15 - m03 * m14) + m13 * (m02 * m07 - m03 * m06)); - m[9] = -(m00 * (m06 * m15 - m07 * m14) - m04 * (m02 * m15 - m03 * m14) + m12 * (m02 * m07 - m03 * m06)); - m[10] = (m00 * (m05 * m15 - m07 * m13) - m04 * (m01 * m15 - m03 * m13) + m12 * (m01 * m07 - m03 * m05)); - m[11] = -(m00 * (m05 * m14 - m06 * m13) - m04 * (m01 * m14 - m02 * m13) + m12 * (m01 * m06 - m02 * m05)); - m[12] = -(m01 * (m06 * m11 - m07 * m10) - m05 * (m02 * m11 - m03 * m10) + m09 * (m02 * m07 - m03 * m06)); - m[13] = (m00 * (m06 * m11 - m07 * m10) - m04 * (m02 * m11 - m03 * m10) + m08 * (m02 * m07 - m03 * m06)); - m[14] = -(m00 * (m05 * m11 - m07 * m09) - m04 * (m01 * m11 - m03 * m09) + m08 * (m01 * m07 - m03 * m05)); - m[15] = (m00 * (m05 * m10 - m06 * m09) - m04 * (m01 * m10 - m02 * m09) + m08 * (m01 * m06 - m02 * m05)); - return m; -} - // Calculate matrix equivalent to "apply n, then m" MAF mat4 mat4_mul(mat4 m, mat4 n) { float m00 = m[0], m01 = m[1], m02 = m[2], m03 = m[3], diff --git a/src/core/spv.c b/src/core/spv.c index 1e3279bb..f55a3a75 100644 --- a/src/core/spv.c +++ b/src/core/spv.c @@ -610,8 +610,8 @@ static spv_result spv_parse_field(spv_context* spv, const uint32_t* word, spv_fi } if (OP_CODE(word) == 22 && word[2] == 32) { // OpTypeFloat - if (columnCount >= 2 && columnCount <= 4 && componentCount == columnCount) { - field->type = SPV_MAT2 + columnCount - 2; + if (columnCount >= 2 && columnCount <= 4 && componentCount >= 2 && componentCount <= 4) { + field->type = SPV_MAT2x2 + (columnCount - 2) * 3 + (componentCount - 2); } else if (columnCount == 1 && componentCount >= 2 && componentCount <= 4) { field->type = SPV_F32x2 + componentCount - 2; } else if (columnCount == 1 && componentCount == 1) { diff --git a/src/core/spv.h b/src/core/spv.h index c5c87ba0..c08a29be 100644 --- a/src/core/spv.h +++ b/src/core/spv.h @@ -17,9 +17,15 @@ typedef enum { SPV_F32x2, SPV_F32x3, SPV_F32x4, - SPV_MAT2, - SPV_MAT3, - SPV_MAT4, + SPV_MAT2x2, + SPV_MAT2x3, + SPV_MAT2x4, + SPV_MAT3x2, + SPV_MAT3x3, + SPV_MAT3x4, + SPV_MAT4x2, + SPV_MAT4x3, + SPV_MAT4x4, SPV_STRUCT } spv_type; diff --git a/src/modules/graphics/graphics.c b/src/modules/graphics/graphics.c index 1b80b6f6..68543e32 100644 --- a/src/modules/graphics/graphics.c +++ b/src/modules/graphics/graphics.c @@ -194,8 +194,7 @@ struct Mesh { }; typedef struct { - float transform[16]; - float cofactor[12]; + float transform[12]; float color[4]; } DrawData; @@ -1277,27 +1276,29 @@ static void recordRenderPass(Pass* pass, gpu_stream* stream) { // DrawData - uint32_t drawPageCount = (activeDrawCount + 255) / 256; - uint32_t drawPageSize = (uint32_t) ALIGN(256 * sizeof(DrawData), align); - mapped = mapBuffer(&state.streamBuffers, drawPageCount * drawPageSize, align); - builtins[2].buffer = (gpu_buffer_binding) { mapped.buffer, mapped.offset, drawPageSize }; + mapped = mapBuffer(&state.streamBuffers, activeDrawCount * sizeof(DrawData), align); + builtins[2].buffer = (gpu_buffer_binding) { mapped.buffer, mapped.offset, 256 * sizeof(DrawData) }; DrawData* data = mapped.pointer; for (uint32_t i = 0; i < activeDrawCount; i++, data++) { Draw* draw = &pass->draws[activeDraws[i] >> 8][activeDraws[i] & 0xff]; - - if ((i & 0xff) == 0) { - data = (DrawData*) ALIGN(data, align); - } - - float cofactor[16]; - mat4_init(cofactor, draw->transform); - memcpy(cofactor + 12, (float[4]) { 0.f, 0.f, 0.f, 1.f }, 4 * sizeof(float)); - mat4_cofactor(cofactor); - - memcpy(data->transform, draw->transform, 16 * sizeof(float)); - memcpy(data->cofactor, cofactor, 12 * sizeof(float)); - memcpy(data->color, draw->color, 4 * sizeof(float)); + // transform is provided as 4x3 row-major matrix for packing reasons, need to transpose + data->transform[0] = draw->transform[0]; + data->transform[1] = draw->transform[4]; + data->transform[2] = draw->transform[8]; + data->transform[3] = draw->transform[12]; + data->transform[4] = draw->transform[1]; + data->transform[5] = draw->transform[5]; + data->transform[6] = draw->transform[9]; + data->transform[7] = draw->transform[13]; + data->transform[8] = draw->transform[2]; + data->transform[9] = draw->transform[6]; + data->transform[10] = draw->transform[10]; + data->transform[11] = draw->transform[14]; + data->color[0] = draw->color[0]; + data->color[1] = draw->color[1]; + data->color[2] = draw->color[2]; + data->color[3] = draw->color[3]; } gpu_bundle_write(&builtinBundle, &builtinfo, 1); @@ -1426,7 +1427,7 @@ static void recordRenderPass(Pass* pass, gpu_stream* stream) { } if ((i & 0xff) == 0 || draw->camera != cameraIndex || constantsDirty) { - uint32_t dynamicOffsets[] = { draw->camera * canvas->views * sizeof(Camera), (i >> 8) * drawPageSize }; + uint32_t dynamicOffsets[] = { draw->camera * canvas->views * sizeof(Camera), (i >> 8) * 256 * sizeof(DrawData) }; gpu_bind_bundles(stream, draw->shader->gpu, &builtinBundle, 0, 1, dynamicOffsets, COUNTOF(dynamicOffsets)); cameraIndex = draw->camera; } @@ -2792,9 +2793,16 @@ Shader* lovrShaderCreate(const ShaderInfo* info) { [SPV_F32x2] = TYPE_F32x2, [SPV_F32x3] = TYPE_F32x3, [SPV_F32x4] = TYPE_F32x4, - [SPV_MAT2] = TYPE_MAT2, - [SPV_MAT3] = TYPE_MAT3, - [SPV_MAT4] = TYPE_MAT4 + [SPV_MAT2x2] = TYPE_MAT2, + [SPV_MAT2x3] = ~0u, + [SPV_MAT2x4] = ~0u, + [SPV_MAT3x2] = ~0u, + [SPV_MAT3x3] = TYPE_MAT3, + [SPV_MAT3x4] = ~0u, + [SPV_MAT4x2] = ~0u, + [SPV_MAT4x3] = ~0u, + [SPV_MAT4x4] = TYPE_MAT4, + [SPV_STRUCT] = ~0u }; spv_field* field = &spv[s].fields[i]; @@ -2802,7 +2810,7 @@ Shader* lovrShaderCreate(const ShaderInfo* info) { uint32_t base = s == 1 ? spv[0].fieldCount : 0; shader->fields[base + i] = (DataField) { - .type = field->type == SPV_STRUCT ? ~0u : dataTypes[field->type], + .type = dataTypes[field->type], .offset = field->offset, .length = field->arrayLength, .stride = field->arrayStride,