From 4279cf086f3fd7a4625f26fe54d4209a35179fa7 Mon Sep 17 00:00:00 2001
From: bjorn
Date: Sat, 6 Oct 2018 21:21:25 -0700
Subject: [PATCH] mat4_invertPose; ~3.9x

---
 CMakeLists.txt       |  4 ++++
 src/headset/fake.c   |  2 +-
 src/headset/openvr.c |  2 +-
 src/math/mat4.c      | 48 +++++++++++++++++++++++++++++++++++++++---------
 src/math/mat4.h      |  6 ++++++
 5 files changed, 51 insertions(+), 11 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index d192e50f..f562fc8d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -454,6 +454,10 @@ if(LOVR_ENABLE_MATH)
     src/api/types/transform.c
     src/lib/noise1234/noise1234.c
   )
+
+  if(LOVR_USE_SSE)
+    add_definitions(-DLOVR_USE_SSE)
+  endif()
 endif()
 
 if(LOVR_ENABLE_PHYSICS)
diff --git a/src/headset/fake.c b/src/headset/fake.c
index 36167d88..7c1b334a 100644
--- a/src/headset/fake.c
+++ b/src/headset/fake.c
@@ -218,7 +218,7 @@ static void fakeRenderTo(void (*callback)(void*), void* userdata) {
   mat4_identity(camera.viewMatrix[0]);
   mat4_translate(camera.viewMatrix[0], 0, state.offset, 0);
   mat4_multiply(camera.viewMatrix[0], state.transform);
-  mat4_invert(camera.viewMatrix[0]);
+  mat4_invertPose(camera.viewMatrix[0]);
   mat4_set(camera.projection[1], camera.projection[0]);
   mat4_set(camera.viewMatrix[1], camera.viewMatrix[0]);
   lovrGraphicsSetCamera(&camera, true);
diff --git a/src/headset/openvr.c b/src/headset/openvr.c
index 538c5c8b..7e5a5cb0 100644
--- a/src/headset/openvr.c
+++ b/src/headset/openvr.c
@@ -553,7 +553,7 @@ static void openvrRenderTo(void (*callback)(void*), void* userdata) {
     mat4_translate(camera.viewMatrix[i], 0, state.offset, 0);
     mat4_multiply(camera.viewMatrix[i], head);
     mat4_multiply(camera.viewMatrix[i], mat4_fromMat34(eye, state.system->GetEyeToHeadTransform(vrEye).m));
-    mat4_invert(camera.viewMatrix[i]);
+    mat4_invertPose(camera.viewMatrix[i]);
   }
 
   lovrGraphicsSetCamera(&camera, true);
diff --git a/src/math/mat4.c b/src/math/mat4.c
index e1a0e496..44c8719c 100644
--- a/src/math/mat4.c
+++ b/src/math/mat4.c
@@ -108,17 +108,47 @@ mat4 mat4_invert(mat4 m) {
   return m;
 }
 
+// Inverts a rigid pose matrix (rotation + translation) in place and returns m.
+// Writes the transposed 3x3 block and the translation -R^T * t, which is only
+// a true inverse when m has no scale applied and its bottom row is (0, 0, 0, 1).
+#ifdef LOVR_USE_SSE
+mat4 mat4_invertPose(mat4 m) {
+  __m128 c0 = _mm_loadu_ps(m + 0);
+  __m128 c1 = _mm_loadu_ps(m + 4);
+  __m128 c2 = _mm_loadu_ps(m + 8);
+  __m128 c3 = _mm_loadu_ps(m + 12);
+  __m128 x1 = _mm_set_ps(1.f, 0.f, 0.f, 0.f); // lanes (0, 0, 0, 1); _mm_set_ps lists the highest lane first
+
+  _MM_TRANSPOSE4_PS(c0, c1, c2, x1);
+
+  __m128 x0 = _mm_add_ps(
+    _mm_mul_ps(c0, _mm_shuffle_ps(c3, c3, _MM_SHUFFLE(0, 0, 0, 0))),
+    _mm_mul_ps(c1, _mm_shuffle_ps(c3, c3, _MM_SHUFFLE(1, 1, 1, 1)))
+  );
+
+  x0 = _mm_add_ps(x0, _mm_mul_ps(c2, _mm_shuffle_ps(c3, c3, _MM_SHUFFLE(2, 2, 2, 2))));
+  x0 = _mm_xor_ps(x0, _mm_set1_ps(-0.f)); // negate every lane by flipping its sign bit
+  x0 = _mm_add_ps(x0, x1);
+
+  _mm_storeu_ps(m + 0, c0);
+  _mm_storeu_ps(m + 4, c1);
+  _mm_storeu_ps(m + 8, c2);
+  _mm_storeu_ps(m + 12, x0);
+  return m;
+}
+#endif
+
 mat4 mat4_transpose(mat4 m) {
 #ifdef LOVR_USE_SSE
-  __m128 c1 = _mm_loadu_ps(m + 0);
-  __m128 c2 = _mm_loadu_ps(m + 4);
-  __m128 c3 = _mm_loadu_ps(m + 8);
-  __m128 c4 = _mm_loadu_ps(m + 12);
-  _MM_TRANSPOSE4_PS(c1, c2, c3, c4);
-  _mm_storeu_ps(m + 0, c1);
-  _mm_storeu_ps(m + 4, c2);
-  _mm_storeu_ps(m + 8, c3);
-  _mm_storeu_ps(m + 12, c4);
+  __m128 c0 = _mm_loadu_ps(m + 0);
+  __m128 c1 = _mm_loadu_ps(m + 4);
+  __m128 c2 = _mm_loadu_ps(m + 8);
+  __m128 c3 = _mm_loadu_ps(m + 12);
+  _MM_TRANSPOSE4_PS(c0, c1, c2, c3);
+  _mm_storeu_ps(m + 0, c0);
+  _mm_storeu_ps(m + 4, c1);
+  _mm_storeu_ps(m + 8, c2);
+  _mm_storeu_ps(m + 12, c3);
   return m;
 #else
   float a01 = m[1], a02 = m[2], a03 = m[3],
diff --git a/src/math/mat4.h b/src/math/mat4.h
index fc6da402..ac0fcd14 100644
--- a/src/math/mat4.h
+++ b/src/math/mat4.h
@@ -22,3 +22,9 @@ mat4 mat4_perspective(mat4 m, float near, float far, float fov, float aspect);
 mat4 mat4_lookAt(mat4 m, vec3 from, vec3 to, vec3 up);
 void mat4_transform(mat4 m, float* x, float* y, float* z);
 void mat4_transformDirection(mat4 m, float* x, float* y, float* z);
+
+#ifdef LOVR_USE_SSE
+mat4 mat4_invertPose(mat4 m);
+#else
+#define mat4_invertPose mat4_invert
+#endif