rm LOVR_USE_SSE;

The SSE code path doesn't really improve performance in an average LÖVR app
and isn't worth the added complexity.
This commit is contained in:
bjorn 2019-10-22 16:45:34 -07:00
parent d51d2b4105
commit b35ebc15b6
5 changed files with 2 additions and 82 deletions

View File

@ -25,7 +25,6 @@ option(LOVR_USE_OCULUS "Enable the LibOVR backend for the headset module (be sur
option(LOVR_USE_OCULUS_MOBILE "Enable the Oculus Mobile (Android) backend for the headset module" OFF)
option(LOVR_USE_DESKTOP_HEADSET "Enable the keyboard/mouse backend for the headset module" ON)
option(LOVR_USE_LEAP "Enable the Leap Motion backend for the headset module" OFF)
option(LOVR_USE_SSE "Enable use of SIMD intrinsics" ON)
option(LOVR_SYSTEM_PHYSFS "Use the system-provided PhysFS" OFF)
option(LOVR_SYSTEM_ENET "Use the system-provided enet" OFF)
@ -77,10 +76,6 @@ elseif(UNIX)
find_package(PkgConfig)
endif()
# Force LOVR_USE_SSE off for Emscripten and ARM/AArch64 targets.
# CMAKE_SYSTEM_PROCESSOR is passed by name so that if() dereferences it itself:
# an unquoted ${CMAKE_SYSTEM_PROCESSOR} expands to nothing when the variable is
# empty (leaving a malformed condition) and can be dereferenced a second time
# if its value happens to name another variable.
if(EMSCRIPTEN OR CMAKE_SYSTEM_PROCESSOR MATCHES "arm|aarch64")
  set(LOVR_USE_SSE OFF)
endif()
# PhysicsFS
if(LOVR_ENABLE_FILESYSTEM)
if(LOVR_SYSTEM_PHYSFS)
@ -496,10 +491,6 @@ if(LOVR_ENABLE_MATH)
src/api/l_vectors.c
src/lib/noise1234/noise1234.c
)
# When LOVR_USE_SSE is enabled, pass it to the compiler as a preprocessor
# definition. add_definitions() is directory-scoped, so the define is not
# limited to the math sources listed above.
if(LOVR_USE_SSE)
add_definitions(-DLOVR_USE_SSE)
endif()
endif()
if(LOVR_ENABLE_PHYSICS)

View File

@ -46,11 +46,6 @@ CFLAGS_@(VRAPI) += -DLOVR_USE_OCULUS_MOBILE
# The -D prefix is required: without it the bare token is handed to the
# compiler as an input file name instead of defining the macro.
CFLAGS_@(WEBVR) += -DLOVR_USE_WEBVR
CFLAGS_@(LEAP) += -DLOVR_USE_LEAP
# SIMD: define LOVR_USE_SSE only when the TUP_ARCH config value is x86_64;
# on any other value the macro is left undefined.
ifeq (@(TUP_ARCH),x86_64)
CFLAGS += -DLOVR_USE_SSE
endif
# Lua
CFLAGS_@(LUAJIT) += -DLOVR_USE_LUAJIT

View File

@ -1,9 +1,6 @@
#include <string.h>
#include <math.h>
#include "util.h"
#ifdef LOVR_USE_SSE
#include <xmmintrin.h>
#endif
#pragma once
@ -356,18 +353,6 @@ MAF mat4 mat4_identity(mat4 m) {
}
MAF mat4 mat4_transpose(mat4 m) {
#ifdef LOVR_USE_SSE
__m128 c0 = _mm_loadu_ps(m + 0);
__m128 c1 = _mm_loadu_ps(m + 4);
__m128 c2 = _mm_loadu_ps(m + 8);
__m128 c3 = _mm_loadu_ps(m + 12);
_MM_TRANSPOSE4_PS(c0, c1, c2, c3);
_mm_storeu_ps(m + 0, c0);
_mm_storeu_ps(m + 4, c1);
_mm_storeu_ps(m + 8, c2);
_mm_storeu_ps(m + 12, c3);
return m;
#else
float a01 = m[1], a02 = m[2], a03 = m[3],
a12 = m[6], a13 = m[7],
a23 = m[11];
@ -385,7 +370,6 @@ MAF mat4 mat4_transpose(mat4 m) {
m[13] = a13;
m[14] = a23;
return m;
#endif
}
MAF mat4 mat4_invert(mat4 m) {
@ -433,56 +417,7 @@ MAF mat4 mat4_invert(mat4 m) {
return m;
}
// Inverts a 4x4 pose matrix in place and returns the same pointer.
// This can only be used if the matrix doesn't have any scale applied
// m must point to 16 floats. With LOVR_USE_SSE defined, the inverse is built by
// transposing the upper-left 3x3 block and replacing the last group of four
// floats with the negated product of that transpose and the original m[12..14].
// Without LOVR_USE_SSE it simply forwards to mat4_invert.
MAF mat4 mat4_invertPose(mat4 m) {
#ifdef LOVR_USE_SSE
// Unaligned loads of the four groups of four floats
// (named c0..c3, presumably columns of a column-major matrix -- confirm).
__m128 c0 = _mm_loadu_ps(m + 0);
__m128 c1 = _mm_loadu_ps(m + 4);
__m128 c2 = _mm_loadu_ps(m + 8);
__m128 c3 = _mm_loadu_ps(m + 12);
// _mm_set_ps takes lanes from highest to lowest, so x1 is (0, 0, 0, 1) in memory order.
__m128 x1 = _mm_set_ps(1.f, 0.f, 0.f, 0.f);
// Transpose c0, c1, c2 together with x1 (c3 is left out): afterwards c0..c2 hold the
// transposed 3x3 block with a 0 in lane 3, and x1 holds (m[3], m[7], m[11], 1).
_MM_TRANSPOSE4_PS(c0, c1, c2, x1);
// x0 = c0 * c3.x + c1 * c3.y + c2 * c3.z, each shuffle broadcasting one lane of c3.
__m128 x0 = _mm_add_ps(
_mm_mul_ps(c0, _mm_shuffle_ps(c3, c3, _MM_SHUFFLE(0, 0, 0, 0))),
_mm_mul_ps(c1, _mm_shuffle_ps(c3, c3, _MM_SHUFFLE(1, 1, 1, 1)))
);
x0 = _mm_add_ps(x0, _mm_mul_ps(c2, _mm_shuffle_ps(c3, c3, _MM_SHUFFLE(2, 2, 2, 2))));
// XOR with -0.f flips the sign bit of every lane, i.e. negates x0.
x0 = _mm_xor_ps(x0, _mm_set1_ps(-0.f));
// Adding x1 puts 1 in lane 3; lanes 0..2 stay unchanged only if m[3], m[7] and m[11] are 0.
x0 = _mm_add_ps(x0, x1);
_mm_storeu_ps(m + 0, c0);
_mm_storeu_ps(m + 4, c1);
_mm_storeu_ps(m + 8, c2);
_mm_storeu_ps(m + 12, x0);
return m;
#else
// No SIMD path available: use the general inverse instead.
return mat4_invert(m);
#endif
}
MAF mat4 mat4_multiply(mat4 m, mat4 n) {
#ifdef LOVR_USE_SSE
__m128 c0 = _mm_loadu_ps(m + 0);
__m128 c1 = _mm_loadu_ps(m + 4);
__m128 c2 = _mm_loadu_ps(m + 8);
__m128 c3 = _mm_loadu_ps(m + 12);
for (int i = 0; i < 4; i++) {
__m128 x = _mm_set1_ps(n[4 * i + 0]);
__m128 y = _mm_set1_ps(n[4 * i + 1]);
__m128 z = _mm_set1_ps(n[4 * i + 2]);
__m128 w = _mm_set1_ps(n[4 * i + 3]);
_mm_storeu_ps(m + 4 * i, _mm_add_ps(
_mm_add_ps(_mm_mul_ps(x, c0), _mm_mul_ps(y, c1)),
_mm_add_ps(_mm_mul_ps(z, c2), _mm_mul_ps(w, c3))
));
}
#else
float m00 = m[0], m01 = m[1], m02 = m[2], m03 = m[3],
m10 = m[4], m11 = m[5], m12 = m[6], m13 = m[7],
m20 = m[8], m21 = m[9], m22 = m[10], m23 = m[11],
@ -509,7 +444,6 @@ MAF mat4 mat4_multiply(mat4 m, mat4 n) {
m[13] = n30 * m01 + n31 * m11 + n32 * m21 + n33 * m31;
m[14] = n30 * m02 + n31 * m12 + n32 * m22 + n33 * m32;
m[15] = n30 * m03 + n31 * m13 + n32 * m23 + n33 * m33;
#endif
return m;
}

View File

@ -143,7 +143,7 @@ static void desktop_renderTo(void (*callback)(void*), void* userdata) {
Camera camera = { .canvas = NULL, .viewMatrix = { MAT4_IDENTITY }, .stereo = true };
mat4_perspective(camera.projection[0], state.clipNear, state.clipFar, 67.f * (float) M_PI / 180.f, (float) width / 2.f / height);
mat4_multiply(camera.viewMatrix[0], state.headTransform);
mat4_invertPose(camera.viewMatrix[0]);
mat4_invert(camera.viewMatrix[0]);
mat4_set(camera.projection[1], camera.projection[0]);
mat4_set(camera.viewMatrix[1], camera.viewMatrix[0]);
lovrGraphicsSetCamera(&camera, true);

View File

@ -487,7 +487,7 @@ static void openvr_renderTo(void (*callback)(void*), void* userdata) {
mat4_fromMat44(camera.projection[i], state.system->GetProjectionMatrix(vrEye, state.clipNear, state.clipFar).m);
mat4_multiply(camera.viewMatrix[i], head);
mat4_multiply(camera.viewMatrix[i], mat4_fromMat34(eye, state.system->GetEyeToHeadTransform(vrEye).m));
mat4_invertPose(camera.viewMatrix[i]);
mat4_invert(camera.viewMatrix[i]);
}
lovrGraphicsSetCamera(&camera, true);