Rework Buffer updates;

Use unsynchronized mapped buffers on macOS.

Reduce the number of Buffer flushes and stalls by flushing once per
batch instead of once per draw.
This commit is contained in:
bjorn 2019-05-13 20:35:21 -07:00
parent b8ed63fcc4
commit f786c40b4e
7 changed files with 83 additions and 70 deletions

View File

@ -1268,7 +1268,8 @@ static int l_lovrGraphicsNewMesh(lua_State* L) {
}
}
lovrBufferMarkRange(vertexBuffer, 0, count * stride);
lovrBufferFlush(vertexBuffer, 0, count * stride);
lovrBufferUnmap(vertexBuffer);
lovrRelease(Buffer, vertexBuffer);
luax_pushobject(L, mesh);

View File

@ -212,7 +212,7 @@ static int l_lovrMeshSetVertex(lua_State* L) {
}
}
}
lovrBufferMarkRange(mesh->vertexBuffer, index * stride, (index + 1) * stride);
lovrBufferFlush(mesh->vertexBuffer, index * stride, stride);
return 0;
}
@ -272,7 +272,17 @@ static int l_lovrMeshSetVertexAttribute(lua_State* L) {
lua_pop(L, 1);
}
}
lovrBufferMarkRange(mesh->vertexBuffer, vertexIndex * attribute->stride, (vertexIndex + 1) * attribute->stride);
size_t attributeSize = 0;
switch (attribute->type) {
case I8: attributeSize = attribute->components * sizeof(int8_t); break;
case U8: attributeSize = attribute->components * sizeof(uint8_t); break;
case I16: attributeSize = attribute->components * sizeof(int16_t); break;
case U16: attributeSize = attribute->components * sizeof(uint16_t); break;
case I32: attributeSize = attribute->components * sizeof(int32_t); break;
case U32: attributeSize = attribute->components * sizeof(uint32_t); break;
case F32: attributeSize = attribute->components * sizeof(float); break;
}
lovrBufferFlush(mesh->vertexBuffer, vertexIndex * attribute->stride + attribute->offset, attributeSize);
return 0;
}
@ -322,7 +332,7 @@ static int l_lovrMeshSetVertices(lua_State* L) {
lua_pop(L, 1);
}
lovrBufferMarkRange(mesh->vertexBuffer, start * stride, (start + count) * stride);
lovrBufferFlush(mesh->vertexBuffer, start * stride, count * stride);
return 0;
}
@ -385,7 +395,7 @@ static int l_lovrMeshSetVertexMap(lua_State* L) {
} else {
void* indices = lovrBufferMap(indexBuffer, 0);
memcpy(indices, blob->data, blob->size);
lovrBufferMarkRange(indexBuffer, 0, blob->size);
lovrBufferFlush(indexBuffer, 0, blob->size);
}
} else {
luaL_checktype(L, 2, LUA_TTABLE);
@ -424,7 +434,7 @@ static int l_lovrMeshSetVertexMap(lua_State* L) {
}
lovrMeshSetIndexBuffer(mesh, indexBuffer, count, size, 0);
lovrBufferMarkRange(indexBuffer, 0, count * size);
lovrBufferFlush(indexBuffer, 0, count * size);
}
return 0;

View File

@ -33,7 +33,7 @@ static int l_lovrShaderBlockSend(lua_State* L) {
Buffer* buffer = lovrShaderBlockGetBuffer(block);
uint8_t* data = lovrBufferMap(buffer, uniform->offset);
luax_checkuniform(L, 3, uniform, data, name);
lovrBufferMarkRange(buffer, uniform->offset, uniform->size);
lovrBufferFlush(buffer, uniform->offset, uniform->size);
return 0;
} else {
Blob* blob = luax_checktype(L, 1, Blob);
@ -42,7 +42,7 @@ static int l_lovrShaderBlockSend(lua_State* L) {
size_t bufferSize = lovrBufferGetSize(buffer);
size_t copySize = MIN(bufferSize, blob->size);
memcpy(data, blob->data, copySize);
lovrBufferMarkRange(buffer, 0, copySize);
lovrBufferFlush(buffer, 0, copySize);
lua_pushinteger(L, copySize);
return 1;
}

View File

@ -12,19 +12,3 @@ bool lovrBufferIsReadable(Buffer* buffer) {
BufferUsage lovrBufferGetUsage(Buffer* buffer) {
return buffer->usage;
}
// Widens the buffer's pending flush range so it also covers the `size` bytes starting at `offset`.
// Only the flushFrom/flushTo bookkeeping is updated here; no data is uploaded.
void lovrBufferMarkRange(Buffer* buffer, size_t offset, size_t size) {
size_t end = offset + size;
buffer->flushFrom = MIN(buffer->flushFrom, offset);
buffer->flushTo = MAX(buffer->flushTo, end);
}
// Flushes the pending range recorded in flushFrom/flushTo via lovrBufferFlushRange, then resets
// the range to empty.
void lovrBufferFlush(Buffer* buffer) {
// flushTo <= flushFrom means no range is pending, so there is nothing to flush
if (buffer->flushTo <= buffer->flushFrom) {
return;
}
lovrBufferFlushRange(buffer, buffer->flushFrom, buffer->flushTo - buffer->flushFrom);
// Reset to the empty state (flushFrom > flushTo) so the early return above triggers next time
buffer->flushFrom = SIZE_MAX;
buffer->flushTo = 0;
}

View File

@ -26,6 +26,7 @@ typedef struct Buffer {
size_t size;
size_t flushFrom;
size_t flushTo;
bool mapped;
bool readable;
BufferType type;
BufferUsage usage;
@ -39,6 +40,5 @@ size_t lovrBufferGetSize(Buffer* buffer);
bool lovrBufferIsReadable(Buffer* buffer);
BufferUsage lovrBufferGetUsage(Buffer* buffer);
void* lovrBufferMap(Buffer* buffer, size_t offset);
void lovrBufferFlushRange(Buffer* buffer, size_t offset, size_t size);
void lovrBufferMarkRange(Buffer* buffer, size_t offset, size_t size);
void lovrBufferFlush(Buffer* buffer);
void lovrBufferFlush(Buffer* buffer, size_t offset, size_t size);
void lovrBufferUnmap(Buffer* buffer);

View File

@ -68,7 +68,8 @@ static void lovrGraphicsInitBuffers() {
state.identityBuffer = lovrBufferCreate(MAX_DRAWS, NULL, BUFFER_VERTEX, USAGE_STATIC, false);
uint8_t* id = lovrBufferMap(state.identityBuffer, 0);
for (int i = 0; i < MAX_DRAWS; i++) id[i] = i;
lovrBufferFlushRange(state.identityBuffer, 0, MAX_DRAWS);
lovrBufferFlush(state.identityBuffer, 0, MAX_DRAWS);
lovrBufferUnmap(state.identityBuffer);
Buffer* vertexBuffer = state.buffers[STREAM_VERTEX];
size_t stride = BUFFER_STRIDES[STREAM_VERTEX];
@ -597,20 +598,23 @@ void lovrGraphicsFlush() {
int batchCount = state.batchCount;
state.batchCount = 0;
// Flush buffers
Batch* firstBatch = &state.batches[0];
Batch* lastBatch = &state.batches[batchCount - 1];
for (int i = 0; i < MAX_BUFFER_ROLES; i++) {
size_t offset = firstBatch->cursors[i].start * BUFFER_STRIDES[i];
size_t size = (lastBatch->cursors[i].start + lastBatch->cursors[i].count - firstBatch->cursors[i].start) * BUFFER_STRIDES[i];
lovrBufferFlush(state.buffers[i], offset, size);
lovrBufferUnmap(state.buffers[i]);
}
for (int b = 0; b < batchCount; b++) {
Batch* batch = &state.batches[b];
BatchParams* params = &batch->params;
Mesh* mesh = batch->type == BATCH_MESH ? params->mesh.object : (batch->instanced ? state.instancedMesh : state.mesh);
int instances = batch->instanced ? batch->count : 1;
// Flush buffers
for (int i = 0; i < MAX_BUFFER_ROLES; i++) {
if (batch->cursors[i].count > 0) {
size_t stride = BUFFER_STRIDES[i];
lovrBufferFlushRange(state.buffers[i], batch->cursors[i].start * stride, batch->cursors[i].count * stride);
}
}
// Bind UBOs
lovrShaderSetBlock(batch->shader, "lovrModelBlock", state.buffers[STREAM_TRANSFORM], batch->cursors[STREAM_TRANSFORM].start * BUFFER_STRIDES[STREAM_TRANSFORM], MAX_DRAWS * BUFFER_STRIDES[STREAM_TRANSFORM], ACCESS_READ);
lovrShaderSetBlock(batch->shader, "lovrColorBlock", state.buffers[STREAM_COLOR], batch->cursors[STREAM_COLOR].start * BUFFER_STRIDES[STREAM_COLOR], MAX_DRAWS * BUFFER_STRIDES[STREAM_COLOR], ACCESS_READ);

View File

@ -492,7 +492,7 @@ static void lovrGpuBindMesh(Mesh* mesh, Shader* shader, int baseDivisor) {
if (mesh->indexBuffer && mesh->indexCount > 0) {
lovrGpuBindBuffer(BUFFER_INDEX, mesh->indexBuffer->id);
lovrBufferFlush(mesh->indexBuffer);
lovrBufferUnmap(mesh->indexBuffer);
#ifdef LOVR_GL
uint32_t primitiveRestart = mesh->indexSize == 4 ? 0xffffffff : 0xffff;
if (state.primitiveRestart != primitiveRestart) {
@ -510,7 +510,7 @@ static void lovrGpuBindMesh(Mesh* mesh, Shader* shader, int baseDivisor) {
if ((attribute = &mesh->attributes[i])->disabled) { continue; }
if ((location = lovrShaderGetAttributeLocation(shader, mesh->attributeNames[i])) < 0) { continue; }
lovrBufferFlush(attribute->buffer);
lovrBufferUnmap(attribute->buffer);
enabledLocations |= (1 << location);
uint16_t divisor = attribute->divisor * baseDivisor;
@ -885,7 +885,7 @@ static void lovrGpuBindShader(Shader* shader) {
vec_push(&state.incoherents[BARRIER_BLOCK], block->source);
}
lovrBufferFlush(block->source);
lovrBufferUnmap(block->source);
lovrGpuBindBlockBuffer(type, block->source->id, block->slot, block->offset, block->size);
} else {
lovrGpuBindBlockBuffer(type, 0, block->slot, 0, 0);
@ -1150,21 +1150,17 @@ void lovrGpuDirtyTexture() {
state.textures[state.activeTexture] = NULL;
}
// We only need to sync when using persistently mapped buffers
// There also seems to be a driver quirk where fencing before submitting GPU work messes up the
// whole frame (observed on WASM and Android)
// We only need to sync when using mapped buffers
void* lovrGpuLock() {
#ifndef LOVR_WEBGL
if (GLAD_GL_ARB_buffer_storage) {
return (void*) glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
}
return (void*) glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
#endif
return NULL;
}
void lovrGpuUnlock(void* lock) {
#ifndef LOVR_WEBGL
if (!lock || !GLAD_GL_ARB_buffer_storage) return;
if (!lock) return;
GLsync sync = (GLsync) lock;
if (glClientWaitSync(sync, 0, 0) == GL_TIMEOUT_EXPIRED) {
while (glClientWaitSync(sync, GL_SYNC_FLUSH_COMMANDS_BIT, 32768) == GL_TIMEOUT_EXPIRED) {
@ -1176,7 +1172,7 @@ void lovrGpuUnlock(void* lock) {
void lovrGpuDestroyLock(void* lock) {
#ifndef LOVR_WEBGL
if (lock && GLAD_GL_ARB_buffer_storage) glDeleteSync((GLsync) lock);
if (lock) glDeleteSync((GLsync) lock);
#endif
}
@ -1560,21 +1556,21 @@ Buffer* lovrBufferInit(Buffer* buffer, size_t size, void* data, BufferType type,
lovrGpuBindBuffer(type, buffer->id);
GLenum glType = convertBufferType(type);
#ifndef LOVR_WEBGL
#ifdef LOVR_WEBGL
if (GLAD_GL_ARB_buffer_storage) {
GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (readable ? GL_MAP_READ_BIT : 0);
glBufferStorage(glType, size, data, flags);
buffer->data = glMapBufferRange(glType, 0, size, flags | GL_MAP_FLUSH_EXPLICIT_BIT);
} else {
#endif
buffer->data = malloc(size);
lovrAssert(buffer->data, "Out of memory");
glBufferData(glType, size, data, convertBufferUsage(usage));
}
#else
buffer->data = malloc(size);
lovrAssert(buffer->data, "Out of memory");
glBufferData(glType, size, data, convertBufferUsage(usage));
if (data) {
memcpy(buffer->data, data, size);
}
#ifndef LOVR_WEBGL
if (data) {
memcpy(buffer->data, data, size);
}
#endif
@ -1585,30 +1581,48 @@ void lovrBufferDestroy(void* ref) {
Buffer* buffer = ref;
lovrGpuDestroySyncResource(buffer, buffer->incoherent);
glDeleteBuffers(1, &buffer->id);
#ifndef LOVR_WEBGL
if (!GLAD_GL_ARB_buffer_storage) {
#endif
free(buffer->data);
#ifndef LOVR_WEBGL
}
#ifdef LOVR_WEBGL
free(buffer->data);
#endif
}
// Returns a pointer into the buffer's CPU-visible data, `offset` bytes from its start.
//
// On non-WebGL builds without ARB_buffer_storage, the buffer is mapped lazily on the first call
// and stays mapped (buffer->mapped == true) until lovrBufferUnmap clears the flag.  Otherwise the
// existing buffer->data pointer is used as-is.
void* lovrBufferMap(Buffer* buffer, size_t offset) {
#ifndef LOVR_WEBGL
  if (!GLAD_GL_ARB_buffer_storage && !buffer->mapped) {
    buffer->mapped = true;
    lovrGpuBindBuffer(buffer->type, buffer->id);

    // Writes are flushed explicitly (see lovrBufferUnmap), hence GL_MAP_FLUSH_EXPLICIT_BIT.
    GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT;

    // glMapBufferRange generates GL_INVALID_OPERATION when GL_MAP_READ_BIT is combined with
    // GL_MAP_UNSYNCHRONIZED_BIT, so only write-only mappings may be unsynchronized.
    flags |= buffer->readable ? GL_MAP_READ_BIT : GL_MAP_UNSYNCHRONIZED_BIT;

    buffer->data = glMapBufferRange(convertBufferType(buffer->type), 0, buffer->size, flags);
  }
#endif
  return (uint8_t*) buffer->data + offset;
}
void lovrBufferFlushRange(Buffer* buffer, size_t offset, size_t size) {
lovrGpuBindBuffer(buffer->type, buffer->id);
#ifndef LOVR_WEBGL
if (GLAD_GL_ARB_buffer_storage) {
glFlushMappedBufferRange(convertBufferType(buffer->type), offset, size);
} else {
#endif
glBufferSubData(convertBufferType(buffer->type), offset, size, (GLvoid*) ((uint8_t*) buffer->data + offset));
#ifndef LOVR_WEBGL
// Records that the `size` bytes starting at `offset` have been modified and need to be flushed.
// This only grows the pending [flushFrom, flushTo) range; the GL work happens in lovrBufferUnmap.
void lovrBufferFlush(Buffer* buffer, size_t offset, size_t size) {
  size_t end = offset + size;
  buffer->flushTo = MAX(buffer->flushTo, end);
  buffer->flushFrom = MIN(buffer->flushFrom, offset);
}
// Submits the pending flush range (if any) to the GPU and resets it to empty.
//
// WebGL: the dirty bytes are uploaded from the CPU-side copy with glBufferSubData.
// Desktop: the dirty bytes of the mapping are flushed with glFlushMappedBufferRange, and when
// persistent mapping (ARB_buffer_storage) is unavailable the buffer is unmapped as well.
void lovrBufferUnmap(Buffer* buffer) {
#ifdef LOVR_WEBGL
  if (buffer->flushTo > buffer->flushFrom) {
    lovrGpuBindBuffer(buffer->type, buffer->id);
    void* data = (uint8_t*) buffer->data + buffer->flushFrom;
    glBufferSubData(convertBufferType(buffer->type), buffer->flushFrom, buffer->flushTo - buffer->flushFrom, data);
  }
#else
  // Only touch GL when there is a live mapping.  Without this guard, a buffer that was never
  // mapped (or had nothing flushed) would reach glUnmapBuffer without being bound, unmapping
  // whatever buffer happens to be bound to the same target or raising GL_INVALID_OPERATION.
  if (GLAD_GL_ARB_buffer_storage || buffer->mapped) {
    // Bind unconditionally: both the flush and the unmap operate on the bound buffer.
    lovrGpuBindBuffer(buffer->type, buffer->id);

    if (buffer->flushTo > buffer->flushFrom) {
      glFlushMappedBufferRange(convertBufferType(buffer->type), buffer->flushFrom, buffer->flushTo - buffer->flushFrom);
    }

    // Persistently mapped buffers stay mapped; on-demand mappings are released here.
    if (!GLAD_GL_ARB_buffer_storage) {
      glUnmapBuffer(convertBufferType(buffer->type));
      buffer->mapped = false;
    }
  }
#endif

  // Empty range: flushFrom > flushTo, so the next lovrBufferFlush starts a fresh range.
  buffer->flushFrom = SIZE_MAX;
  buffer->flushTo = 0;
}
// Shader