Tally/Readback fixes;

This commit is contained in:
bjorn 2022-07-14 19:23:02 -07:00
parent 45135899a1
commit e2bfff1b0a
5 changed files with 131 additions and 51 deletions

View File

@ -896,10 +896,18 @@ static int l_lovrPassRead(lua_State* L) {
}
// Lua-facing wrapper around lovrPassTick.
// Stack arguments: (1) a Pass, (2) a Tally, (3) an unsigned 32-bit slot number.
// One is subtracted from the slot number before it is forwarded (presumably
// converting a 1-based Lua slot into a 0-based index — confirm against callers).
// Pushes no results.
static int l_lovrPassTick(lua_State* L) {
  Pass* self = luax_checktype(L, 1, Pass);
  Tally* counter = luax_checktype(L, 2, Tally);
  uint32_t slot = luax_checku32(L, 3);
  lovrPassTick(self, counter, slot - 1);
  return 0;
}
// Lua-facing wrapper around lovrPassTock.
// Stack arguments: (1) a Pass, (2) a Tally, (3) an unsigned 32-bit slot number.
// One is subtracted from the slot number before it is forwarded (presumably
// converting a 1-based Lua slot into a 0-based index — confirm against callers).
// Pushes no results.
static int l_lovrPassTock(lua_State* L) {
  Pass* self = luax_checktype(L, 1, Pass);
  Tally* counter = luax_checktype(L, 2, Tally);
  uint32_t slot = luax_checku32(L, 3);
  lovrPassTock(self, counter, slot - 1);
  return 0;
}

View File

@ -20,10 +20,32 @@ static int l_lovrReadbackWait(lua_State* L) {
}
// Lua-facing accessor that converts a Readback's data into Lua values.
// Stack argument: (1) a Readback.
// Returns (count of pushed Lua values):
//   - READBACK_BUFFER:  nothing yet (not implemented).
//   - READBACK_TEXTURE: a single nil.
//   - READBACK_TALLY:   a single array table of integers, one per 32-bit value.
//   - any other type:   nothing.
static int l_lovrReadbackGetData(lua_State* L) {
  Readback* readback = luax_checktype(L, 1, Readback);
  const ReadbackInfo* info = lovrReadbackGetInfo(readback);
  void* data = lovrReadbackGetData(readback);
  uint32_t* u32 = data;
  switch (info->type) {
    case READBACK_BUFFER:
      // TODO
      return 0;
    case READBACK_TEXTURE:
      lua_pushnil(L);
      return 1;
    case READBACK_TALLY: {
      int count = (int) info->tally.count;
      if (lovrTallyGetInfo(info->tally.object)->type == TALLY_STAGE) {
        // The number of pipeline statistics that are tracked per slot.
        // NOTE(review): confirm this matches the statistics actually enabled
        // when the GPU tally is created and the stride used for the readback.
        count *= 6;
      }
      lua_createtable(L, count, 0);
      for (int i = 0; i < count; i++) {
        lua_pushinteger(L, u32[i]);
        lua_rawseti(L, -2, i + 1);
      }
      return 1;
    }
  }
  // Unhandled readback type: return no values instead of falling off the end
  // of a non-void function.
  return 0;
}
static int l_lovrReadbackGetBlob(lua_State* L) {
@ -44,6 +66,7 @@ const luaL_Reg lovrReadback[] = {
{ "isComplete", l_lovrReadbackIsComplete },
{ "wait", l_lovrReadbackWait },
{ "getData", l_lovrReadbackGetData },
{ "getBlob", l_lovrReadbackGetBlob },
{ "getImage", l_lovrReadbackGetImage },
{ NULL, NULL }
};

View File

@ -426,7 +426,7 @@ void* gpu_map(gpu_buffer* buffer, uint32_t size, uint32_t align, gpu_map_mode mo
VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
VK_BUFFER_USAGE_TRANSFER_SRC_BIT) :
VK_BUFFER_USAGE_TRANSFER_DST_BIT
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT
};
VkBuffer handle;
@ -1376,9 +1376,7 @@ bool gpu_tally_init(gpu_tally* tally, gpu_tally_info* info) {
.queryCount = info->count,
.pipelineStatistics = info->type == GPU_TALLY_PIPELINE ? (
VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT |
VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT |
VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT |
VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT |
VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT |
VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT
) : 0
@ -1605,7 +1603,7 @@ void gpu_copy_texture_buffer(gpu_stream* stream, gpu_texture* src, gpu_buffer* d
}
// Records a command on `stream` that copies `count` query results from `src`,
// starting at query `srcIndex`, into `dst`. Results are written `stride` bytes
// apart, beginning `dstOffset` bytes past the buffer's own base offset
// (`dst->offset`). VK_QUERY_RESULT_WAIT_BIT is passed, so the copy waits for
// the queried results to become available.
void gpu_copy_tally_buffer(gpu_stream* stream, gpu_tally* src, gpu_buffer* dst, uint32_t srcIndex, uint32_t dstOffset, uint32_t count, uint32_t stride) {
  // dst->offset must be included: dstOffset alone would address the start of
  // the underlying VkBuffer rather than this gpu_buffer's region within it.
  vkCmdCopyQueryPoolResults(stream->commands, src->handle, srcIndex, count, dst->handle, dst->offset + dstOffset, stride, VK_QUERY_RESULT_WAIT_BIT);
}
void gpu_clear_buffer(gpu_stream* stream, gpu_buffer* buffer, uint32_t offset, uint32_t size) {
@ -1867,6 +1865,7 @@ bool gpu_init(gpu_config* config) {
enable->shaderClipDistance = supports->shaderClipDistance;
enable->shaderCullDistance = supports->shaderCullDistance;
enable->largePoints = supports->largePoints;
enable->pipelineStatisticsQuery = supports->pipelineStatisticsQuery;
// Optional features (currently always enabled when supported)
config->features->textureBC = enable->textureCompressionBC = supports->textureCompressionBC;

View File

@ -167,7 +167,7 @@ typedef enum {
VERTEX_GLYPH,
VERTEX_MODEL,
VERTEX_EMPTY,
VERTEX_FORMAT_COUNT
VERTEX_FORMAX
} VertexFormat;
typedef struct {
@ -231,7 +231,6 @@ struct Tally {
TallyInfo info;
gpu_tally* gpu;
gpu_buffer* buffer;
uint64_t* masks;
};
typedef struct {
@ -368,7 +367,7 @@ static struct {
Shader* animator;
Shader* timeWizard;
Shader* defaultShaders[DEFAULT_SHADER_COUNT];
gpu_vertex_format vertexFormats[VERTEX_FORMAT_COUNT];
gpu_vertex_format vertexFormats[VERTEX_FORMAX];
Readback* oldestReadback;
Readback* newestReadback;
Material* defaultMaterial;
@ -596,6 +595,9 @@ void lovrGraphicsDestroy() {
if (!state.initialized) return;
cleanupPasses();
arr_free(&state.passes);
for (Readback* readback = state.oldestReadback; readback; readback = readback->next) {
lovrRelease(readback, lovrReadbackDestroy);
}
lovrRelease(state.window, lovrTextureDestroy);
for (uint32_t i = 0; i < state.attachments.length; i++) {
gpu_texture_destroy(state.attachments.data[i].texture);
@ -2726,30 +2728,59 @@ Readback* lovrReadbackCreate(const ReadbackInfo* info) {
readback->info = *info;
readback->buffer = (gpu_buffer*) (readback + 1);
if (readback->info.width > 0 && readback->info.height > 0) {
readback->size = measureTexture(info->format, info->width, info->height, 1);
readback->image = lovrImageCreateRaw(info->width, info->height, info->format);
} else {
readback->size = info->size;
readback->data = malloc(info->size);
lovrAssert(readback->data, "Out of memory");
switch (info->type) {
case READBACK_BUFFER:
lovrRetain(info->buffer.object);
readback->size = info->buffer.extent;
readback->data = malloc(readback->size);
lovrAssert(readback->data, "Out of memory");
break;
case READBACK_TEXTURE:
lovrRetain(info->texture.object);
TextureFormat format = info->texture.object->info.format;
readback->size = measureTexture(format, info->texture.extent[0], info->texture.extent[1], 1);
readback->image = lovrImageCreateRaw(info->texture.extent[0], info->texture.extent[1], format);
break;
case READBACK_TALLY:
lovrRetain(info->tally.object);
uint32_t stride = info->tally.object->info.type == TALLY_STAGE ? 24 : 4;
readback->size = info->tally.count * stride;
readback->data = malloc(readback->size);
lovrAssert(readback->data, "Out of memory");
break;
}
readback->pointer = gpu_map(readback->buffer, readback->size, 16, GPU_MAP_READ);
if (!state.oldestReadback) state.oldestReadback = readback;
*(state.newestReadback ? &state.newestReadback->next : &state.newestReadback) = readback;
if (!state.oldestReadback) {
state.oldestReadback = readback;
}
if (state.newestReadback) {
state.newestReadback->next = readback;
}
state.newestReadback = readback;
lovrRetain(readback);
return readback;
}
// Destructor for a Readback, taking the object as an untyped pointer.
// Releases the source object named by the readback's info (buffer, texture or
// tally, depending on its type), releases the image, then frees the data
// allocation and the Readback itself.
void lovrReadbackDestroy(void* ref) {
  Readback* self = ref;

  // Drop the reference on whichever source object this readback was made from.
  if (self->info.type == READBACK_BUFFER) {
    lovrRelease(self->info.buffer.object, lovrBufferDestroy);
  } else if (self->info.type == READBACK_TEXTURE) {
    lovrRelease(self->info.texture.object, lovrTextureDestroy);
  } else if (self->info.type == READBACK_TALLY) {
    lovrRelease(self->info.tally.object, lovrTallyDestroy);
  }

  lovrRelease(self->image, lovrImageDestroy);
  free(self->data);
  free(self);
}
// Returns a read-only pointer to the ReadbackInfo stored inside `readback`.
// The pointer refers to memory inside the Readback itself.
const ReadbackInfo* lovrReadbackGetInfo(Readback* readback) {
  const ReadbackInfo* info = &readback->info;
  return info;
}
// Reports whether the readback has finished, by asking gpu_is_complete about
// the tick recorded on the readback.
bool lovrReadbackIsComplete(Readback* readback) {
  bool finished = gpu_is_complete(readback->tick);
  return finished;
}
@ -2785,11 +2816,9 @@ Tally* lovrTallyCreate(const TallyInfo* info) {
Tally* tally = calloc(1, sizeof(Tally) + gpu_sizeof_tally());
lovrAssert(tally, "Out of memory");
tally->ref = 1;
tally->tick = state.tick;
tally->tick = state.tick - 1;
tally->info = *info;
tally->gpu = (gpu_tally*) (tally + 1);
tally->masks = calloc((info->count + 63) / 64, sizeof(uint64_t));
lovrAssert(tally->masks, "Out of memory");
uint32_t total = info->count * (info->type == TALLY_TIMER ? 2 * info->views : 1);
@ -2814,7 +2843,6 @@ void lovrTallyDestroy(void* ref) {
gpu_tally_destroy(tally->gpu);
if (tally->buffer) gpu_buffer_destroy(tally->buffer);
free(tally->buffer);
free(tally->masks);
free(tally);
}
@ -2827,7 +2855,7 @@ const TallyInfo* lovrTallyGetInfo(Tally* tally) {
// them to a temporary buffer, then dispatch a compute shader to subtract pairs and convert to ns,
// writing the final friendly values to a destination Buffer.
static void lovrTallyResolve(Tally* tally, uint32_t index, uint32_t count, gpu_buffer* buffer, uint32_t offset, gpu_stream* stream) {
gpu_copy_tally_buffer(stream, tally->gpu, tally->buffer, index, 0, count, 4);
gpu_copy_tally_buffer(stream, tally->gpu, tally->buffer, index, 0, count * 2, 4);
gpu_sync(stream, &(gpu_barrier) {
.prev = GPU_PHASE_TRANSFER,
@ -2849,7 +2877,7 @@ static void lovrTallyResolve(Tally* tally, uint32_t index, uint32_t count, gpu_b
gpu_shader* shader = state.timeWizard->gpu;
gpu_binding bindings[] = {
[0] = { 0, GPU_SLOT_STORAGE_BUFFER, .buffer = { tally->buffer, 0, ~0u } },
[0] = { 0, GPU_SLOT_STORAGE_BUFFER, .buffer = { tally->buffer, 0, count * 2 * tally->info.views * sizeof(uint32_t) } },
[1] = { 1, GPU_SLOT_STORAGE_BUFFER, .buffer = { buffer, offset, count * sizeof(uint32_t) } }
};
@ -3012,6 +3040,7 @@ Pass* lovrGraphicsGetPass(PassInfo* info) {
pass->pipeline = &pass->pipelines[0];
pass->pipeline->info = (gpu_pipeline_info) {
.colorCount = colorTextureCount,
.rasterizer.winding = GPU_WINDING_CW,
.depth.format = canvas->depth.texture ? canvas->depth.texture->info.format : canvas->depth.format,
.multisample.count = canvas->samples,
.viewCount = main->depth,
@ -3884,7 +3913,7 @@ void lovrPassPlane(Pass* pass, float* transform, DrawStyle style, uint32_t cols,
.vertex.pointer = (void**) &vertices,
.vertex.count = vertexCount,
.index.pointer = (void**) &indices,
.index.count = indexCount,
.index.count = indexCount
});
} else {
indexCount = (cols * rows) * 6;
@ -4816,11 +4845,6 @@ void lovrPassCopyTallyToBuffer(Pass* pass, Tally* tally, Buffer* buffer, uint32_
lovrCheck(dstOffset + count * 4 <= buffer->size, "Buffer copy range goes past the end of the destination Buffer");
lovrCheck(dstOffset % 4 == 0, "Buffer copy offset must be a multiple of 4");
for (uint32_t i = 0; i < count; i++) {
uint32_t index = srcIndex + i;
lovrCheck(tally->masks[index / 64] & (1 << (index % 64)), "Trying to copy Tally slot %d, but it hasn't been marked yet", index + 1);
}
if (tally->info.type == TALLY_TIMER) {
lovrTallyResolve(tally, srcIndex, count, buffer->gpu, dstOffset, pass->stream);
trackBuffer(pass, buffer, GPU_PHASE_SHADER_COMPUTE, GPU_CACHE_STORAGE_WRITE);
@ -4921,7 +4945,12 @@ Readback* lovrPassReadBuffer(Pass* pass, Buffer* buffer, uint32_t offset, uint32
lovrCheck(pass->info.type == PASS_TRANSFER, "This function can only be called on a transfer pass");
lovrCheck(!lovrBufferIsTemporary(buffer), "Unable to read back a temporary buffer");
lovrCheck(offset + extent <= buffer->size, "Tried to read past the end of the Buffer");
Readback* readback = lovrReadbackCreate(&(ReadbackInfo) { .size = extent });
Readback* readback = lovrReadbackCreate(&(ReadbackInfo) {
.type = READBACK_BUFFER,
.buffer.object = buffer,
.buffer.offset = offset,
.buffer.extent = extent
});
gpu_copy_buffers(pass->stream, buffer->gpu, readback->buffer, offset, 0, extent);
trackBuffer(pass, buffer, GPU_PHASE_TRANSFER, GPU_CACHE_TRANSFER_READ);
return readback;
@ -4937,9 +4966,10 @@ Readback* lovrPassReadTexture(Pass* pass, Texture* texture, uint32_t offset[4],
lovrCheck(texture->info.usage & TEXTURE_TRANSFER, "Texture must be created with the 'transfer' usage to read from it");
checkTextureBounds(&texture->info, offset, extent);
Readback* readback = lovrReadbackCreate(&(ReadbackInfo) {
.width = extent[0],
.height = extent[1],
.format = texture->info.format
.type = READBACK_TEXTURE,
.texture.object = texture,
.texture.offset = { offset[0], offset[1], offset[2], offset[3] },
.texture.extent = { extent[0], extent[1] }
});
gpu_copy_texture_buffer(pass->stream, texture->gpu, readback->buffer, offset, 0, extent);
trackTexture(pass, texture, GPU_PHASE_TRANSFER, GPU_CACHE_TRANSFER_READ);
@ -4950,17 +4980,17 @@ Readback* lovrPassReadTally(Pass* pass, Tally* tally, uint32_t index, uint32_t c
lovrCheck(pass->info.type == PASS_TRANSFER, "This function can only be called on a transfer pass");
lovrCheck(index + count <= tally->info.count, "Tally read range exceeds the number of slots in the Tally");
for (uint32_t i = 0; i < count; i++) {
uint32_t j = index + i;
lovrCheck(tally->masks[j / 64] & (1ull << (j % 64)), "Trying to copy Tally slot %d, but it hasn't been marked yet", j + 1);
}
uint32_t stride = tally->info.type == TALLY_STAGE ? 24 : 4;
Readback* readback = lovrReadbackCreate(&(ReadbackInfo) { .size = count * stride });
Readback* readback = lovrReadbackCreate(&(ReadbackInfo) {
.type = READBACK_TALLY,
.tally.object = tally,
.tally.index = index,
.tally.count = count
});
if (tally->info.type == TALLY_TIMER) {
lovrTallyResolve(tally, index, count, readback->buffer, 0, pass->stream);
} else {
uint32_t stride = tally->info.type == TALLY_STAGE ? 24 : 4;
gpu_copy_tally_buffer(pass->stream, tally->gpu, readback->buffer, index, 0, count, stride);
}
@ -4968,12 +4998,12 @@ Readback* lovrPassReadTally(Pass* pass, Tally* tally, uint32_t index, uint32_t c
}
void lovrPassTick(Pass* pass, Tally* tally, uint32_t index) {
lovrCheck(tally->info.views == pass->cameraCount, "Tally view count does not match Pass view count");
lovrCheck(index < tally->info.count, "Trying to use tally slot #%d, but the tally only has %d slots", index + 1, tally->info.count);
lovrCheck(~tally->masks[index / 64] & (1 << (index % 64)), "Tally slot #%d has already been used", index + 1);
if (tally->tick != state.tick) {
gpu_clear_tally(state.stream, tally->gpu, 0, tally->info.count * 2 * tally->info.views);
memset(tally->masks, 0, (tally->info.count + 63) / 64 * sizeof(uint64_t));
uint32_t multiplier = tally->info.type == TALLY_TIMER ? 2 * tally->info.count * tally->info.views : 1;
gpu_clear_tally(state.stream, tally->gpu, 0, tally->info.count * multiplier);
tally->tick = state.tick;
}
@ -4985,8 +5015,8 @@ void lovrPassTick(Pass* pass, Tally* tally, uint32_t index) {
}
void lovrPassTock(Pass* pass, Tally* tally, uint32_t index) {
lovrCheck(tally->info.views == pass->cameraCount, "Tally view count does not match Pass view count");
lovrCheck(index < tally->info.count, "Trying to use tally slot #%d, but the tally only has %d slots", index + 1, tally->info.count);
lovrCheck(tally->masks[index / 64] & (1 << (index % 64)), "Tally slot #%d has not been started yet", index + 1);
if (tally->info.type == TALLY_TIMER) {
gpu_tally_mark(pass->stream, tally->gpu, index * 2 * tally->info.views + tally->info.views);

View File

@ -429,11 +429,31 @@ Buffer* lovrModelGetIndexBuffer(Model* model);
// Readback
// Identifies what kind of object a Readback was created from; selects which
// member of the union in ReadbackInfo is meaningful.
typedef enum {
READBACK_BUFFER, // source is a Buffer
READBACK_TEXTURE, // source is a Texture
READBACK_TALLY // source is a Tally
} ReadbackType;
// Describes the source of a Readback: its type plus the matching source object
// and range. Passed to lovrReadbackCreate.
typedef struct {
// NOTE(review): these four flat fields look like an older description of the
// readback (byte size, or image width/height/format) that the tagged union
// below is meant to replace — confirm whether they should still be here.
uint32_t size;
uint32_t width;
uint32_t height;
uint32_t format;
// Selects which member of the union below is valid.
ReadbackType type;
union {
// Valid when type == READBACK_BUFFER.
struct {
Buffer* object; // source buffer
uint32_t offset; // start of the range read from the buffer
uint32_t extent; // length of the range (presumably bytes — confirm)
} buffer;
// Valid when type == READBACK_TEXTURE.
struct {
Texture* object; // source texture
uint32_t offset[4]; // start of the region read from the texture
uint32_t extent[2]; // size of the region (presumably width, height — confirm)
} texture;
// Valid when type == READBACK_TALLY.
struct {
Tally* object; // source tally
uint32_t index; // first tally slot to read
uint32_t count; // number of tally slots to read
} tally;
};
} ReadbackInfo;
Readback* lovrReadbackCreate(const ReadbackInfo* info);