Readback, mostly;

This commit is contained in:
bjorn 2022-07-14 00:05:58 -07:00
parent 3bfd9ca0e1
commit 45135899a1
6 changed files with 347 additions and 60 deletions

View File

@ -448,6 +448,7 @@ if(LOVR_ENABLE_GRAPHICS)

View File

@ -1436,6 +1436,7 @@ extern const luaL_Reg lovrShader[];
extern const luaL_Reg lovrMaterial[];
extern const luaL_Reg lovrFont[];
extern const luaL_Reg lovrModel[];
extern const luaL_Reg lovrReadback[];
extern const luaL_Reg lovrTally[];
extern const luaL_Reg lovrPass[];
@ -1449,6 +1450,7 @@ int luaopen_lovr_graphics(lua_State* L) {
luax_registertype(L, Material);
luax_registertype(L, Font);
luax_registertype(L, Model);
luax_registertype(L, Readback);
luax_registertype(L, Tally);
luax_registertype(L, Pass);
return 1;

View File

@ -848,6 +848,61 @@ static int l_lovrPassMipmap(lua_State* L) {
return 0;
// Lua binding for Pass:read.  Schedules a readback of a Buffer, Texture, or Tally on the Pass and
// returns the resulting Readback object.  Argument 2 selects the variant:
//
//   pass:read(buffer, [index = 1], [count = elements from index to the end])
//   pass:read(texture, [x = 0], [y = 0], [arg5 = 1], [arg6 = 1], [width], [height])
//   pass:read(tally, [index = 1], [count = slots from index to the end])
//
// Indices are 1-based on the Lua side and converted to 0-based here.  Any other type for argument
// 2 raises a Lua type error.
static int l_lovrPassRead(lua_State* L) {
  Pass* pass = luax_checktype(L, 1, Pass);

  Buffer* buffer = luax_totype(L, 2, Buffer);

  if (buffer) {
    const BufferInfo* info = lovrBufferGetInfo(buffer);
    uint32_t index = luax_optu32(L, 3, 1) - 1;
    // Clamp instead of computing length - index directly: when index is past the end the
    // subtraction wraps, and the wrapped extent can make offset + extent look in-bounds.
    uint32_t remaining = index < info->length ? info->length - index : 0;
    uint32_t offset = index * info->stride;
    uint32_t extent = luax_optu32(L, 4, remaining) * info->stride;
    Readback* readback = lovrPassReadBuffer(pass, buffer, offset, extent);
    luax_pushtype(L, Readback, readback);
    lovrRelease(readback, lovrReadbackDestroy); // Drop the reference returned to this function
    return 1;
  }

  Texture* texture = luax_totype(L, 2, Texture);

  if (texture) {
    uint32_t offset[4], extent[3];
    offset[0] = luax_optu32(L, 3, 0);
    offset[1] = luax_optu32(L, 4, 0);
    // Arguments 5 and 6 are 1-based (presumably layer and mipmap level -- TODO confirm)
    offset[2] = luax_optu32(L, 5, 1) - 1;
    offset[3] = luax_optu32(L, 6, 1) - 1;
    // ~0u asks lovrPassReadTexture to use the rest of the texture in that dimension
    extent[0] = luax_optu32(L, 7, ~0u);
    extent[1] = luax_optu32(L, 8, ~0u);
    extent[2] = 1;
    Readback* readback = lovrPassReadTexture(pass, texture, offset, extent);
    luax_pushtype(L, Readback, readback);
    lovrRelease(readback, lovrReadbackDestroy);
    return 1;
  }

  Tally* tally = luax_totype(L, 2, Tally);

  if (tally) {
    uint32_t index = luax_optu32(L, 3, 1) - 1;
    // Default to the slots remaining after index, matching the Buffer variant.  Defaulting to the
    // full slot count made every call with index > 1 and no explicit count fail the range check.
    uint32_t total = lovrTallyGetInfo(tally)->count;
    uint32_t remaining = index < total ? total - index : 0;
    uint32_t count = luax_optu32(L, 4, remaining);
    Readback* readback = lovrPassReadTally(pass, tally, index, count);
    luax_pushtype(L, Readback, readback);
    lovrRelease(readback, lovrReadbackDestroy);
    return 1;
  }

  return luax_typeerror(L, 2, "Buffer, Texture, or Tally");
}
// Lua binding for Pass:tick.  Placeholder: performs no work and returns no values.
static int l_lovrPassTick(lua_State* L) {
  return 0;
}
// Lua binding for Pass:tock.  Placeholder: performs no work and returns no values.
static int l_lovrPassTock(lua_State* L) {
  return 0;
}
const luaL_Reg lovrPass[] = {
{ "getType", l_lovrPassGetType },
@ -911,6 +966,10 @@ const luaL_Reg lovrPass[] = {
{ "copy", l_lovrPassCopy },
{ "blit", l_lovrPassBlit },
{ "mipmap", l_lovrPassMipmap },
{ "read", l_lovrPassRead },
{ "tick", l_lovrPassTick },
{ "tock", l_lovrPassTock },

View File

@ -0,0 +1,49 @@
#include "api.h"
#include "graphics/graphics.h"
#include "data/image.h"
#include "util.h"
#include <lua.h>
#include <lauxlib.h>
// Lua binding for Readback:isComplete.  Returns one boolean: the result of lovrReadbackIsComplete
// for the Readback at argument 1.
static int l_lovrReadbackIsComplete(lua_State* L) {
  Readback* readback = luax_checktype(L, 1, Readback);
  lua_pushboolean(L, lovrReadbackIsComplete(readback));
  return 1;
}
// Lua binding for Readback:wait.  Returns one boolean: the result of lovrReadbackWait for the
// Readback at argument 1.
static int l_lovrReadbackWait(lua_State* L) {
  Readback* readback = luax_checktype(L, 1, Readback);
  lua_pushboolean(L, lovrReadbackWait(readback));
  return 1;
}
// Lua binding for Readback:getData.  Not implemented yet: validates that argument 1 is a Readback
// and returns nil.
static int l_lovrReadbackGetData(lua_State* L) {
  // TODO convert the pointer from lovrReadbackGetData into a Lua value
  (void) luax_checktype(L, 1, Readback);
  // A C function that returns 1 must push exactly one value; without this push Lua would hand the
  // caller whatever happened to be on top of the stack (or read an invalid slot if it is empty).
  lua_pushnil(L);
  return 1;
}
// Lua binding for Readback:getBlob.  Not implemented yet: validates that argument 1 is a Readback
// and returns nil.
static int l_lovrReadbackGetBlob(lua_State* L) {
  // TODO wrap the pointer from lovrReadbackGetData in a Blob
  (void) luax_checktype(L, 1, Readback);
  // A C function that returns 1 must push exactly one value; without this push Lua would hand the
  // caller whatever happened to be on top of the stack (or read an invalid slot if it is empty).
  lua_pushnil(L);
  return 1;
}
// Lua binding for Readback:getImage.  Pushes the result of lovrReadbackGetImage, which is NULL
// while the readback is incomplete and for readbacks created without an image.
// NOTE(review): relies on luax_pushtype accepting a NULL object -- confirm it pushes nil.
static int l_lovrReadbackGetImage(lua_State* L) {
  Readback* readback = luax_checktype(L, 1, Readback);
  Image* image = lovrReadbackGetImage(readback);
  luax_pushtype(L, Image, image);
  return 1;
}
// Method table for the Readback Lua type.
// NOTE(review): l_lovrReadbackGetBlob is defined in this file but is not registered here.
const luaL_Reg lovrReadback[] = {
  { "isComplete", l_lovrReadbackIsComplete },
  { "wait", l_lovrReadbackWait },
  { "getData", l_lovrReadbackGetData },
  { "getImage", l_lovrReadbackGetImage },
  { NULL, NULL } // Sentinel: luaL_Reg arrays are walked until an entry with a NULL name
};

View File

@ -213,6 +213,18 @@ struct Model {
uint32_t lastReskin;
// A pending or finished GPU -> CPU copy.  Readbacks are chained through `next` into the list
// headed by state.oldestReadback, which processReadbacks drains as ticks complete.
struct Readback {
  uint32_t ref; // Reference count, starts at 1
  uint32_t tick; // Value of state.tick when the readback was created
  uint32_t size; // Size in bytes of the mapped region and of the copy made on completion
  Readback* next; // Next (newer) readback in the pending list, or NULL
  ReadbackInfo info; // Copy of the creation parameters
  gpu_buffer* buffer; // Points at storage placed directly after this struct in the same allocation
  void* pointer; // Address returned by gpu_map for `buffer`; source of the final memcpy
  Image* image; // Destination for texture readbacks (width and height > 0), otherwise NULL
  void* data; // Destination for all other readbacks (malloc'd, `size` bytes), otherwise NULL
};
struct Tally {
uint32_t ref;
uint32_t tick;
@ -357,6 +369,8 @@ static struct {
Shader* timeWizard;
Shader* defaultShaders[DEFAULT_SHADER_COUNT];
gpu_vertex_format vertexFormats[VERTEX_FORMAT_COUNT];
Readback* oldestReadback;
Readback* newestReadback;
Material* defaultMaterial;
uint32_t materialBlock;
arr_t(MaterialBlock) materialBlocks;
@ -379,6 +393,7 @@ static void tempPop(uint32_t stack);
static int u64cmp(const void* a, const void* b);
static void beginFrame(void);
static void cleanupPasses(void);
static void processReadbacks(void);
static uint32_t getLayout(gpu_slot* slots, uint32_t count);
static gpu_bundle* getBundle(uint32_t layout);
static gpu_texture* getAttachment(uint32_t size[2], uint32_t layers, TextureFormat format, bool srgb, uint32_t samples);
@ -930,7 +945,7 @@ void lovrGraphicsSubmit(Pass** passes, uint32_t count) {
void lovrGraphicsWait() {
// Buffer
@ -1789,12 +1804,12 @@ void lovrShaderGetLocalWorkgroupSize(Shader* shader, uint32_t size[3]) {
Material* lovrMaterialCreate(const MaterialInfo* info) {
MaterialBlock* block = &[state.materialBlock];
if (!block || block->head == ~0u || !gpu_finished(block->list[block->head].tick)) {
if (!block || block->head == ~0u || !gpu_is_complete(block->list[block->head].tick)) {
bool found = false;
for (size_t i = 0; i < state.materialBlocks.length; i++) {
block = &[i];
if (block->head != ~0u && gpu_finished(block->list[block->head].tick)) {
if (block->head != ~0u && gpu_is_complete(block->list[block->head].tick)) {
state.materialBlock = i;
found = true;
@ -2704,7 +2719,61 @@ static void lovrModelReskin(Model* model) {
// Readback
// Creates a Readback for the current tick and appends it to the pending readback list.
//
// When info->width and info->height are both non-zero the destination is a newly created raw
// Image of that size and format; otherwise the destination is a malloc'd block of info->size
// bytes.  In both cases a `size`-byte region is mapped with gpu_map as the copy target.
//
// Returns the new Readback with a reference count of 1.
// NOTE(review): processReadbacks releases each list entry, but no matching retain for the list's
// reference is visible here -- confirm who owns which reference.
Readback* lovrReadbackCreate(const ReadbackInfo* info) {
  // The gpu_buffer storage shares the allocation and sits immediately after the Readback
  Readback* readback = calloc(1, sizeof(Readback) + gpu_sizeof_buffer());
  lovrAssert(readback, "Out of memory");
  readback->ref = 1;
  readback->tick = state.tick;
  readback->info = *info;
  readback->buffer = (gpu_buffer*) (readback + 1);

  if (readback->info.width > 0 && readback->info.height > 0) {
    readback->size = measureTexture(info->format, info->width, info->height, 1);
    readback->image = lovrImageCreateRaw(info->width, info->height, info->format);
  } else {
    readback->size = info->size;
    readback->data = malloc(info->size);
    lovrAssert(readback->data, "Out of memory");
  }

  readback->pointer = gpu_map(readback->buffer, readback->size, 16, GPU_MAP_READ);

  // Append to the tail of the list.  The tail pointer has to advance on every insert; previously
  // it was only set for the first element, so each later readback overwrote the first one's
  // `next` and anything already linked there was dropped from the list.
  if (!state.oldestReadback) state.oldestReadback = readback;
  if (state.newestReadback) state.newestReadback->next = readback;
  state.newestReadback = readback;

  return readback;
}
void lovrReadbackDestroy(void* ref) {
Readback* readback = ref;
lovrRelease(readback->image, lovrImageDestroy);
// Reports whether gpu_is_complete considers the readback's tick finished.
bool lovrReadbackIsComplete(Readback* readback) {
  return gpu_is_complete(readback->tick);
}
// Waits for the readback's tick via gpu_wait_tick and returns that call's result.  Returns false
// without waiting when the early-out condition below holds (which includes the readback already
// being complete).
bool lovrReadbackWait(Readback* readback) {
// NOTE(review): this condition is not valid C as shown -- an operand is missing between `&&` and
// `||`.  Restore it from the original change before building.
if ((state.tick == readback->tick && || lovrReadbackIsComplete(readback)) {
return false;
bool waited = gpu_wait_tick(readback->tick);
// NOTE(review): no statement is visible inside this branch, and the closing braces of this
// function are missing from this view.
if (waited) {
return waited;
// Returns the readback's data pointer once the readback is complete, NULL before that.  The
// pointer is also NULL for readbacks that were created with an Image destination.
void* lovrReadbackGetData(Readback* readback) {
  return lovrReadbackIsComplete(readback) ? readback->data : NULL;
}
// Returns the readback's Image once the readback is complete, NULL before that.  The Image is
// also NULL for readbacks that were created with a raw data destination.
Image* lovrReadbackGetImage(Readback* readback) {
  return lovrReadbackIsComplete(readback) ? readback->image : NULL;
}
// Tally
@ -2749,6 +2818,62 @@ void lovrTallyDestroy(void* ref) {
// Returns a read-only pointer to the TallyInfo stored inside the Tally.
const TallyInfo* lovrTallyGetInfo(Tally* tally) {
  return &tally->info;
}
// Tally timestamps aren't very usable in their raw state, since they use unspecified units, aren't
// durations, and when using multiview there's one per view. To make them easier to work with, copy
// them to a temporary buffer, then dispatch a compute shader to subtract pairs and convert to ns,
// writing the final friendly values to a destination Buffer.
//
// tally:  source Tally; its `gpu` and `buffer` members are used for the intermediate copy
// index:  first tally slot to resolve
// count:  number of slots to resolve
// buffer: destination gpu buffer, bound as storage buffer slot 1
// offset: byte offset of the bound range in `buffer`; the range is count * sizeof(uint32_t) bytes
// stream: gpu stream all commands are recorded on
//
// NOTE(review): several pieces of this function are missing from this view (the gpu_barrier
// fields, the arrays indexed for `pipeline` and `layout`, and most closing braces/parentheses).
// Restore them from the original change before building.
static void lovrTallyResolve(Tally* tally, uint32_t index, uint32_t count, gpu_buffer* buffer, uint32_t offset, gpu_stream* stream) {
// Copy the raw values for [index, index + count) into the tally's own buffer, starting at 0
gpu_copy_tally_buffer(stream, tally->gpu, tally->buffer, index, 0, count, 4);
// Barrier between the copy above and the compute dispatch below
gpu_sync(stream, &(gpu_barrier) {
}, 1);
// The fixup shader is created on first use and kept in state.timeWizard
if (!state.timeWizard) {
state.timeWizard = lovrShaderCreate(&(ShaderInfo) {
.source[0] = { lovr_shader_timewizard_comp, sizeof(lovr_shader_timewizard_comp) },
.label = "timewizard"
gpu_pipeline* pipeline =[state.timeWizard->computePipeline];
gpu_layout* layout =[state.timeWizard->layout].gpu;
gpu_shader* shader = state.timeWizard->gpu;
// Slot 0: the tally's intermediate buffer (whole buffer).  Slot 1: the destination range.
gpu_binding bindings[] = {
[0] = { 0, GPU_SLOT_STORAGE_BUFFER, .buffer = { tally->buffer, 0, ~0u } },
[1] = { 1, GPU_SLOT_STORAGE_BUFFER, .buffer = { buffer, offset, count * sizeof(uint32_t) } }
gpu_bundle* bundle = getBundle(state.timeWizard->layout);
gpu_bundle_info bundleInfo = { layout, bindings, COUNTOF(bindings) };
gpu_bundle_write(&bundle, &bundleInfo, 1);
// Push constants handed to the shader
struct { uint32_t first, count, views; float period; } constants = {
.first = index,
.count = count,
.views = tally->info.views,
.period = state.limits.timestampPeriod
gpu_bind_pipeline(stream, pipeline, true);
gpu_bind_bundle(stream, shader, 0, bundle, NULL, 0);
gpu_push_constants(stream, shader, &constants, sizeof(constants));
// Dispatch ceil(count / 32) workgroups
gpu_compute(stream, (count + 31) / 32, 1, 1);
// Pass
Pass* lovrGraphicsGetPass(PassInfo* info) {
@ -4697,56 +4822,11 @@ void lovrPassCopyTallyToBuffer(Pass* pass, Tally* tally, Buffer* buffer, uint32_
if (tally->info.type == TALLY_TIMER) {
gpu_copy_tally_buffer(pass->stream, tally->gpu, tally->buffer, srcIndex, 0, count, 4);
// Wait for transfer to finish, then dispatch a compute shader to fixup timestamps
gpu_sync(pass->stream, &(gpu_barrier) {
}, 1);
if (!state.timeWizard) {
state.timeWizard = lovrShaderCreate(&(ShaderInfo) {
.source[0] = { lovr_shader_timewizard_comp, sizeof(lovr_shader_timewizard_comp) },
.label = "timewizard"
gpu_pipeline* pipeline =[state.timeWizard->computePipeline];
gpu_layout* layout =[state.timeWizard->layout].gpu;
gpu_shader* shader = state.timeWizard->gpu;
gpu_binding bindings[] = {
[0] = { 0, GPU_SLOT_STORAGE_BUFFER, .buffer = { tally->buffer, 0, ~0u } },
[1] = { 1, GPU_SLOT_STORAGE_BUFFER, .buffer = { buffer->gpu, dstOffset, count * sizeof(uint32_t) } }
gpu_bundle* bundle = getBundle(state.timeWizard->layout);
gpu_bundle_info bundleInfo = { layout, bindings, COUNTOF(bindings) };
gpu_bundle_write(&bundle, &bundleInfo, 1);
struct { uint32_t first, count, views; float period; } constants = {
.first = srcIndex,
.count = count,
.views = tally->info.views,
.period = state.limits.timestampPeriod
gpu_bind_pipeline(pass->stream, pipeline, true);
gpu_bind_bundle(pass->stream, shader, 0, bundle, NULL, 0);
gpu_push_constants(pass->stream, shader, &constants, sizeof(constants));
gpu_compute(pass->stream, (count + 31) / 32, 1, 1);
lovrTallyResolve(tally, srcIndex, count, buffer->gpu, dstOffset, pass->stream);
} else {
gpu_copy_tally_buffer(pass->stream, tally->gpu, buffer->gpu, srcIndex, dstOffset, count, 4);
uint32_t stride = tally->info.type == TALLY_STAGE ? 24 : 4;
gpu_copy_tally_buffer(pass->stream, tally->gpu, buffer->gpu, srcIndex, dstOffset, count, stride);
@ -4829,7 +4909,7 @@ void lovrPassMipmap(Pass* pass, Texture* texture, uint32_t base, uint32_t count)
lovrCheck(pass->info.type == PASS_TRANSFER, "This function can only be called on a transfer pass");
lovrCheck(!texture->info.parent, "Can not mipmap a Texture view");
lovrCheck(texture->info.samples == 1, "Can not mipmap a multisampled texture");
lovrCheck(texture->info.usage & TEXTURE_TRANSFER, "Texture must be created with the 'transfer' usage to mipmap %s it", "from");
lovrCheck(texture->info.usage & TEXTURE_TRANSFER, "Texture must be created with the 'transfer' usage to mipmap it");
lovrCheck(state.features.formats[texture->info.format] & GPU_FEATURE_BLIT_SRC, "This GPU does not support blitting %s the source texture's format, which is required for mipmapping", "from");
lovrCheck(state.features.formats[texture->info.format] & GPU_FEATURE_BLIT_DST, "This GPU does not support blitting %s the source texture's format, which is required for mipmapping", "to");
lovrCheck(base + count < texture->info.mipmaps, "Trying to generate too many mipmaps");
@ -4837,6 +4917,56 @@ void lovrPassMipmap(Pass* pass, Texture* texture, uint32_t base, uint32_t count)
// Records a copy of `extent` bytes starting at byte `offset` of the Buffer into a new Readback.
//
// pass:   must be a transfer pass
// buffer: source Buffer; temporary buffers are rejected
// offset: byte offset into the Buffer
// extent: number of bytes to read
//
// Returns the Readback created by lovrReadbackCreate.  Failed lovrCheck conditions are reported
// with the messages below.
Readback* lovrPassReadBuffer(Pass* pass, Buffer* buffer, uint32_t offset, uint32_t extent) {
  lovrCheck(pass->info.type == PASS_TRANSFER, "This function can only be called on a transfer pass");
  lovrCheck(!lovrBufferIsTemporary(buffer), "Unable to read back a temporary buffer");
  // Written without computing offset + extent, which can wrap around in 32 bits and let an
  // out-of-range request pass the check
  lovrCheck(offset <= buffer->size && extent <= buffer->size - offset, "Tried to read past the end of the Buffer");
  Readback* readback = lovrReadbackCreate(&(ReadbackInfo) { .size = extent });
  gpu_copy_buffers(pass->stream, buffer->gpu, readback->buffer, offset, 0, extent);
  return readback;
}
// Records a copy of a rectangular region of a Texture into a new image-backed Readback.
//
// pass:    must be a transfer pass
// texture: source Texture; must not be a view, must be single-sampled, and needs the 'transfer'
//          usage
// offset:  x, y, plus two further components that are passed through to the copy
//          (presumably layer and mipmap level -- TODO confirm)
// extent:  width, height, layer count.  A width or height of ~0u is replaced IN PLACE with the
//          distance from the offset to the texture's edge; the layer count must be 1.
//
// Returns the Readback created by lovrReadbackCreate.
// NOTE(review): the ~0u defaults use the texture's base width/height and subtract the offset
// without checking it first; range validation is left to checkTextureBounds.
Readback* lovrPassReadTexture(Pass* pass, Texture* texture, uint32_t offset[4], uint32_t extent[3]) {
  if (extent[0] == ~0u) extent[0] = texture->info.width - offset[0];
  if (extent[1] == ~0u) extent[1] = texture->info.height - offset[1];
  lovrCheck(extent[2] == 1, "Currently, only one layer can be read from a Texture");
  lovrCheck(pass->info.type == PASS_TRANSFER, "This function can only be called on a transfer pass");
  lovrCheck(!texture->info.parent, "Can not read from a Texture view");
  lovrCheck(texture->info.samples == 1, "Can not read from a multisampled texture");
  lovrCheck(texture->info.usage & TEXTURE_TRANSFER, "Texture must be created with the 'transfer' usage to read from it");
  checkTextureBounds(&texture->info, offset, extent);

  Readback* readback = lovrReadbackCreate(&(ReadbackInfo) {
    .width = extent[0],
    .height = extent[1],
    .format = texture->info.format
  });

  gpu_copy_texture_buffer(pass->stream, texture->gpu, readback->buffer, offset, 0, extent);
  return readback;
}
// Records a copy of `count` Tally slots starting at slot `index` into a new Readback.
//
// pass:  must be a transfer pass
// tally: source Tally; every slot in the range must have its bit set in tally->masks
// index: first slot (0-based)
// count: number of slots
//
// Each slot takes 24 bytes for TALLY_STAGE tallies and 4 bytes otherwise.  TALLY_TIMER tallies go
// through lovrTallyResolve; all other types are copied directly.
//
// Returns the Readback created by lovrReadbackCreate.
Readback* lovrPassReadTally(Pass* pass, Tally* tally, uint32_t index, uint32_t count) {
  lovrCheck(pass->info.type == PASS_TRANSFER, "This function can only be called on a transfer pass");
  // Written without computing index + count, which can wrap around in 32 bits and let an
  // out-of-range request reach the mask lookup below
  lovrCheck(index <= tally->info.count && count <= tally->info.count - index, "Tally read range exceeds the number of slots in the Tally");

  for (uint32_t i = 0; i < count; i++) {
    uint32_t j = index + i;
    lovrCheck(tally->masks[j / 64] & (1ull << (j % 64)), "Trying to copy Tally slot %d, but it hasn't been marked yet", j + 1);
  }

  uint32_t stride = tally->info.type == TALLY_STAGE ? 24 : 4;
  Readback* readback = lovrReadbackCreate(&(ReadbackInfo) { .size = count * stride });

  if (tally->info.type == TALLY_TIMER) {
    lovrTallyResolve(tally, index, count, readback->buffer, 0, pass->stream);
  } else {
    gpu_copy_tally_buffer(pass->stream, tally->gpu, readback->buffer, index, 0, count, stride);
  }

  return readback;
}
void lovrPassTick(Pass* pass, Tally* tally, uint32_t index) {
lovrCheck(index < tally->info.count, "Trying to use tally slot #%d, but the tally only has %d slots", index + 1, tally->info.count);
lovrCheck(~tally->masks[index / 64] & (1 << (index % 64)), "Tally slot #%d has already been used", index + 1);
@ -4908,6 +5038,7 @@ static void beginFrame(void) {
state.tick = gpu_begin(); = gpu_stream_begin("Internal uploads");
state.allocator.cursor = 0;
// Clean up ALL passes created during the frame, even unsubmitted ones
@ -4921,17 +5052,41 @@ static void cleanupPasses(void) {
lovrRelease(access->texture, lovrTextureDestroy);
for (size_t j = 0; j <= pass->pipelineIndex; j++) {
lovrRelease(pass->pipelines[j].sampler, lovrSamplerDestroy);
lovrRelease(pass->pipelines[j].shader, lovrShaderDestroy);
lovrRelease(pass->pipelines[j].material, lovrMaterialDestroy);
pass->pipelines[j].sampler = NULL;
pass->pipelines[j].shader = NULL;
pass->pipelines[j].material = NULL;
if (pass->info.type == PASS_RENDER) {
for (size_t j = 0; j <= pass->pipelineIndex; j++) {
lovrRelease(pass->pipelines[j].sampler, lovrSamplerDestroy);
lovrRelease(pass->pipelines[j].shader, lovrShaderDestroy);
lovrRelease(pass->pipelines[j].material, lovrMaterialDestroy);
pass->pipelines[j].sampler = NULL;
pass->pipelines[j].shader = NULL;
pass->pipelines[j].material = NULL;
// Finishes every readback at the front of the pending list whose tick gpu_is_complete reports as
// done.  For each one, the mapped bytes are copied into its destination (layer 0 of the Image, or
// the raw data block), the readback is unlinked from the list, and one reference is released.
// Processing stops at the first readback whose tick is not complete yet.
static void processReadbacks(void) {
  while (state.oldestReadback && gpu_is_complete(state.oldestReadback->tick)) {
    Readback* readback = state.oldestReadback;

    if (readback->image) {
      size_t size = lovrImageGetLayerSize(readback->image, 0);
      void* data = lovrImageGetLayerData(readback->image, 0, 0);
      memcpy(data, readback->pointer, size);
    } else {
      memcpy(readback->data, readback->pointer, readback->size);
    }

    // Read the link before releasing, since the release may destroy the readback
    Readback* next = readback->next;
    lovrRelease(readback, lovrReadbackDestroy);
    state.oldestReadback = next;
  }

  // An empty list has no tail either
  if (!state.oldestReadback) {
    state.newestReadback = NULL;
  }
}
static uint32_t getLayout(gpu_slot* slots, uint32_t count) {
uint64_t hash = hash64(slots, count * sizeof(gpu_slot));
@ -4979,7 +5134,7 @@ static gpu_bundle* getBundle(uint32_t layoutIndex) {
pool->tick = state.tick;
pool = layout->head;
if (pool && gpu_finished(pool->tick)) {
if (pool && gpu_is_complete(pool->tick)) {
pool->cursor = 1;
return pool->bundles;

View File

@ -16,6 +16,7 @@ typedef struct Shader Shader;
typedef struct Material Material;
typedef struct Font Font;
typedef struct Model Model;
typedef struct Readback Readback;
typedef struct Tally Tally;
typedef struct Pass Pass;
@ -426,6 +427,23 @@ Material* lovrModelGetMaterial(Model* model, uint32_t index);
Buffer* lovrModelGetVertexBuffer(Model* model);
Buffer* lovrModelGetIndexBuffer(Model* model);
// Readback
// Creation parameters for a Readback.  When width and height are both non-zero,
// lovrReadbackCreate makes an Image-backed readback and derives the byte size from format, width
// and height; otherwise it allocates `size` bytes of raw data.
typedef struct {
uint32_t size; // Byte size of a raw readback; not read when width and height are both non-zero
uint32_t width; // Image width; 0 selects a raw readback
uint32_t height; // Image height; 0 selects a raw readback
uint32_t format; // Format of the Image (lovrPassReadTexture passes the source texture's format)
} ReadbackInfo;
// Creates a Readback from `info` and adds it to the list of pending readbacks.
Readback* lovrReadbackCreate(const ReadbackInfo* info);
// Destructor, intended to be passed to lovrRelease.
void lovrReadbackDestroy(void* ref);
// NOTE(review): no definition of this function appears in the visible part of this change.
const ReadbackInfo* lovrReadbackGetInfo(Readback* readback);
// Returns whether the tick the readback was created in is complete.
bool lovrReadbackIsComplete(Readback* readback);
// Waits for the readback's tick; returns false if no wait was performed.
bool lovrReadbackWait(Readback* readback);
// Returns the raw data pointer, or NULL while the readback is incomplete.
void* lovrReadbackGetData(Readback* readback);
// Returns the Image, or NULL while the readback is incomplete.
struct Image* lovrReadbackGetImage(Readback* readback);
// Tally
typedef enum {
@ -591,5 +609,8 @@ void lovrPassCopyImageToTexture(Pass* pass, struct Image* src, Texture* dst, uin
void lovrPassCopyTextureToTexture(Pass* pass, Texture* src, Texture* dst, uint32_t srcOffset[4], uint32_t dstOffset[4], uint32_t extent[3]);
void lovrPassBlit(Pass* pass, Texture* src, Texture* dst, uint32_t srcOffset[4], uint32_t dstOffset[4], uint32_t srcExtent[3], uint32_t dstExtent[3], FilterMode filter);
void lovrPassMipmap(Pass* pass, Texture* texture, uint32_t base, uint32_t count);
Readback* lovrPassReadBuffer(Pass* pass, Buffer* buffer, uint32_t index, uint32_t count);
Readback* lovrPassReadTexture(Pass* pass, Texture* texture, uint32_t offset[4], uint32_t extent[3]);
Readback* lovrPassReadTally(Pass* pass, Tally* tally, uint32_t index, uint32_t count);
void lovrPassTick(Pass* pass, Tally* tally, uint32_t index);
void lovrPassTock(Pass* pass, Tally* tally, uint32_t index);