Compare commits

...

9 Commits

Author SHA1 Message Date
bjorn 7b5c816345 More efficient resource tracking;
- Temporary buffers are not tracked
- Sample-only textures are not tracked, but their initial upload is synchronized.
- Default texture is sample-only and is created from a white 4x4 Image instead of being cleared on the GPU
2022-06-04 14:54:04 -07:00
bjorn 6245aaa9d9 I have made a mistake; 2022-06-04 14:49:58 -07:00
bjorn 3a43d44460 Pass tracks resources for sync purposes; 2022-06-04 14:28:23 -07:00
bjorn ea80936af7 Pass:compute; 2022-06-04 11:54:05 -07:00
bjorn d8583c6a9d gpu_compute; gpu_compute_indirect; 2022-06-04 11:53:59 -07:00
bjorn d3a1a0ec22 Pass:cube; Pass:box; 2022-06-04 11:28:35 -07:00
bjorn 3013567e73 Fix Pass:translate; 2022-06-04 11:28:31 -07:00
bjorn e07a2691e0 Clean up plane; 2022-06-04 11:19:28 -07:00
bjorn 62f2d9a800 Organization; 2022-06-04 09:57:06 -07:00
6 changed files with 386 additions and 90 deletions

View File

@ -135,8 +135,8 @@ static int l_lovrPassOrigin(lua_State* L) {
static int l_lovrPassTranslate(lua_State* L) {
float translation[4];
Pass* pass = luax_checktype(L, 2, Pass);
luax_readvec3(L, 1, translation, NULL);
Pass* pass = luax_checktype(L, 1, Pass);
luax_readvec3(L, 2, translation, NULL);
lovrPassTranslate(pass, translation);
return 0;
}
@ -420,9 +420,40 @@ static int l_lovrPassPlane(lua_State* L) {
Pass* pass = luax_checktype(L, 1, Pass);
float transform[16];
int index = luax_readmat4(L, 2, transform, 2);
uint32_t hsegments = luax_optu32(L, index++, 1);
uint32_t vsegments = luax_optu32(L, index, hsegments);
lovrPassPlane(pass, transform, hsegments, vsegments);
uint32_t cols = luax_optu32(L, index++, 1);
uint32_t rows = luax_optu32(L, index, cols);
lovrPassPlane(pass, transform, cols, rows);
return 0;
}
// Lua binding: Pass:cube(transform...)
// Reads a transform starting at argument 2 (luax_readmat4 is given 1 as its final
// argument here, where the box binding passes 3) and draws it via lovrPassBox.
// Returns no values to Lua.
static int l_lovrPassCube(lua_State* L) {
  float matrix[16];
  Pass* self = luax_checktype(L, 1, Pass);
  luax_readmat4(L, 2, matrix, 1);
  lovrPassBox(self, matrix);
  return 0;
}
// Lua binding: Pass:box(transform...)
// Reads a transform starting at argument 2 (luax_readmat4 is given 3 as its final
// argument here, where the cube binding passes 1) and draws it via lovrPassBox.
// Returns no values to Lua.
static int l_lovrPassBox(lua_State* L) {
  float matrix[16];
  Pass* self = luax_checktype(L, 1, Pass);
  luax_readmat4(L, 2, matrix, 3);
  lovrPassBox(self, matrix);
  return 0;
}
// Lua binding: Pass:compute
// Two call shapes are accepted:
//   Pass:compute(buffer, offset) - indirect dispatch, counts are read from the Buffer
//   Pass:compute(x, y, z)        - direct dispatch, each count defaults to 1
// Returns no values to Lua.
static int l_lovrPassCompute(lua_State* L) {
  Pass* pass = luax_checktype(L, 1, Pass);
  Buffer* indirect = luax_totype(L, 2, Buffer);

  // Direct dispatch: argument 2 was not a Buffer, so arguments 2-4 are group counts
  if (!indirect) {
    uint32_t groupsX = luax_optu32(L, 2, 1);
    uint32_t groupsY = luax_optu32(L, 3, 1);
    uint32_t groupsZ = luax_optu32(L, 4, 1);
    lovrPassCompute(pass, groupsX, groupsY, groupsZ, NULL, 0);
    return 0;
  }

  // Indirect dispatch: argument 3 is the byte offset into the Buffer
  uint32_t byteOffset = lua_tointeger(L, 3);
  lovrPassCompute(pass, 0, 0, 0, indirect, byteOffset);
  return 0;
}
@ -604,6 +635,10 @@ const luaL_Reg lovrPass[] = {
{ "points", l_lovrPassPoints },
{ "line", l_lovrPassLine },
{ "plane", l_lovrPassPlane },
{ "cube", l_lovrPassCube },
{ "box", l_lovrPassBox },
{ "compute", l_lovrPassCompute },
{ "clear", l_lovrPassClear },
{ "copy", l_lovrPassCopy },

View File

@ -544,6 +544,8 @@ void gpu_draw(gpu_stream* stream, uint32_t vertexCount, uint32_t instanceCount,
void gpu_draw_indexed(gpu_stream* stream, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, uint32_t baseVertex, uint32_t baseInstance);
void gpu_draw_indirect(gpu_stream* stream, gpu_buffer* buffer, uint32_t offset, uint32_t drawCount);
void gpu_draw_indirect_indexed(gpu_stream* stream, gpu_buffer* buffer, uint32_t offset, uint32_t drawCount);
void gpu_compute(gpu_stream* stream, uint32_t x, uint32_t y, uint32_t z);
void gpu_compute_indirect(gpu_stream* stream, gpu_buffer* buffer, uint32_t offset);
void gpu_copy_buffers(gpu_stream* stream, gpu_buffer* src, gpu_buffer* dst, uint32_t srcOffset, uint32_t dstOffset, uint32_t size);
void gpu_copy_textures(gpu_stream* stream, gpu_texture* src, gpu_texture* dst, uint32_t srcOffset[4], uint32_t dstOffset[4], uint32_t size[3]);
void gpu_copy_buffer_texture(gpu_stream* stream, gpu_buffer* src, gpu_texture* dst, uint32_t srcOffset, uint32_t dstOffset[4], uint32_t extent[3]);

View File

@ -1492,6 +1492,14 @@ void gpu_draw_indirect_indexed(gpu_stream* stream, gpu_buffer* buffer, uint32_t
vkCmdDrawIndexedIndirect(stream->commands, buffer->handle, buffer->offset + offset, drawCount, 20);
}
// Records a compute dispatch into the stream's command buffer.
// x, y and z are forwarded unchanged to vkCmdDispatch as the workgroup counts per dimension.
void gpu_compute(gpu_stream* stream, uint32_t x, uint32_t y, uint32_t z) {
vkCmdDispatch(stream->commands, x, y, z);
}
// Records an indirect compute dispatch into the stream's command buffer.
// The dispatch parameters are read by the GPU from `buffer`; `offset` is relative to the
// gpu_buffer and is added to the buffer's own offset before being handed to Vulkan.
void gpu_compute_indirect(gpu_stream* stream, gpu_buffer* buffer, uint32_t offset) {
vkCmdDispatchIndirect(stream->commands, buffer->handle, buffer->offset + offset);
}
void gpu_copy_buffers(gpu_stream* stream, gpu_buffer* src, gpu_buffer* dst, uint32_t srcOffset, uint32_t dstOffset, uint32_t size) {
vkCmdCopyBuffer(stream->commands, src->handle, dst->handle, 1, &(VkBufferCopy) {
.srcOffset = src->offset + srcOffset,

View File

@ -81,7 +81,7 @@ static size_t measure(uint32_t w, uint32_t h, TextureFormat format) {
Image* lovrImageCreateRaw(uint32_t width, uint32_t height, TextureFormat format) {
lovrCheck(width > 0 && height > 0, "Image dimensions must be positive");
lovrCheck(format < FORMAT_BC1, "Blank images cannot be compressed");
size_t size = measure(format, width, height);
size_t size = measure(width, height, format);
void* data = malloc(size);
Image* image = calloc(1, sizeof(Image));
lovrAssert(image && data, "Out of memory");
@ -178,7 +178,7 @@ void* lovrImageGetLayerData(Image* image, uint32_t level, uint32_t layer) {
void lovrImageGetPixel(Image* image, uint32_t x, uint32_t y, float pixel[4]) {
lovrCheck(!lovrImageIsCompressed(image), "Unable to access individual pixels of a compressed image");
lovrAssert(x < image->width && y < image->height, "Pixel coordinates must be within Image bounds");
size_t offset = measure(image->format, y * image->width + x, 1);
size_t offset = measure(y * image->width + x, 1, image->format);
uint8_t* u8 = (uint8_t*) image->mipmaps[0].data + offset;
uint16_t* u16 = (uint16_t*) u8;
float* f32 = (float*) u8;
@ -799,7 +799,7 @@ static Image* loadDDS(Blob* blob) {
size_t stride = 0;
for (uint32_t i = 0, width = image->width, height = image->height; i < levels; i++) {
size_t size = measure(format, width, height);
size_t size = measure(width, height, format);
lovrAssert(length >= size, "DDS file overflow");
image->mipmaps[i] = (Mipmap) { data, size, 0 };
width = MAX(width >> 1, 1);

View File

@ -20,7 +20,7 @@ uint32_t os_vk_create_surface(void* instance, void** surface);
const char** os_vk_get_instance_extensions(uint32_t* count);
#define MAX_FRAME_MEMORY (1 << 30)
#define MAX_RESOURCES_PER_SHADER 32
#define MAX_SHADER_RESOURCES 32
typedef struct {
gpu_vertex_format gpu;
@ -28,6 +28,14 @@ typedef struct {
uint32_t mask;
} VertexFormat;
// Synchronization state for a GPU resource; a Sync is embedded in both Buffer and Texture.
// NOTE(review): the per-field notes below are inferred from the field names and types only;
// confirm them against the code that reads and writes these fields.
typedef struct {
gpu_phase readPhase; // presumably the phase(s) that have read the resource
gpu_phase writePhase; // presumably the phase that last wrote the resource
gpu_cache pendingReads; // presumably caches with reads not yet synchronized
gpu_cache pendingWrites; // presumably caches with writes not yet flushed
uint32_t lastWriteIndex; // presumably identifies the most recent writer; TODO confirm what it indexes
} Sync;
struct Buffer {
uint32_t ref;
uint32_t size;
@ -35,6 +43,7 @@ struct Buffer {
gpu_buffer* gpu;
BufferInfo info;
VertexFormat format;
Sync sync;
};
struct Texture {
@ -42,6 +51,7 @@ struct Texture {
gpu_texture* gpu;
gpu_texture* renderView;
TextureInfo info;
Sync sync;
};
struct Sampler {
@ -107,6 +117,14 @@ typedef struct {
float color[4];
} DrawData;
// A single resource access entry; Pass keeps a dynamic array of these in its `access` field.
// NOTE(review): presumably filled in by trackBuffer/trackTexture, whose (phase, cache)
// parameters match the fields below; confirm against their definitions.
typedef struct {
Sync* sync; // sync state associated with the access (Buffer and Texture each embed a Sync)
Buffer* buffer; // presumably set when the accessed resource is a Buffer
Texture* texture; // presumably set when the accessed resource is a Texture
gpu_phase phase; // phase(s) in which the access happens
gpu_cache cache; // cache(s) through which the access happens
} Access;
struct Pass {
uint32_t ref;
PassInfo info;
@ -130,6 +148,7 @@ struct Pass {
gpu_binding builtins[3];
gpu_buffer* vertexBuffer;
gpu_buffer* indexBuffer;
arr_t(Access) access;
};
typedef enum {
@ -143,8 +162,8 @@ typedef enum {
typedef struct {
struct { float x, y, z; } position;
uint32_t normal;
struct { uint16_t u, v; } uv;
struct { float x, y, z; } normal;
struct { float u, v; } uv;
} ShapeVertex;
typedef struct {
@ -205,6 +224,7 @@ static struct {
bool active;
uint32_t tick;
Pass* transfers;
bool syncTextureUpload;
gpu_device_info device;
gpu_features features;
gpu_limits limits;
@ -226,6 +246,7 @@ static struct {
// Helpers
static void* tempAlloc(size_t size);
static void* tempGrow(void* p, size_t size);
static void beginFrame(void);
static gpu_stream* getTransfers(void);
static uint32_t getLayout(gpu_slot* slots, uint32_t count);
@ -233,7 +254,9 @@ static gpu_bundle* getBundle(uint32_t layout);
static gpu_texture* getAttachment(uint32_t size[2], uint32_t layers, TextureFormat format, bool srgb, uint32_t samples);
static size_t measureTexture(TextureFormat format, uint16_t w, uint16_t h, uint16_t d);
static void checkTextureBounds(const TextureInfo* info, uint32_t offset[4], uint32_t extent[3]);
static uint32_t findShaderSlot(Shader* shader, const char* name, size_t length);
static ShaderResource* findShaderResource(Shader* shader, const char* name, size_t length, uint32_t slot);
static void trackBuffer(Pass* pass, Buffer* buffer, gpu_phase phase, gpu_cache cache);
static void trackTexture(Pass* pass, Texture* texture, gpu_phase phase, gpu_cache cache);
static void checkShaderFeatures(uint32_t* features, uint32_t count);
static void onMessage(void* context, const char* message, bool severe);
@ -288,9 +311,18 @@ bool lovrGraphicsInit(bool debug, bool vsync) {
.label = "Default Buffer"
}, NULL);
Image* image = lovrImageCreateRaw(4, 4, FORMAT_RGBA8);
float white[4] = { 1.f, 1.f, 1.f, 1.f };
for (uint32_t y = 0; y < 4; y++) {
for (uint32_t x = 0; x < 4; x++) {
lovrImageSetPixel(image, x, y, white);
}
}
state.defaultTexture = lovrTextureCreate(&(TextureInfo) {
.type = TEXTURE_2D,
.usage = TEXTURE_SAMPLE | TEXTURE_TRANSFER,
.usage = TEXTURE_SAMPLE,
.format = FORMAT_RGBA8,
.width = 4,
.height = 4,
@ -298,9 +330,13 @@ bool lovrGraphicsInit(bool debug, bool vsync) {
.mipmaps = 1,
.samples = 1,
.srgb = true,
.imageCount = 1,
.images = &image,
.label = "Default Texture"
});
lovrRelease(image, lovrImageDestroy);
state.defaultSampler = lovrSamplerCreate(&(SamplerInfo) {
.min = FILTER_LINEAR,
.mag = FILTER_LINEAR,
@ -310,15 +346,14 @@ bool lovrGraphicsInit(bool debug, bool vsync) {
gpu_stream* transfers = getTransfers();
gpu_clear_buffer(transfers, state.defaultBuffer->gpu, 0, 4096);
gpu_clear_texture(transfers, state.defaultTexture->gpu, (float[4]) { 1.f, 1.f, 1.f, 1.f }, 0, ~0u, 0, ~0u);
state.vertexFormats[VERTEX_SHAPE].gpu = (gpu_vertex_format) {
.bufferCount = 2,
.attributeCount = 3,
.bufferStrides[1] = sizeof(ShapeVertex),
.attributes[0] = { 1, 0, offsetof(ShapeVertex, position), GPU_TYPE_F32x3 },
.attributes[1] = { 1, 1, offsetof(ShapeVertex, normal), GPU_TYPE_UN10x3 },
.attributes[2] = { 1, 2, offsetof(ShapeVertex, uv), GPU_TYPE_UN16x2 }
.attributes[1] = { 1, 1, offsetof(ShapeVertex, normal), GPU_TYPE_F32x3 },
.attributes[2] = { 1, 2, offsetof(ShapeVertex, uv), GPU_TYPE_F32x2 }
};
state.vertexFormats[VERTEX_POINT].gpu = (gpu_vertex_format) {
@ -410,12 +445,12 @@ void lovrGraphicsGetLimits(GraphicsLimits* limits) {
limits->renderSize[0] = state.limits.renderSize[0];
limits->renderSize[1] = state.limits.renderSize[1];
limits->renderSize[2] = state.limits.renderSize[2];
limits->uniformBuffersPerStage = MIN(state.limits.uniformBuffersPerStage - 2, MAX_RESOURCES_PER_SHADER);
limits->storageBuffersPerStage = MIN(state.limits.storageBuffersPerStage, MAX_RESOURCES_PER_SHADER);
limits->sampledTexturesPerStage = MIN(state.limits.sampledTexturesPerStage, MAX_RESOURCES_PER_SHADER);
limits->storageTexturesPerStage = MIN(state.limits.storageTexturesPerStage, MAX_RESOURCES_PER_SHADER);
limits->samplersPerStage = MIN(state.limits.samplersPerStage - 1, MAX_RESOURCES_PER_SHADER);
limits->resourcesPerShader = MAX_RESOURCES_PER_SHADER;
limits->uniformBuffersPerStage = MIN(state.limits.uniformBuffersPerStage - 2, MAX_SHADER_RESOURCES);
limits->storageBuffersPerStage = MIN(state.limits.storageBuffersPerStage, MAX_SHADER_RESOURCES);
limits->sampledTexturesPerStage = MIN(state.limits.sampledTexturesPerStage, MAX_SHADER_RESOURCES);
limits->storageTexturesPerStage = MIN(state.limits.storageTexturesPerStage, MAX_SHADER_RESOURCES);
limits->samplersPerStage = MIN(state.limits.samplersPerStage - 1, MAX_SHADER_RESOURCES);
limits->resourcesPerShader = MAX_SHADER_RESOURCES;
limits->uniformBufferRange = state.limits.uniformBufferRange;
limits->storageBufferRange = state.limits.storageBufferRange;
limits->uniformBufferAlign = state.limits.uniformBufferAlign;
@ -482,6 +517,15 @@ void lovrGraphicsSubmit(Pass** passes, uint32_t count) {
if (state.transfers) {
streams[extraPassCount++] = state.transfers->stream;
if (state.syncTextureUpload) {
gpu_barrier barrier;
barrier.prev = GPU_PHASE_COPY;
barrier.next = GPU_PHASE_SHADER_VERTEX | GPU_PHASE_SHADER_FRAGMENT | GPU_PHASE_SHADER_COMPUTE;
barrier.flush = GPU_CACHE_TRANSFER_WRITE;
barrier.invalidate = GPU_CACHE_TEXTURE;
gpu_sync(state.transfers->stream, &barrier, 1);
}
}
for (uint32_t i = 0; i < count; i++) {
@ -492,6 +536,8 @@ void lovrGraphicsSubmit(Pass** passes, uint32_t count) {
streams[extraPassCount + i] = passes[i]->stream;
if (passes[i]->info.type == PASS_RENDER) {
gpu_render_end(passes[i]->stream);
} else if (passes[i]->info.type == PASS_COMPUTE) {
gpu_compute_end(passes[i]->stream);
}
}
@ -778,6 +824,10 @@ Texture* lovrTextureCreate(TextureInfo* info) {
}
}
if (info->usage == TEXTURE_SAMPLE && info->imageCount > 0) {
state.syncTextureUpload = true;
}
return texture;
}
@ -1118,8 +1168,8 @@ Shader* lovrShaderCreate(ShaderInfo* info) {
uint32_t index = shader->resourceCount++;
if (shader->resourceCount > MAX_RESOURCES_PER_SHADER) {
lovrThrow("Shader resource count exceeds resourcesPerShader limit (%d)", MAX_RESOURCES_PER_SHADER);
if (shader->resourceCount > MAX_SHADER_RESOURCES) {
lovrThrow("Shader resource count exceeds resourcesPerShader limit (%d)", MAX_SHADER_RESOURCES);
}
lovrCheck(resource->binding < 32, "Max resource binding number is %d", 32 - 1);
@ -1260,11 +1310,30 @@ Pass* lovrGraphicsGetPass(PassInfo* info) {
pass->ref = 1;
pass->info = *info;
pass->stream = gpu_stream_begin(info->label);
arr_init(&pass->access, tempGrow);
if (info->type != PASS_RENDER) {
if (info->type == PASS_TRANSFER) {
return pass;
}
if (info->type == PASS_COMPUTE) {
memset(pass->constants, 0, sizeof(pass->constants));
pass->constantsDirty = true;
pass->bindingMask = 0;
pass->bindingsDirty = true;
pass->pipeline = &pass->pipelines[0];
pass->pipeline->shader = NULL;
pass->pipeline->dirty = true;
gpu_compute_begin(pass->stream);
return pass;
}
// Validation
Canvas* canvas = &info->canvas;
const TextureInfo* main = canvas->textures[0] ? &canvas->textures[0]->info : &canvas->depth.texture->info;
lovrCheck(canvas->textures[0] || canvas->depth.texture, "Render pass must have at least one color or depth texture");
@ -1300,12 +1369,15 @@ Pass* lovrGraphicsGetPass(PassInfo* info) {
}
}
// Render target
gpu_canvas target = {
.size = { main->width, main->height }
};
for (uint32_t i = 0; i < colorTextureCount; i++) {
if (main->samples == 1 && canvas->samples > 1) {
lovrCheck(canvas->loads[i] != LOAD_KEEP, "When internal multisampling is used, render pass textures must be cleared");
TextureFormat format = canvas->textures[i]->info.format;
bool srgb = canvas->textures[i]->info.srgb;
target.color[i].texture = getAttachment(target.size, main->depth, format, srgb, canvas->samples);
@ -1320,10 +1392,16 @@ Pass* lovrGraphicsGetPass(PassInfo* info) {
target.color[i].clear[1] = lovrMathGammaToLinear(canvas->clears[i][1]);
target.color[i].clear[2] = lovrMathGammaToLinear(canvas->clears[i][2]);
target.color[i].clear[3] = canvas->clears[i][2];
gpu_cache cache = GPU_CACHE_BLEND_WRITE | (canvas->loads[i] == LOAD_KEEP ? GPU_CACHE_BLEND_READ : 0);
trackTexture(pass, canvas->textures[i], GPU_PHASE_BLEND, cache);
}
if (canvas->depth.texture) {
target.depth.texture = canvas->depth.texture->renderView;
gpu_phase phase = canvas->depth.load == LOAD_KEEP ? GPU_PHASE_DEPTH_EARLY : GPU_PHASE_DEPTH_LATE;
gpu_cache cache = GPU_CACHE_DEPTH_WRITE | (canvas->depth.load == LOAD_KEEP ? GPU_CACHE_DEPTH_READ : 0);
trackTexture(pass, canvas->depth.texture, phase, cache);
} else if (canvas->depth.format) {
target.depth.texture = getAttachment(target.size, main->depth, canvas->depth.format, false, canvas->samples);
}
@ -1334,10 +1412,9 @@ Pass* lovrGraphicsGetPass(PassInfo* info) {
target.depth.clear.depth = canvas->depth.clear;
}
gpu_render_begin(pass->stream, &target);
// Begin render pass
uint32_t offset = 0;
gpu_bind_vertex_buffers(pass->stream, &state.defaultBuffer->gpu, &offset, 0, 1);
gpu_render_begin(pass->stream, &target);
float viewport[4] = { 0.f, 0.f, (float) main->width, (float) main->height };
float depthRange[2] = { 0.f, 1.f };
@ -1346,32 +1423,34 @@ Pass* lovrGraphicsGetPass(PassInfo* info) {
uint32_t scissor[4] = { 0, 0, main->width, main->height };
gpu_set_scissor(pass->stream, scissor);
// The default Buffer (filled with zero) is always at slot #0, used for missing vertex attributes
uint32_t offset = 0;
gpu_bind_vertex_buffers(pass->stream, &state.defaultBuffer->gpu, &offset, 0, 1);
// Reset state
pass->transform = pass->transforms[0];
mat4_identity(pass->transform);
pass->pipeline = &pass->pipelines[0];
memset(&pass->pipeline->info, 0, sizeof(pass->pipeline->info));
pass->pipeline->info = (gpu_pipeline_info) {
.colorCount = colorTextureCount,
.depth.format = canvas->depth.texture ? canvas->depth.texture->info.format : canvas->depth.format,
.multisample.count = canvas->samples,
.viewCount = main->depth,
.depth.test = GPU_COMPARE_LEQUAL,
.depth.write = true
};
pass->pipeline->info.colorCount = colorTextureCount;
for (uint32_t i = 0; i < colorTextureCount; i++) {
pass->pipeline->info.color[i].format = canvas->textures[i]->info.format;
pass->pipeline->info.color[i].srgb = canvas->textures[i]->info.srgb;
pass->pipeline->info.color[i].mask = 0xf;
}
pass->pipeline->info.depth.format = canvas->depth.texture ? canvas->depth.texture->info.format : canvas->depth.format;
pass->pipeline->info.viewCount = main->depth;
pass->pipeline->info.multisample.count = canvas->samples;
pass->pipeline->info.depth.test = GPU_COMPARE_LEQUAL;
pass->pipeline->info.depth.write = true;
pass->pipeline->info.color[0].mask = 0xf;
pass->pipeline->info.color[1].mask = 0xf;
pass->pipeline->info.color[2].mask = 0xf;
pass->pipeline->info.color[3].mask = 0xf;
pass->pipeline->color[0] = 1.f;
pass->pipeline->color[1] = 1.f;
pass->pipeline->color[2] = 1.f;
pass->pipeline->color[3] = 1.f;
float defaultColor[4] = { 1.f, 1.f, 1.f, 1.f };
memcpy(pass->pipeline->color, defaultColor, sizeof(defaultColor));
pass->pipeline->formatHash = 0;
pass->pipeline->shader = NULL;
pass->pipeline->dirty = true;
@ -1383,12 +1462,9 @@ Pass* lovrGraphicsGetPass(PassInfo* info) {
pass->cameraCount = main->depth;
pass->cameras = tempAlloc(pass->cameraCount * sizeof(Camera));
for (uint32_t i = 0; i < pass->cameraCount; i++) {
mat4_identity(pass->cameras[i].view);
float fov = 1.0f;
float aspect = (float) main->width / main->height;
mat4_perspective(pass->cameras[i].projection, .01f, 100.f, fov, aspect);
mat4_perspective(pass->cameras[i].projection, .01f, 100.f, 1.0f, (float) main->width / main->height);
}
pass->cameraDirty = true;
@ -1408,11 +1484,7 @@ Pass* lovrGraphicsGetPass(PassInfo* info) {
}
void lovrPassDestroy(void* ref) {
Pass* pass = ref;
for (uint32_t i = 0; i <= pass->pipelineIndex; i++) {
lovrRelease(pass->pipelines[i].shader, lovrShaderDestroy);
pass->pipelines[i].shader = NULL;
}
//
}
const PassInfo* lovrPassGetInfo(Pass* pass) {
@ -1726,7 +1798,8 @@ void lovrPassSetWireframe(Pass* pass, bool wireframe) {
void lovrPassSendBuffer(Pass* pass, const char* name, size_t length, uint32_t slot, Buffer* buffer, uint32_t offset, uint32_t extent) {
Shader* shader = pass->pipeline->shader;
lovrCheck(shader, "A Shader must be active to send resources");
slot = name ? findShaderSlot(shader, name, length) : slot;
ShaderResource* resource = findShaderResource(shader, name, length, slot);
slot = resource->binding;
lovrCheck(shader->bufferMask & (1 << slot), "Trying to send a Buffer to slot %d, but the active Shader doesn't have a Buffer in that slot");
lovrCheck(offset < buffer->size, "Buffer offset is past the end of the Buffer");
@ -1754,12 +1827,27 @@ void lovrPassSendBuffer(Pass* pass, const char* name, size_t length, uint32_t sl
pass->bindings[slot].buffer.extent = extent;
pass->bindingMask |= (1 << slot);
pass->bindingsDirty = true;
gpu_phase phase = 0;
gpu_cache cache = 0;
if (pass->info.type == PASS_RENDER) {
if (resource->stageMask & GPU_STAGE_VERTEX) phase |= GPU_PHASE_SHADER_VERTEX;
if (resource->stageMask & GPU_STAGE_FRAGMENT) phase |= GPU_PHASE_SHADER_FRAGMENT;
cache = (shader->storageMask & (1 << slot)) ? GPU_CACHE_STORAGE_READ : GPU_CACHE_UNIFORM;
} else {
phase = GPU_PHASE_SHADER_COMPUTE;
cache = (shader->storageMask & (1 << slot)) ? GPU_CACHE_STORAGE_WRITE : GPU_CACHE_UNIFORM; // TODO readonly
}
trackBuffer(pass, buffer, phase, cache);
}
void lovrPassSendTexture(Pass* pass, const char* name, size_t length, uint32_t slot, Texture* texture) {
Shader* shader = pass->pipeline->shader;
lovrCheck(shader, "A Shader must be active to send resources");
slot = name ? findShaderSlot(shader, name, length) : slot;
ShaderResource* resource = findShaderResource(shader, name, length, slot);
slot = resource->binding;
lovrCheck(shader->textureMask & (1 << slot), "Trying to send a Texture to slot %d, but the active Shader doesn't have a Texture in that slot");
@ -1772,12 +1860,27 @@ void lovrPassSendTexture(Pass* pass, const char* name, size_t length, uint32_t s
pass->bindings[slot].texture = texture->gpu;
pass->bindingMask |= (1 << slot);
pass->bindingsDirty = true;
gpu_phase phase = 0;
gpu_cache cache = 0;
if (pass->info.type == PASS_RENDER) {
if (resource->stageMask & GPU_STAGE_VERTEX) phase |= GPU_PHASE_SHADER_VERTEX;
if (resource->stageMask & GPU_STAGE_FRAGMENT) phase |= GPU_PHASE_SHADER_FRAGMENT;
cache = (shader->storageMask & (1 << slot)) ? GPU_CACHE_STORAGE_READ : GPU_CACHE_TEXTURE;
} else {
phase = GPU_PHASE_SHADER_COMPUTE;
cache = (shader->storageMask & (1 << slot)) ? GPU_CACHE_STORAGE_WRITE : GPU_CACHE_TEXTURE; // TODO readonly
}
trackTexture(pass, texture, phase, cache);
}
void lovrPassSendSampler(Pass* pass, const char* name, size_t length, uint32_t slot, Sampler* sampler) {
Shader* shader = pass->pipeline->shader;
lovrCheck(shader, "A Shader must be active to send resources");
slot = name ? findShaderSlot(shader, name, length) : slot;
ShaderResource* resource = findShaderResource(shader, name, length, slot);
slot = resource->binding;
lovrCheck(shader->samplerMask & (1 << slot), "Trying to send a Sampler to slot %d, but the active Shader doesn't have a Sampler in that slot");
@ -1859,6 +1962,7 @@ static void flushBindings(Pass* pass, Shader* shader) {
return;
}
uint32_t set = pass->info.type == PASS_RENDER ? 2 : 0;
gpu_binding* bindings = tempAlloc(shader->resourceCount * sizeof(gpu_binding));
for (uint32_t i = 0; i < shader->resourceCount; i++) {
@ -1873,7 +1977,7 @@ static void flushBindings(Pass* pass, Shader* shader) {
gpu_bundle* bundle = getBundle(shader->layout);
gpu_bundle_write(&bundle, &info, 1);
gpu_bind_bundle(pass->stream, shader->gpu, 2, bundle, NULL, 0);
gpu_bind_bundle(pass->stream, shader->gpu, set, bundle, NULL, 0);
}
static void flushBuiltins(Pass* pass, Draw* draw, Shader* shader) {
@ -1934,6 +2038,7 @@ static void flushBuffers(Pass* pass, Draw* draw) {
uint32_t vertexOffset = 0;
if (!draw->vertex.buffer && draw->vertex.count > 0) {
lovrCheck(draw->vertex.count < UINT16_MAX, "This draw has too many vertices (max is 65534), try splitting it up into multiple draws or using a Buffer");
uint32_t stride = state.vertexFormats[draw->vertex.format].gpu.bufferStrides[1];
uint32_t size = draw->vertex.count * stride;
@ -1952,6 +2057,7 @@ static void flushBuffers(Pass* pass, Draw* draw) {
lovrCheck(draw->vertex.buffer->info.stride <= state.limits.vertexBufferStride, "Vertex buffer stride exceeds vertexBufferStride limit");
gpu_bind_vertex_buffers(pass->stream, &draw->vertex.buffer->gpu, &vertexOffset, 1, 1);
pass->vertexBuffer = draw->vertex.buffer->gpu;
trackBuffer(pass, draw->vertex.buffer, GPU_PHASE_INPUT_VERTEX, GPU_CACHE_VERTEX);
}
if (!draw->index.buffer && draw->index.count > 0) {
@ -1974,6 +2080,7 @@ static void flushBuffers(Pass* pass, Draw* draw) {
gpu_index_type type = draw->index.buffer->info.stride == 4 ? GPU_INDEX_U32 : GPU_INDEX_U16;
gpu_bind_index_buffer(pass->stream, draw->index.buffer->gpu, 0, type);
pass->indexBuffer = draw->index.buffer->gpu;
trackBuffer(pass, draw->index.buffer, GPU_PHASE_INPUT_INDEX, GPU_CACHE_INDEX);
}
}
@ -1994,6 +2101,7 @@ static void lovrPassDraw(Pass* pass, Draw* draw) {
uint32_t id = pass->drawCount & 0xff;
if (draw->indirect) {
trackBuffer(pass, draw->indirect, GPU_PHASE_INDIRECT, GPU_CACHE_INDIRECT);
if (indexed) {
gpu_draw_indirect_indexed(pass->stream, draw->indirect->gpu, draw->offset, count);
} else {
@ -2037,20 +2145,12 @@ void lovrPassLine(Pass* pass, uint32_t count, float** points) {
}
}
static inline uint32_t packNormal(float nx, float ny, float nz) {
uint32_t n = 0;
n |= (uint32_t) (nx * 1023.f) << 20;
n |= (uint32_t) (ny * 1023.f) << 10;
n |= (uint32_t) (nz * 1023.f) << 0;
return n;
}
void lovrPassPlane(Pass* pass, float* transform, uint32_t hsegments, uint32_t vsegments) {
void lovrPassPlane(Pass* pass, float* transform, uint32_t cols, uint32_t rows) {
ShapeVertex* vertices;
uint16_t* indices;
uint32_t vertexCount = (hsegments + 1) * (vsegments + 1);
uint32_t indexCount = (hsegments * vsegments) * 6;
uint32_t vertexCount = (cols + 1) * (rows + 1);
uint32_t indexCount = (cols * rows) * 6;
lovrPassDraw(pass, &(Draw) {
.mode = GPU_DRAW_TRIANGLES,
@ -2062,24 +2162,24 @@ void lovrPassPlane(Pass* pass, float* transform, uint32_t hsegments, uint32_t vs
.index.count = indexCount
});
for (uint32_t y = 0, v = 0; y <= vsegments; y++) {
for (uint32_t x = 0; x <= hsegments; x++, v++) {
vertices[v].position.x = -.5f + (float) x / hsegments;
vertices[v].position.y = .5f - (float) y / vsegments;
vertices[v].position.z = 0.f;
vertices[v].normal = packNormal(0.f, 0.f, 1.f);
vertices[v].uv.u = (uint16_t) ((float) x / hsegments * 65535.f);
vertices[v].uv.v = (uint16_t) ((float) y / vsegments * 65535.f);
for (uint32_t y = 0; y <= rows; y++) {
float v = y * (1.f / rows);
for (uint32_t x = 0; x <= cols; x++) {
float u = x * (1.f / cols);
*vertices++ = (ShapeVertex) {
.position = { u - .5f, .5f - v, 0.f },
.normal = { 0.f, 0.f, 1.f },
.uv = { u, v }
};
}
}
uint16_t skip = hsegments + 1;
for (uint32_t y = 0; y < vsegments; y++) {
for (uint32_t x = 0; x < hsegments; x++) {
uint16_t a = ((y + 0) * skip) + x + 0;
uint16_t b = ((y + 0) * skip) + x + 1;
uint16_t c = ((y + 1) * skip) + x + 0;
uint16_t d = ((y + 1) * skip) + x + 1;
for (uint32_t y = 0; y < rows; y++) {
for (uint32_t x = 0; x < cols; x++) {
uint16_t a = (y * (cols + 1)) + x;
uint16_t b = a + 1;
uint16_t c = a + cols + 1;
uint16_t d = a + cols + 2;
uint16_t cell[] = { a, b, c, c, b, d };
memcpy(indices, cell, sizeof(cell));
indices += COUNTOF(cell);
@ -2087,6 +2187,89 @@ void lovrPassPlane(Pass* pass, float* transform, uint32_t hsegments, uint32_t vs
}
}
// Draws a unit box (extents -.5 to .5 on every axis) with the given transform.
// The mesh has 24 vertices (4 per face, so each face gets its own normal and UVs)
// and 36 indices (2 triangles per face).  The draw is recorded first, which hands
// back pointers to the vertex/index storage, and the mesh data is copied in afterwards.
void lovrPassBox(Pass* pass, float* transform) {
  static const ShapeVertex boxVertices[] = {
    { { -.5f, -.5f, -.5f }, { 0.f, 0.f, -1.f }, { 0.f, 0.f } }, // Front
    { { -.5f, .5f, -.5f }, { 0.f, 0.f, -1.f }, { 0.f, 1.f } },
    { { .5f, -.5f, -.5f }, { 0.f, 0.f, -1.f }, { 1.f, 0.f } },
    { { .5f, .5f, -.5f }, { 0.f, 0.f, -1.f }, { 1.f, 1.f } },
    { { .5f, .5f, -.5f }, { 1.f, 0.f, 0.f }, { 0.f, 1.f } }, // Right
    { { .5f, .5f, .5f }, { 1.f, 0.f, 0.f }, { 1.f, 1.f } },
    { { .5f, -.5f, -.5f }, { 1.f, 0.f, 0.f }, { 0.f, 0.f } },
    { { .5f, -.5f, .5f }, { 1.f, 0.f, 0.f }, { 1.f, 0.f } },
    { { .5f, -.5f, .5f }, { 0.f, 0.f, 1.f }, { 0.f, 0.f } }, // Back
    { { .5f, .5f, .5f }, { 0.f, 0.f, 1.f }, { 0.f, 1.f } },
    { { -.5f, -.5f, .5f }, { 0.f, 0.f, 1.f }, { 1.f, 0.f } },
    { { -.5f, .5f, .5f }, { 0.f, 0.f, 1.f }, { 1.f, 1.f } },
    { { -.5f, .5f, .5f }, { -1.f, 0.f, 0.f }, { 0.f, 1.f } }, // Left
    { { -.5f, .5f, -.5f }, { -1.f, 0.f, 0.f }, { 1.f, 1.f } },
    { { -.5f, -.5f, .5f }, { -1.f, 0.f, 0.f }, { 0.f, 0.f } },
    { { -.5f, -.5f, -.5f }, { -1.f, 0.f, 0.f }, { 1.f, 0.f } },
    { { -.5f, -.5f, -.5f }, { 0.f, -1.f, 0.f }, { 0.f, 0.f } }, // Bottom
    { { .5f, -.5f, -.5f }, { 0.f, -1.f, 0.f }, { 1.f, 0.f } },
    { { -.5f, -.5f, .5f }, { 0.f, -1.f, 0.f }, { 0.f, 1.f } },
    { { .5f, -.5f, .5f }, { 0.f, -1.f, 0.f }, { 1.f, 1.f } },
    { { -.5f, .5f, -.5f }, { 0.f, 1.f, 0.f }, { 0.f, 1.f } }, // Top
    { { -.5f, .5f, .5f }, { 0.f, 1.f, 0.f }, { 0.f, 0.f } },
    { { .5f, .5f, -.5f }, { 0.f, 1.f, 0.f }, { 1.f, 1.f } },
    { { .5f, .5f, .5f }, { 0.f, 1.f, 0.f }, { 1.f, 0.f } }
  };

  // Each row is one face: two triangles over that face's four vertices
  static const uint16_t boxIndices[] = {
    0, 1, 2, 2, 1, 3,
    4, 5, 6, 6, 5, 7,
    8, 9, 10, 10, 9, 11,
    12, 13, 14, 14, 13, 15,
    16, 17, 18, 18, 17, 19,
    20, 21, 22, 22, 21, 23
  };

  ShapeVertex* vertexOut;
  uint16_t* indexOut;

  // Recording the draw fills in vertexOut/indexOut through the pointer fields
  lovrPassDraw(pass, &(Draw) {
    .mode = GPU_DRAW_TRIANGLES,
    .transform = transform,
    .vertex.format = VERTEX_SHAPE,
    .vertex.pointer = (void**) &vertexOut,
    .vertex.count = COUNTOF(boxVertices),
    .index.pointer = (void**) &indexOut,
    .index.count = COUNTOF(boxIndices)
  });

  memcpy(vertexOut, boxVertices, sizeof(boxVertices));
  memcpy(indexOut, boxIndices, sizeof(boxIndices));
}
// Records a compute dispatch on a compute pass.
//
// pass     - must be a PASS_COMPUTE pass with a compute Shader active.
// x, y, z  - workgroup counts for a direct dispatch; each is checked against
//            state.limits.computeDispatchCount.  Only used when `indirect` is NULL.
// indirect - optional Buffer to read the dispatch parameters from; when non-NULL an
//            indirect dispatch is recorded instead and the Buffer access is tracked.
// offset   - byte offset of the parameters inside `indirect`; must be a multiple of 4 and
//            leave room for a full 12-byte command (three uint32_t counts).
//
// Failures are reported through lovrCheck.
void lovrPassCompute(Pass* pass, uint32_t x, uint32_t y, uint32_t z, Buffer* indirect, uint32_t offset) {
  lovrCheck(pass->info.type == PASS_COMPUTE, "This function can only be called on a compute pass");

  Shader* shader = pass->pipeline->shader;
  lovrCheck(shader && shader->info.type == SHADER_COMPUTE, "Tried to run a compute shader, but no compute shader is bound");

  lovrCheck(x <= state.limits.computeDispatchCount[0], "Compute %s count exceeds computeDispatchCount limit", "x");
  lovrCheck(y <= state.limits.computeDispatchCount[1], "Compute %s count exceeds computeDispatchCount limit", "y");
  lovrCheck(z <= state.limits.computeDispatchCount[2], "Compute %s count exceeds computeDispatchCount limit", "z");

  // Only rebind the pipeline when it has changed since the last dispatch
  gpu_pipeline* pipeline = state.pipelines.data[shader->computePipeline];

  if (pass->pipeline->dirty) {
    gpu_bind_pipeline(pass->stream, pipeline, true);
    pass->pipeline->dirty = false;
  }

  flushConstants(pass, shader);
  flushBindings(pass, shader);

  if (indirect) {
    // An indirect dispatch command is three uint32_t group counts
    const uint32_t commandSize = 3 * sizeof(uint32_t);
    lovrCheck(offset % 4 == 0, "Indirect compute offset must be a multiple of 4");
    // Buffer sizes are unsigned: check the size first so `size - commandSize` can't wrap
    // around for Buffers smaller than one command, which would let any offset through
    lovrCheck(indirect->size >= commandSize && offset <= indirect->size - commandSize, "Indirect compute offset overflows the Buffer");
    trackBuffer(pass, indirect, GPU_PHASE_INDIRECT, GPU_CACHE_INDIRECT);
    gpu_compute_indirect(pass->stream, indirect->gpu, offset);
  } else {
    gpu_compute(pass->stream, x, y, z);
  }
}
void lovrPassClearBuffer(Pass* pass, Buffer* buffer, uint32_t offset, uint32_t extent) {
if (extent == 0) return;
if (extent == ~0u) extent = buffer->size - offset;
@ -2096,6 +2279,7 @@ void lovrPassClearBuffer(Pass* pass, Buffer* buffer, uint32_t offset, uint32_t e
lovrCheck(extent % 4 == 0, "Buffer clear extent must be a multiple of 4");
lovrCheck(offset + extent <= buffer->size, "Buffer clear range goes past the end of the Buffer");
gpu_clear_buffer(pass->stream, buffer->gpu, offset, extent);
trackBuffer(pass, buffer, GPU_PHASE_CLEAR, GPU_CACHE_TRANSFER_WRITE);
}
void lovrPassClearTexture(Pass* pass, Texture* texture, float value[4], uint32_t layer, uint32_t layerCount, uint32_t level, uint32_t levelCount) {
@ -2105,6 +2289,7 @@ void lovrPassClearTexture(Pass* pass, Texture* texture, float value[4], uint32_t
lovrCheck(texture->info.type == TEXTURE_3D || layer + layerCount <= texture->info.depth, "Texture clear range exceeds texture layer count");
lovrCheck(level + levelCount <= texture->info.mipmaps, "Texture clear range exceeds texture mipmap count");
gpu_clear_texture(pass->stream, texture->gpu, value, layer, layerCount, level, levelCount);
trackTexture(pass, texture, GPU_PHASE_CLEAR, GPU_CACHE_TRANSFER_WRITE);
}
void lovrPassCopyDataToBuffer(Pass* pass, void* data, Buffer* buffer, uint32_t offset, uint32_t extent) {
@ -2114,6 +2299,7 @@ void lovrPassCopyDataToBuffer(Pass* pass, void* data, Buffer* buffer, uint32_t o
gpu_buffer* scratchpad = tempAlloc(gpu_sizeof_buffer());
void* pointer = gpu_map(scratchpad, extent, 4, GPU_MAP_WRITE);
gpu_copy_buffers(pass->stream, scratchpad, buffer->gpu, 0, offset, extent);
trackBuffer(pass, buffer, GPU_PHASE_COPY, GPU_CACHE_TRANSFER_WRITE);
memcpy(pointer, data, extent);
}
@ -2123,6 +2309,8 @@ void lovrPassCopyBufferToBuffer(Pass* pass, Buffer* src, Buffer* dst, uint32_t s
lovrCheck(srcOffset + extent <= src->size, "Buffer copy range goes past the end of the source Buffer");
lovrCheck(dstOffset + extent <= dst->size, "Buffer copy range goes past the end of the destination Buffer");
gpu_copy_buffers(pass->stream, src->gpu, dst->gpu, srcOffset, dstOffset, extent);
trackBuffer(pass, src, GPU_PHASE_COPY, GPU_CACHE_TRANSFER_READ);
trackBuffer(pass, dst, GPU_PHASE_COPY, GPU_CACHE_TRANSFER_WRITE);
}
void lovrPassCopyImageToTexture(Pass* pass, Image* image, Texture* texture, uint32_t srcOffset[4], uint32_t dstOffset[4], uint32_t extent[4]) {
@ -2155,6 +2343,7 @@ void lovrPassCopyImageToTexture(Pass* pass, Image* image, Texture* texture, uint
}
}
gpu_copy_buffer_texture(pass->stream, buffer, texture->gpu, 0, dstOffset, extent);
trackTexture(pass, texture, GPU_PHASE_COPY, GPU_CACHE_TRANSFER_WRITE);
}
void lovrPassCopyTextureToTexture(Pass* pass, Texture* src, Texture* dst, uint32_t srcOffset[4], uint32_t dstOffset[4], uint32_t extent[3]) {
@ -2170,6 +2359,8 @@ void lovrPassCopyTextureToTexture(Pass* pass, Texture* src, Texture* dst, uint32
checkTextureBounds(&src->info, srcOffset, extent);
checkTextureBounds(&dst->info, dstOffset, extent);
gpu_copy_textures(pass->stream, src->gpu, dst->gpu, srcOffset, dstOffset, extent);
trackTexture(pass, src, GPU_PHASE_COPY, GPU_CACHE_TRANSFER_READ);
trackTexture(pass, dst, GPU_PHASE_COPY, GPU_CACHE_TRANSFER_WRITE);
}
void lovrPassBlit(Pass* pass, Texture* src, Texture* dst, uint32_t srcOffset[4], uint32_t dstOffset[4], uint32_t srcExtent[3], uint32_t dstExtent[3], FilterMode filter) {
@ -2191,6 +2382,8 @@ void lovrPassBlit(Pass* pass, Texture* src, Texture* dst, uint32_t srcOffset[4],
checkTextureBounds(&src->info, srcOffset, srcExtent);
checkTextureBounds(&dst->info, dstOffset, dstExtent);
gpu_blit(pass->stream, src->gpu, dst->gpu, srcOffset, dstOffset, srcExtent, dstExtent, (gpu_filter) filter);
trackTexture(pass, src, GPU_PHASE_BLIT, GPU_CACHE_TRANSFER_READ);
trackTexture(pass, dst, GPU_PHASE_BLIT, GPU_CACHE_TRANSFER_WRITE);
}
void lovrPassMipmap(Pass* pass, Texture* texture, uint32_t base, uint32_t count) {
@ -2218,7 +2411,14 @@ void lovrPassMipmap(Pass* pass, Texture* texture, uint32_t base, uint32_t count)
volumetric ? MAX(texture->info.depth >> level, 1) : 1
};
gpu_blit(pass->stream, texture->gpu, texture->gpu, srcOffset, dstOffset, srcExtent, dstExtent, GPU_FILTER_LINEAR);
gpu_sync(pass->stream, &(gpu_barrier) {
.prev = GPU_PHASE_BLIT,
.next = GPU_PHASE_BLIT,
.flush = GPU_CACHE_TRANSFER_WRITE,
.invalidate = GPU_CACHE_TRANSFER_READ
}, 1);
}
trackTexture(pass, texture, GPU_PHASE_BLIT, GPU_CACHE_TRANSFER_READ | GPU_CACHE_TRANSFER_WRITE);
}
// Helpers
@ -2235,6 +2435,13 @@ static void* tempAlloc(size_t size) {
return state.allocator.memory + cursor;
}
// Moves a temporary allocation into a larger one.
//
// - A `size` of zero yields NULL and allocates nothing.
// - Otherwise `size` bytes are requested from tempAlloc.  When `p` is non-NULL, the first
//   `size / 2` bytes of `p` are copied into the new block; `p` itself is not modified or
//   released here.
//
// NOTE(review): copying exactly half of `size` presumably relies on callers doubling the
// capacity on every grow -- confirm against the call sites.
static void* tempGrow(void* p, size_t size) {
  if (size == 0) {
    return NULL;
  }

  void* grown = tempAlloc(size);

  if (p) {
    memcpy(grown, p, size >> 1);
  }

  return grown;
}
static void beginFrame(void) {
if (state.active) {
return;
@ -2246,6 +2453,7 @@ static void beginFrame(void) {
static gpu_stream* getTransfers(void) {
if (!state.transfers) {
state.syncTextureUpload = false;
state.transfers = lovrGraphicsGetPass(&(PassInfo) {
.type = PASS_TRANSFER,
.label = "Internal Transfers"
@ -2442,14 +2650,55 @@ static void checkTextureBounds(const TextureInfo* info, uint32_t offset[4], uint
lovrCheck(offset[3] < info->mipmaps, "Texture mipmap %d exceeds its mipmap count (%d)", offset[3] + 1, info->mipmaps);
}
uint32_t findShaderSlot(Shader* shader, const char* name, size_t length) {
uint32_t hash = (uint32_t) hash64(name, length);
for (uint32_t i = 0; i < shader->resourceCount; i++) {
if (shader->resources[i].hash == hash) {
return shader->resources[i].binding;
static ShaderResource* findShaderResource(Shader* shader, const char* name, size_t length, uint32_t slot) {
if (name) {
uint32_t hash = (uint32_t) hash64(name, length);
for (uint32_t i = 0; i < shader->resourceCount; i++) {
if (shader->resources[i].hash == hash) {
return &shader->resources[i];
}
}
lovrThrow("Shader has no variable named '%s'", name);
} else {
for (uint32_t i = 0; i < shader->resourceCount; i++) {
if (shader->resources[i].binding == slot) {
return &shader->resources[i];
}
}
lovrThrow("Shader has no variable in slot '%d'", slot);
}
lovrThrow("Shader has no variable named '%s'", name);
}
// Records that `pass` touches `buffer` during `phase` using the `cache` access type.
//
// Temporary (scratch) buffers are skipped entirely: they are only written by the CPU and
// only read by the GPU, so they never need synchronization.  For every other buffer an
// Access entry pointing at the buffer's sync state is appended to pass->access, and the
// buffer is retained.
// NOTE(review): the matching release is not visible in this function -- presumably it
// happens when the pass's access list is consumed; confirm.
static void trackBuffer(Pass* pass, Buffer* buffer, gpu_phase phase, gpu_cache cache) {
  if (!lovrBufferIsTemporary(buffer)) {
    Access entry = {
      .buffer = buffer,
      .sync = &buffer->sync,
      .phase = phase,
      .cache = cache
    };

    arr_push(&pass->access, entry);
    lovrRetain(buffer);
  }
}
// Records that `pass` touches `texture` during `phase` using the `cache` access type.
//
// Textures whose usage is exactly TEXTURE_SAMPLE are skipped: a sample-only texture needs
// no synchronization here (its initial upload is handled manually elsewhere).  For every
// other texture an Access entry pointing at the texture's sync state is appended to
// pass->access, and the texture is retained.
// NOTE(review): the matching release is not visible in this function -- presumably it
// happens when the pass's access list is consumed; confirm.
static void trackTexture(Pass* pass, Texture* texture, gpu_phase phase, gpu_cache cache) {
  if (texture->info.usage != TEXTURE_SAMPLE) {
    Access entry = {
      .texture = texture,
      .sync = &texture->sync,
      .phase = phase,
      .cache = cache
    };

    arr_push(&pass->access, entry);
    lovrRetain(texture);
  }
}
// Only an explicit set of SPIR-V capabilities are allowed

View File

@ -390,7 +390,9 @@ void lovrPassSendTexture(Pass* pass, const char* name, size_t length, uint32_t s
void lovrPassSendSampler(Pass* pass, const char* name, size_t length, uint32_t slot, Sampler* sampler);
void lovrPassPoints(Pass* pass, uint32_t count, float** vertices);
void lovrPassLine(Pass* pass, uint32_t count, float** vertices);
void lovrPassPlane(Pass* pass, float* transform, uint32_t hsegments, uint32_t vsegments);
void lovrPassPlane(Pass* pass, float* transform, uint32_t cols, uint32_t rows);
void lovrPassBox(Pass* pass, float* transform);
void lovrPassCompute(Pass* pass, uint32_t x, uint32_t y, uint32_t z, Buffer* indirect, uint32_t offset);
void lovrPassClearBuffer(Pass* pass, Buffer* buffer, uint32_t offset, uint32_t extent);
void lovrPassClearTexture(Pass* pass, Texture* texture, float value[4], uint32_t layer, uint32_t layerCount, uint32_t level, uint32_t levelCount);
void lovrPassCopyDataToBuffer(Pass* pass, void* data, Buffer* buffer, uint32_t offset, uint32_t size);