#include "graphics/graphics.h" #include "data/blob.h" #include "data/image.h" #include "data/modelData.h" #include "data/rasterizer.h" #include "event/event.h" #include "headset/headset.h" #include "math/math.h" #include "core/gpu.h" #include "core/maf.h" #include "core/spv.h" #include "core/os.h" #include "util.h" #include "monkey.h" #include "shaders.h" #include #include #include #include #include #include #ifdef LOVR_USE_GLSLANG #include "glslang_c_interface.h" #include "resource_limits_c.h" #endif #define MAX_PIPELINES 65536 #define MAX_TALLIES 256 #define TRANSFORM_STACK_SIZE 16 #define PIPELINE_STACK_SIZE 4 #define MAX_SHADER_RESOURCES 32 #define MAX_CUSTOM_ATTRIBUTES 10 #define LAYOUT_BUILTINS 0 #define LAYOUT_MATERIAL 1 #define LAYOUT_UNIFORMS 2 #define FLOAT_BITS(f) ((union { float f; uint32_t u; }) { f }).u typedef struct { void* next; void* pointer; gpu_buffer* handle; uint32_t tick; uint32_t size; uint32_t ref; } BufferBlock; typedef struct { BufferBlock* freelist; BufferBlock* current; uint32_t cursor; } BufferAllocator; typedef struct { BufferBlock* block; gpu_buffer* buffer; uint32_t offset; uint32_t extent; void* pointer; } BufferView; typedef struct { gpu_phase readPhase; gpu_phase writePhase; gpu_cache pendingReads; gpu_cache pendingWrite; uint32_t lastTransferRead; uint32_t lastTransferWrite; gpu_barrier* barrier; } Sync; struct Buffer { uint32_t ref; uint32_t base; Sync sync; gpu_buffer* gpu; BufferBlock* block; BufferInfo info; }; struct Texture { uint32_t ref; bool xrAcquired; Sync sync; gpu_texture* gpu; gpu_texture* renderView; gpu_texture* storageView; Material* material; Texture* root; uint32_t baseLayer; uint32_t baseLevel; TextureInfo info; }; struct Sampler { uint32_t ref; gpu_sampler* gpu; SamplerInfo info; }; enum { FLAG_VERTEX = (1 << 0), FLAG_FRAGMENT = (1 << 1), FLAG_COMPUTE = (1 << 2) }; typedef struct { uint32_t hash; uint32_t binding; gpu_slot_type type; gpu_phase phase; gpu_cache cache; uint32_t fieldCount; DataField* format; } ShaderResource; typedef struct { uint32_t location; uint32_t hash; } ShaderAttribute; struct Shader { uint32_t ref; Shader* parent; gpu_shader* gpu; gpu_pipeline* computePipeline; ShaderInfo info; size_t layout; uint32_t workgroupSize[3]; bool hasCustomAttributes; uint32_t attributeCount; uint32_t resourceCount; uint32_t bufferMask; uint32_t textureMask; uint32_t samplerMask; uint32_t storageMask; uint32_t uniformSize; uint32_t uniformCount; uint32_t stageMask; ShaderAttribute* attributes; ShaderResource* resources; DataField* uniforms; DataField* fields; uint32_t flagCount; uint32_t overrideCount; gpu_shader_flag* flags; uint32_t* flagLookup; char* names; }; struct Material { uint32_t ref; uint32_t next; uint32_t tick; uint16_t index; uint16_t block; gpu_bundle* bundle; MaterialInfo info; bool hasWritableTexture; }; typedef struct { uint32_t codepoint; float advance; uint16_t x, y; uint16_t uv[4]; float box[4]; } Glyph; struct Font { uint32_t ref; FontInfo info; Material* material; arr_t(Glyph) glyphs; map_t glyphLookup; map_t kerning; float pixelDensity; float lineSpacing; uint32_t padding; Texture* atlas; uint32_t atlasWidth; uint32_t atlasHeight; uint32_t rowHeight; uint32_t atlasX; uint32_t atlasY; }; struct Mesh { uint32_t ref; MeshStorage storage; Buffer* vertexBuffer; Buffer* indexBuffer; uint32_t indexCount; uint32_t dirtyVertices[2]; bool dirtyIndices; void* vertices; void* indices; float bounds[6]; bool hasBounds; DrawMode mode; uint32_t drawStart; uint32_t drawCount; uint32_t baseVertex; Material* material; }; 
typedef struct {
  float transform[12];
  float color[4];
} DrawData;

typedef enum {
  VERTEX_SHAPE,
  VERTEX_POINT,
  VERTEX_GLYPH,
  VERTEX_MODEL,
  VERTEX_EMPTY,
  VERTEX_FORMAT_COUNT
} VertexFormat;

typedef struct {
  uint64_t hash;
  DrawMode mode;
  DefaultShader shader;
  Material* material;
  float* transform;
  float* bounds;
  struct {
    Buffer* buffer;
    VertexFormat format;
    uint32_t count;
    void** pointer;
  } vertex;
  struct {
    Buffer* buffer;
    uint32_t count;
    void** pointer;
  } index;
  uint32_t start;
  uint32_t count;
  uint32_t instances;
  uint32_t baseVertex;
} DrawInfo;

typedef struct {
  float position[3];
  float rotation[4];
  float scale[3];
} NodeTransform;

typedef struct {
  uint32_t index;
  uint32_t count;
  uint32_t vertexIndex;
  uint32_t vertexCount;
} BlendGroup;

struct Model {
  uint32_t ref;
  Model* parent;
  ModelInfo info;
  DrawInfo* draws;
  Buffer* rawVertexBuffer;
  Buffer* vertexBuffer;
  Buffer* indexBuffer;
  Buffer* blendBuffer;
  Buffer* skinBuffer;
  Mesh** meshes;
  Texture** textures;
  Material** materials;
  NodeTransform* localTransforms;
  float* globalTransforms;
  float* boundingBoxes;
  bool transformsDirty;
  bool blendShapesDirty;
  float* blendShapeWeights;
  BlendGroup* blendGroups;
  uint32_t blendGroupCount;
  uint32_t lastVertexAnimation;
};

typedef enum {
  READBACK_BUFFER,
  READBACK_TEXTURE,
  READBACK_TIMESTAMP
} ReadbackType;

typedef struct {
  Pass* pass;
  double cpuTime;
} TimingInfo;

struct Readback {
  uint32_t ref;
  uint32_t tick;
  Readback* next;
  BufferView view;
  ReadbackType type;
  union {
    struct { Buffer* buffer; Blob* blob; };
    struct { Texture* texture; Image* image; };
    struct { TimingInfo* times; uint32_t count; };
  };
};

typedef struct {
  float resolution[2];
  float time;
} Globals;

typedef struct {
  float viewMatrix[16];
  float projection[16];
  float viewProjection[16];
  float inverseProjection[16];
} Camera;

typedef struct {
  struct { float x, y, z; } position;
  struct { float x, y, z; } normal;
  struct { float u, v; } uv;
} ShapeVertex;

typedef struct {
  struct { float x, y, z; } position;
  uint32_t normal;
  struct { float u, v; } uv;
  struct { uint8_t r, g, b, a; } color;
  uint32_t tangent;
} ModelVertex;

typedef struct {
  struct { float x, y, z; } position;
  struct { float x, y, z; } normal;
  struct { float x, y, z; } tangent;
} BlendVertex;

enum {
  SHAPE_PLANE,
  SHAPE_BOX,
  SHAPE_CIRCLE,
  SHAPE_SPHERE,
  SHAPE_CYLINDER,
  SHAPE_CONE,
  SHAPE_CAPSULE,
  SHAPE_TORUS,
  SHAPE_MONKEY
};

enum {
  DIRTY_BINDINGS = (1 << 0),
  DIRTY_UNIFORMS = (1 << 1),
  DIRTY_CAMERA = (1 << 2),
  NEEDS_VIEW_CULL = (1 << 3)
};

typedef struct {
  char* memory;
  size_t cursor;
  size_t length;
  size_t limit;
} Allocator;

typedef struct {
  uint64_t hash;
  uint32_t start;
  uint32_t baseVertex;
  uint32_t vertexBufferOffset;
  gpu_buffer* vertexBuffer;
  gpu_buffer* indexBuffer;
} CachedShape;

enum {
  ACCESS_COMPUTE,
  ACCESS_RENDER
};

typedef struct {
  Sync* sync;
  void* object;
  gpu_phase phase;
  gpu_cache cache;
} Access;

typedef struct {
  void* prev;
  void* next;
  uint64_t count;
  uint64_t textureMask;
  uint64_t padding;
  Access list[41];
} AccessBlock;

typedef struct {
  Texture* texture;
  LoadAction load;
  float clear[4];
} ColorAttachment;

typedef struct {
  Texture* texture;
  TextureFormat format;
  LoadAction load;
  float clear;
} DepthAttachment;

typedef struct {
  ColorAttachment color[4];
  DepthAttachment depth;
  uint32_t count;
  uint32_t width;
  uint32_t height;
  uint32_t views;
  uint32_t samples;
  bool resolve;
} Canvas;

typedef struct {
  bool dirty;
  bool viewCull;
  DrawMode mode;
  float color[4];
  Buffer* lastVertexBuffer;
  VertexFormat lastVertexFormat;
  gpu_pipeline_info info;
  Material* material;
  Shader* shader;
  Font* font;
} Pipeline;
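// Passes don't execute work immediately: they record Compute and Draw structs
// plus the Access list above, and everything is replayed into gpu_streams by
// recordComputePass/recordRenderPass when lovrGraphicsSubmit runs. The Access
// entries are what syncResource walks at submit time to turn recorded resource
// usage into gpu_barrier transitions.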
enum {
  COMPUTE_INDIRECT = (1 << 0),
  COMPUTE_BARRIER = (1 << 1)
};

typedef struct {
  uint32_t flags;
  Shader* shader;
  gpu_bundle_info* bundleInfo;
  gpu_bundle* bundle;
  gpu_buffer* uniformBuffer;
  uint32_t uniformOffset;
  union {
    struct { uint32_t x; uint32_t y; uint32_t z; };
    struct { gpu_buffer* buffer; uint32_t offset; } indirect;
  };
} Compute;

enum {
  DRAW_INDIRECT = (1 << 0),
  DRAW_INDEX32 = (1 << 1),
  DRAW_HAS_BOUNDS = (1 << 2)
};

typedef struct {
  uint16_t flags;
  uint16_t camera;
  uint32_t tally;
  Shader* shader;
  Material* material;
  gpu_pipeline_info* pipelineInfo;
  gpu_bundle_info* bundleInfo;
  gpu_pipeline* pipeline;
  gpu_bundle* bundle;
  gpu_buffer* vertexBuffer;
  gpu_buffer* indexBuffer;
  gpu_buffer* uniformBuffer;
  uint32_t vertexBufferOffset;
  uint32_t uniformOffset;
  union {
    struct {
      uint32_t start;
      uint32_t count;
      uint32_t instances;
      uint32_t baseVertex;
    };
    struct {
      gpu_buffer* buffer;
      uint32_t offset;
      uint32_t count;
      uint32_t stride; // Deprecated
    } indirect;
  };
  float transform[16];
  float color[4];
  float bounds[6];
} Draw;

typedef struct {
  gpu_tally* gpu;
  Buffer* tempBuffer;
  bool active;
  uint32_t count;
  uint32_t bufferOffset;
  Buffer* buffer;
} Tally;

struct Pass {
  uint32_t ref;
  uint32_t flags;
  gpu_pass* gpu;
  Allocator allocator;
  BufferAllocator buffers;
  CachedShape geocache[16];
  AccessBlock* access[2];
  Tally tally;
  Canvas canvas;
  Camera* cameras;
  uint32_t cameraCount;
  float viewport[6];
  uint32_t scissor[4];
  Sampler* sampler;
  float* transform;
  Pipeline* pipeline;
  uint32_t transformIndex;
  uint32_t pipelineIndex;
  gpu_binding* bindings;
  void* uniforms;
  uint32_t computeCount;
  Compute* computes;
  uint32_t drawCount;
  uint32_t drawCapacity;
  Draw* draws;
  PassStats stats;
};

typedef struct {
  Material* list;
  BufferView view;
  gpu_bundle_pool* bundlePool;
  gpu_bundle* bundles;
  uint32_t head;
  uint32_t tail;
} MaterialBlock;

typedef struct {
  void* next;
  gpu_bundle_pool* gpu;
  gpu_bundle* bundles;
  uint32_t cursor;
  uint32_t tick;
} BundlePool;

typedef struct {
  uint64_t hash;
  gpu_layout* gpu;
  BundlePool* head;
  BundlePool* tail;
} Layout;

typedef struct {
  gpu_texture* texture;
  uint32_t hash;
  uint32_t tick;
} ScratchTexture;

static struct {
  uint32_t ref;
  bool active;
  bool shouldPresent;
  bool timingEnabled;
  GraphicsConfig config;
  gpu_device_info device;
  gpu_features features;
  gpu_limits limits;
  gpu_stream* stream;
  gpu_barrier barrier;
  gpu_barrier transferBarrier;
  gpu_tally* timestamps;
  uint32_t timestampCount;
  uint32_t tick;
  float background[4];
  TextureFormat depthFormat;
  Texture* window;
  Pass* windowPass;
  Font* defaultFont;
  Buffer* defaultBuffer;
  Texture* defaultTexture;
  Sampler* defaultSamplers[2];
  Shader* defaultShaders[DEFAULT_SHADER_COUNT];
  gpu_vertex_format vertexFormats[VERTEX_FORMAT_COUNT];
  Readback* oldestReadback;
  Readback* newestReadback;
  Material* defaultMaterial;
  size_t materialBlock;
  arr_t(MaterialBlock) materialBlocks;
  BufferAllocator bufferAllocators[4];
  arr_t(ScratchTexture) scratchTextures;
  map_t passLookup;
  map_t pipelineLookup;
  gpu_pipeline* pipelines;
  uint32_t pipelineCount;
  arr_t(Layout) layouts;
  Allocator allocator;
} state;

// Helpers

static void* tempAlloc(Allocator* allocator, size_t size);
static size_t tempPush(Allocator* allocator);
static void tempPop(Allocator* allocator, size_t stack);
static gpu_pipeline* getPipeline(uint32_t index);
static BufferBlock* getBlock(gpu_buffer_type type, uint32_t size);
static void freeBlock(BufferAllocator* allocator, BufferBlock* block);
static BufferView allocateBuffer(BufferAllocator* allocator, gpu_buffer_type type, uint32_t size, size_t align);
static BufferView getBuffer(gpu_buffer_type type, uint32_t size, size_t align);
static int u64cmp(const void* a, const void* b);
static uint32_t lcm(uint32_t a, uint32_t b);
static void beginFrame(void);
static void flushTransfers(void);
static void processReadbacks(void);
static gpu_pass* getPass(Canvas* canvas);
static size_t getLayout(gpu_slot* slots, uint32_t count);
static gpu_bundle* getBundle(size_t layout, gpu_binding* bindings, uint32_t count);
static gpu_texture* getScratchTexture(gpu_stream* stream, Canvas* canvas, TextureFormat format, bool srgb);
static bool isDepthFormat(TextureFormat format);
static bool supportsSRGB(TextureFormat format);
static uint32_t measureTexture(TextureFormat format, uint32_t w, uint32_t h, uint32_t d);
static void checkTextureBounds(const TextureInfo* info, uint32_t offset[4], uint32_t extent[3]);
static void mipmapTexture(gpu_stream* stream, Texture* texture, uint32_t base, uint32_t count);
static ShaderResource* findShaderResource(Shader* shader, const char* name, size_t length);
static Access* getNextAccess(Pass* pass, int type, bool texture);
static void trackBuffer(Pass* pass, Buffer* buffer, gpu_phase phase, gpu_cache cache);
static void trackTexture(Pass* pass, Texture* texture, gpu_phase phase, gpu_cache cache);
static void trackMaterial(Pass* pass, Material* material);
static bool syncResource(Access* access, gpu_barrier* barrier);
static gpu_barrier syncTransfer(Sync* sync, gpu_phase phase, gpu_cache cache);
static void updateModelTransforms(Model* model, uint32_t nodeIndex, float* parent);
static void checkShaderFeatures(uint32_t* features, uint32_t count);
static void onResize(uint32_t width, uint32_t height);
static void onMessage(void* context, const char* message, bool severe);
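// Typical temp-memory pattern used with the allocator declared above (an
// illustrative sketch, not code from this file): everything allocated after
// tempPush is freed in bulk by the matching tempPop.
//
//   size_t stack = tempPush(&state.allocator);
//   uint16_t* scratch = tempAlloc(&state.allocator, count * sizeof(uint16_t));
//   /* ... use scratch ... */
//   tempPop(&state.allocator, stack);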
// Entry

bool lovrGraphicsInit(GraphicsConfig* config) {
  if (atomic_fetch_add(&state.ref, 1)) return false;

  gpu_config gpu = {
    .debug = config->debug,
    .fnLog = onMessage,
    .fnAlloc = lovrMalloc,
    .fnFree = lovrFree,
    .engineName = "LOVR",
    .engineVersion = { LOVR_VERSION_MAJOR, LOVR_VERSION_MINOR, LOVR_VERSION_PATCH },
    .device = &state.device,
    .features = &state.features,
    .limits = &state.limits,
#ifdef LOVR_VK
    .vk.cacheData = config->cacheData,
    .vk.cacheSize = config->cacheSize,
#endif
#if defined(LOVR_VK) && !defined(LOVR_DISABLE_HEADSET)
    .vk.getPhysicalDevice = lovrHeadsetInterface ? lovrHeadsetInterface->getVulkanPhysicalDevice : NULL,
    .vk.createInstance = lovrHeadsetInterface ? lovrHeadsetInterface->createVulkanInstance : NULL,
    .vk.createDevice = lovrHeadsetInterface ? lovrHeadsetInterface->createVulkanDevice : NULL,
#endif
  };

  if (!gpu_init(&gpu)) {
    lovrThrow("Failed to initialize GPU");
  }
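  // When a headset driver is active, the gpu_config above routes Vulkan
  // instance, physical device, and device creation through it, so the
  // graphics module and the OpenXR runtime share a single GPU device.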
  state.config = *config;
  state.timingEnabled = config->debug;

  // Temporary frame memory uses a large 1GB virtual memory allocation, committing pages as needed
  state.allocator.length = 1 << 14;
  state.allocator.limit = 1 << 30;
  state.allocator.memory = os_vm_init(state.allocator.limit);
  os_vm_commit(state.allocator.memory, state.allocator.length);

  state.pipelines = os_vm_init(MAX_PIPELINES * gpu_sizeof_pipeline());
  lovrAssert(state.pipelines, "Out of memory");

  map_init(&state.passLookup, 4);
  map_init(&state.pipelineLookup, 64);
  arr_init(&state.layouts, realloc);
  arr_init(&state.materialBlocks, realloc);
  arr_init(&state.scratchTextures, realloc);

  gpu_slot builtinSlots[] = {
    { 0, GPU_SLOT_UNIFORM_BUFFER, GPU_STAGE_GRAPHICS }, // Globals
    { 1, GPU_SLOT_UNIFORM_BUFFER_DYNAMIC, GPU_STAGE_GRAPHICS }, // Cameras
    { 2, GPU_SLOT_UNIFORM_BUFFER_DYNAMIC, GPU_STAGE_GRAPHICS }, // DrawData
    { 3, GPU_SLOT_SAMPLER, GPU_STAGE_GRAPHICS } // Sampler
  };

  size_t builtinLayout = getLayout(builtinSlots, COUNTOF(builtinSlots));
  if (builtinLayout != LAYOUT_BUILTINS) lovrUnreachable();

  gpu_slot materialSlots[] = {
    { 0, GPU_SLOT_UNIFORM_BUFFER, GPU_STAGE_GRAPHICS }, // Data
    { 1, GPU_SLOT_SAMPLED_TEXTURE, GPU_STAGE_GRAPHICS }, // Color
    { 2, GPU_SLOT_SAMPLED_TEXTURE, GPU_STAGE_GRAPHICS }, // Glow
    { 3, GPU_SLOT_SAMPLED_TEXTURE, GPU_STAGE_GRAPHICS }, // Occlusion
    { 4, GPU_SLOT_SAMPLED_TEXTURE, GPU_STAGE_GRAPHICS }, // Metalness
    { 5, GPU_SLOT_SAMPLED_TEXTURE, GPU_STAGE_GRAPHICS }, // Roughness
    { 6, GPU_SLOT_SAMPLED_TEXTURE, GPU_STAGE_GRAPHICS }, // Clearcoat
    { 7, GPU_SLOT_SAMPLED_TEXTURE, GPU_STAGE_GRAPHICS } // Normal
  };

  size_t materialLayout = getLayout(materialSlots, COUNTOF(materialSlots));
  if (materialLayout != LAYOUT_MATERIAL) lovrUnreachable();

  gpu_slot uniformSlots[] = {
    { 0, GPU_SLOT_UNIFORM_BUFFER_DYNAMIC, GPU_STAGE_GRAPHICS | GPU_STAGE_COMPUTE }
  };

  size_t uniformLayout = getLayout(uniformSlots, COUNTOF(uniformSlots));
  if (uniformLayout != LAYOUT_UNIFORMS) lovrUnreachable();
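  // getLayout returns the index of the (possibly newly created) layout in
  // state.layouts, so the three layouts above are guaranteed to land at
  // indices 0, 1, and 2, matching the LAYOUT_BUILTINS, LAYOUT_MATERIAL, and
  // LAYOUT_UNIFORMS constants used throughout the file.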
  float data[] = { 0.f, 0.f, 0.f, 0.f, 1.f, 1.f, 1.f, 1.f };

  state.defaultBuffer = lovrBufferCreate(&(BufferInfo) {
    .size = sizeof(data),
    .label = "Default Buffer"
  }, NULL);

  beginFrame();
  BufferView view = getBuffer(GPU_BUFFER_UPLOAD, sizeof(data), 4);
  memcpy(view.pointer, data, sizeof(data));
  gpu_copy_buffers(state.stream, view.buffer, state.defaultBuffer->gpu, view.offset, state.defaultBuffer->base, sizeof(data));

  Image* image = lovrImageCreateRaw(4, 4, FORMAT_RGBA8, false);
  float white[4] = { 1.f, 1.f, 1.f, 1.f };
  for (uint32_t y = 0; y < 4; y++) {
    for (uint32_t x = 0; x < 4; x++) {
      lovrImageSetPixel(image, x, y, white);
    }
  }

  state.defaultTexture = lovrTextureCreate(&(TextureInfo) {
    .type = TEXTURE_2D,
    .usage = TEXTURE_SAMPLE,
    .format = FORMAT_RGBA8,
    .width = 4,
    .height = 4,
    .layers = 1,
    .mipmaps = 1,
    .srgb = false,
    .imageCount = 1,
    .images = &image,
    .label = "Default Texture"
  });

  lovrRelease(image, lovrImageDestroy);

  for (uint32_t i = 0; i < 2; i++) {
    state.defaultSamplers[i] = lovrSamplerCreate(&(SamplerInfo) {
      .min = i == 0 ? FILTER_NEAREST : FILTER_LINEAR,
      .mag = i == 0 ? FILTER_NEAREST : FILTER_LINEAR,
      .mip = i == 0 ? FILTER_NEAREST : FILTER_LINEAR,
      .wrap = { WRAP_REPEAT, WRAP_REPEAT, WRAP_REPEAT },
      .range = { 0.f, -1.f }
    });
  }

  state.vertexFormats[VERTEX_SHAPE] = (gpu_vertex_format) {
    .bufferCount = 2,
    .attributeCount = 5,
    .bufferStrides[0] = sizeof(ShapeVertex),
    .attributes[0] = { 0, 10, offsetof(ShapeVertex, position), GPU_TYPE_F32x3 },
    .attributes[1] = { 0, 11, offsetof(ShapeVertex, normal), GPU_TYPE_F32x3 },
    .attributes[2] = { 0, 12, offsetof(ShapeVertex, uv), GPU_TYPE_F32x2 },
    .attributes[3] = { 1, 13, 16, GPU_TYPE_F32x4 },
    .attributes[4] = { 1, 14, 0, GPU_TYPE_F32x4 }
  };

  state.vertexFormats[VERTEX_POINT] = (gpu_vertex_format) {
    .bufferCount = 2,
    .attributeCount = 5,
    .bufferStrides[0] = 12,
    .attributes[0] = { 0, 10, 0, GPU_TYPE_F32x3 },
    .attributes[1] = { 1, 11, 0, GPU_TYPE_F32x4 },
    .attributes[2] = { 1, 12, 0, GPU_TYPE_F32x4 },
    .attributes[3] = { 1, 13, 16, GPU_TYPE_F32x4 },
    .attributes[4] = { 1, 14, 0, GPU_TYPE_F32x4 }
  };

  state.vertexFormats[VERTEX_GLYPH] = (gpu_vertex_format) {
    .bufferCount = 2,
    .attributeCount = 5,
    .bufferStrides[0] = sizeof(GlyphVertex),
    .attributes[0] = { 0, 10, offsetof(GlyphVertex, position), GPU_TYPE_F32x2 },
    .attributes[1] = { 1, 11, 0, GPU_TYPE_F32x4 },
    .attributes[2] = { 0, 12, offsetof(GlyphVertex, uv), GPU_TYPE_UN16x2 },
    .attributes[3] = { 0, 13, offsetof(GlyphVertex, color), GPU_TYPE_UN8x4 },
    .attributes[4] = { 1, 14, 0, GPU_TYPE_F32x4 }
  };

  state.vertexFormats[VERTEX_MODEL] = (gpu_vertex_format) {
    .bufferCount = 2,
    .attributeCount = 5,
    .bufferStrides[0] = sizeof(ModelVertex),
    .attributes[0] = { 0, 10, offsetof(ModelVertex, position), GPU_TYPE_F32x3 },
    .attributes[1] = { 0, 11, offsetof(ModelVertex, normal), GPU_TYPE_SN10x3 },
    .attributes[2] = { 0, 12, offsetof(ModelVertex, uv), GPU_TYPE_F32x2 },
    .attributes[3] = { 0, 13, offsetof(ModelVertex, color), GPU_TYPE_UN8x4 },
    .attributes[4] = { 0, 14, offsetof(ModelVertex, tangent), GPU_TYPE_SN10x3 }
  };

  state.vertexFormats[VERTEX_EMPTY] = (gpu_vertex_format) {
    .bufferCount = 2,
    .attributeCount = 5,
    .attributes[0] = { 1, 10, 0, GPU_TYPE_F32x3 },
    .attributes[1] = { 1, 11, 0, GPU_TYPE_F32x3 },
    .attributes[2] = { 1, 12, 0, GPU_TYPE_F32x2 },
    .attributes[3] = { 1, 13, 16, GPU_TYPE_F32x4 },
    .attributes[4] = { 1, 14, 0, GPU_TYPE_F32x4 }
  };
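  // Every built-in vertex format binds attributes to locations 10-14
  // (position, normal, uv, color, tangent). Buffer 1 is the default buffer
  // bound in recordRenderPass, whose contents are { 0, 0, 0, 0, 1, 1, 1, 1 },
  // so formats lacking a real attribute can point it at offset 0 (zeros) or
  // offset 16 (ones) and still satisfy the shader's inputs.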
  state.defaultMaterial = lovrMaterialCreate(&(MaterialInfo) {
    .data.color = { 1.f, 1.f, 1.f, 1.f },
    .data.uvScale = { 1.f, 1.f },
    .data.metalness = 0.f,
    .data.roughness = 1.f,
    .data.normalScale = 1.f,
    .texture = state.defaultTexture
  });

  float16Init();

#ifdef LOVR_USE_GLSLANG
  glslang_initialize_process();
#endif

  return true;
}

void lovrGraphicsDestroy(void) {
  if (atomic_fetch_sub(&state.ref, 1) != 1) return;

#ifndef LOVR_DISABLE_HEADSET
  // If there's an active headset session it needs to be stopped so it can clean up its Pass and
  // swapchain textures before gpu_destroy is called.  This is really hacky and should be solved
  // with module-level refcounting in the future.
  if (lovrHeadsetInterface && lovrHeadsetInterface->stop) {
    lovrHeadsetInterface->stop();
  }
#endif

  Readback* readback = state.oldestReadback;
  while (readback) {
    Readback* next = readback->next;
    lovrReadbackDestroy(readback);
    readback = next;
  }

  if (state.timestamps) gpu_tally_destroy(state.timestamps);
  lovrFree(state.timestamps);

  lovrRelease(state.window, lovrTextureDestroy);
  lovrRelease(state.windowPass, lovrPassDestroy);
  lovrRelease(state.defaultFont, lovrFontDestroy);
  lovrRelease(state.defaultBuffer, lovrBufferDestroy);
  lovrRelease(state.defaultTexture, lovrTextureDestroy);
  lovrRelease(state.defaultSamplers[0], lovrSamplerDestroy);
  lovrRelease(state.defaultSamplers[1], lovrSamplerDestroy);

  for (size_t i = 0; i < COUNTOF(state.defaultShaders); i++) {
    lovrRelease(state.defaultShaders[i], lovrShaderDestroy);
  }

  lovrRelease(state.defaultMaterial, lovrMaterialDestroy);

  for (size_t i = 0; i < state.materialBlocks.length; i++) {
    MaterialBlock* block = &state.materialBlocks.data[i];
    BufferBlock* current = state.bufferAllocators[GPU_BUFFER_STATIC].current;
    if (block->view.block != current && atomic_fetch_sub(&block->view.block->ref, 1) == 1) {
      freeBlock(&state.bufferAllocators[GPU_BUFFER_STATIC], block->view.block);
    }
    gpu_bundle_pool_destroy(block->bundlePool);
    lovrFree(block->list);
    lovrFree(block->bundlePool);
    lovrFree(block->bundles);
  }
  arr_free(&state.materialBlocks);

  for (size_t i = 0; i < state.scratchTextures.length; i++) {
    gpu_texture_destroy(state.scratchTextures.data[i].texture);
    lovrFree(state.scratchTextures.data[i].texture);
  }
  arr_free(&state.scratchTextures);

  for (size_t i = 0; i < state.pipelineCount; i++) {
    gpu_pipeline_destroy(getPipeline(i));
  }
  os_vm_free(state.pipelines, MAX_PIPELINES * gpu_sizeof_pipeline());
  map_free(&state.pipelineLookup);

  for (size_t i = 0; i < state.passLookup.size; i++) {
    if (state.passLookup.values[i] != MAP_NIL) {
      gpu_pass* pass = (gpu_pass*) (uintptr_t) state.passLookup.values[i];
      gpu_pass_destroy(pass);
      lovrFree(pass);
    }
  }
  map_free(&state.passLookup);

  for (size_t i = 0; i < COUNTOF(state.bufferAllocators); i++) {
    BufferBlock* block = state.bufferAllocators[i].freelist;
    while (block) {
      gpu_buffer_destroy(block->handle);
      BufferBlock* next = block->next;
      lovrFree(block);
      block = next;
    }

    BufferBlock* current = state.bufferAllocators[i].current;
    if (current) {
      gpu_buffer_destroy(current->handle);
      lovrFree(current);
    }
  }

  for (size_t i = 0; i < state.layouts.length; i++) {
    BundlePool* pool = state.layouts.data[i].head;
    while (pool) {
      BundlePool* next = pool->next;
      gpu_bundle_pool_destroy(pool->gpu);
      lovrFree(pool->gpu);
      lovrFree(pool->bundles);
      lovrFree(pool);
      pool = next;
    }
    gpu_layout_destroy(state.layouts.data[i].gpu);
    lovrFree(state.layouts.data[i].gpu);
  }
  arr_free(&state.layouts);

  gpu_destroy();

#ifdef LOVR_USE_GLSLANG
  glslang_finalize_process();
#endif

  os_vm_free(state.allocator.memory, state.allocator.limit);
  memset(&state, 0, sizeof(state));
}

bool lovrGraphicsIsInitialized(void) {
  return state.ref;
}

void lovrGraphicsGetDevice(GraphicsDevice* device) {
  device->deviceId = state.device.deviceId;
  device->vendorId = state.device.vendorId;
  device->name = state.device.deviceName;
  device->renderer = state.device.renderer;
  device->subgroupSize = state.device.subgroupSize;
  device->discrete = state.device.discrete;
}

void lovrGraphicsGetFeatures(GraphicsFeatures* features) {
  features->textureBC = state.features.textureBC;
  features->textureASTC = state.features.textureASTC;
  features->wireframe = state.features.wireframe;
  features->depthClamp = state.features.depthClamp;
  features->depthResolve = state.features.depthResolve;
  features->indirectDrawFirstInstance = state.features.indirectDrawFirstInstance;
  features->float64 = state.features.float64;
  features->int64 = state.features.int64;
  features->int16 = state.features.int16;
}
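// The subtractions below appear to reserve the slots LOVR claims for itself in
// each stage: 3 built-in uniform buffers (Globals/Cameras/DrawData), the 7
// material textures, and the default sampler, so the reported limits reflect
// what user shaders can actually declare.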
void lovrGraphicsGetLimits(GraphicsLimits* limits) {
  limits->textureSize2D = state.limits.textureSize2D;
  limits->textureSize3D = state.limits.textureSize3D;
  limits->textureSizeCube = state.limits.textureSizeCube;
  limits->textureLayers = state.limits.textureLayers;
  limits->renderSize[0] = state.limits.renderSize[0];
  limits->renderSize[1] = state.limits.renderSize[1];
  limits->renderSize[2] = state.limits.renderSize[2];
  limits->uniformBuffersPerStage = MIN(state.limits.uniformBuffersPerStage - 3, MAX_SHADER_RESOURCES);
  limits->storageBuffersPerStage = MIN(state.limits.storageBuffersPerStage, MAX_SHADER_RESOURCES);
  limits->sampledTexturesPerStage = MIN(state.limits.sampledTexturesPerStage - 7, MAX_SHADER_RESOURCES);
  limits->storageTexturesPerStage = MIN(state.limits.storageTexturesPerStage, MAX_SHADER_RESOURCES);
  limits->samplersPerStage = MIN(state.limits.samplersPerStage - 1, MAX_SHADER_RESOURCES);
  limits->resourcesPerShader = MAX_SHADER_RESOURCES;
  limits->uniformBufferRange = state.limits.uniformBufferRange;
  limits->storageBufferRange = state.limits.storageBufferRange;
  limits->uniformBufferAlign = state.limits.uniformBufferAlign;
  limits->storageBufferAlign = state.limits.storageBufferAlign;
  limits->vertexAttributes = 10;
  limits->vertexBufferStride = state.limits.vertexBufferStride;
  limits->vertexShaderOutputs = 10;
  limits->clipDistances = state.limits.clipDistances;
  limits->cullDistances = state.limits.cullDistances;
  limits->clipAndCullDistances = state.limits.clipAndCullDistances;
  memcpy(limits->workgroupCount, state.limits.workgroupCount, 3 * sizeof(uint32_t));
  memcpy(limits->workgroupSize, state.limits.workgroupSize, 3 * sizeof(uint32_t));
  limits->totalWorkgroupSize = state.limits.totalWorkgroupSize;
  limits->computeSharedMemory = state.limits.computeSharedMemory;
  limits->shaderConstantSize = state.limits.pushConstantSize;
  limits->indirectDrawCount = state.limits.indirectDrawCount;
  limits->instances = state.limits.instances;
  limits->anisotropy = state.limits.anisotropy;
  limits->pointSize = state.limits.pointSize;
}

uint32_t lovrGraphicsGetFormatSupport(uint32_t format, uint32_t features) {
  uint32_t support = 0;
  for (uint32_t i = 0; i < 2; i++) {
    uint8_t supports = state.features.formats[format][i];
    if (features) {
      support |= (
        ((~features & TEXTURE_FEATURE_SAMPLE) || (supports & GPU_FEATURE_SAMPLE)) &&
        ((~features & TEXTURE_FEATURE_RENDER) || (supports & GPU_FEATURE_RENDER)) &&
        ((~features & TEXTURE_FEATURE_STORAGE) || (supports & GPU_FEATURE_STORAGE)) &&
        ((~features & TEXTURE_FEATURE_BLIT) || (supports & GPU_FEATURE_BLIT))
      ) << i;
    } else {
      support |= !!supports << i;
    }
  }
  return support;
}
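// The returned mask has bit 0 set when the format supports the requested
// features with linear encoding and bit 1 set for the sRGB encoding (the two
// entries of features.formats[format]).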
void lovrGraphicsGetShaderCache(void* data, size_t* size) {
  gpu_pipeline_get_cache(data, size);
}

void lovrGraphicsGetBackgroundColor(float background[4]) {
  background[0] = lovrMathLinearToGamma(state.background[0]);
  background[1] = lovrMathLinearToGamma(state.background[1]);
  background[2] = lovrMathLinearToGamma(state.background[2]);
  background[3] = state.background[3];
}

void lovrGraphicsSetBackgroundColor(float background[4]) {
  state.background[0] = lovrMathGammaToLinear(background[0]);
  state.background[1] = lovrMathGammaToLinear(background[1]);
  state.background[2] = lovrMathGammaToLinear(background[2]);
  state.background[3] = background[3];
}

bool lovrGraphicsIsTimingEnabled(void) {
  return state.timingEnabled;
}

void lovrGraphicsSetTimingEnabled(bool enable) {
  state.timingEnabled = enable;
}

static void recordComputePass(Pass* pass, gpu_stream* stream) {
  if (pass->computeCount == 0) {
    return;
  }

  gpu_pipeline* pipeline = NULL;
  gpu_bundle_info* bundleInfo = NULL;
  gpu_bundle* uniformBundle = NULL;
  gpu_buffer* uniformBuffer = NULL;
  uint32_t uniformOffset = 0;

  gpu_compute_begin(stream);

  for (uint32_t i = 0; i < pass->computeCount; i++) {
    Compute* compute = &pass->computes[i];

    if (compute->shader->computePipeline != pipeline) {
      gpu_bind_pipeline(stream, compute->shader->computePipeline, GPU_PIPELINE_COMPUTE);
      pipeline = compute->shader->computePipeline;
    }

    if (compute->bundleInfo != bundleInfo) {
      bundleInfo = compute->bundleInfo;
      gpu_bundle* bundle = getBundle(compute->shader->layout, bundleInfo->bindings, bundleInfo->count);
      gpu_bind_bundles(stream, compute->shader->gpu, &bundle, 0, 1, NULL, 0);
    }

    if (compute->uniformBuffer != uniformBuffer || compute->uniformOffset != uniformOffset) {
      if (compute->uniformBuffer != uniformBuffer) {
        uniformBundle = getBundle(LAYOUT_UNIFORMS, &(gpu_binding) {
          .number = 0,
          .type = GPU_SLOT_UNIFORM_BUFFER_DYNAMIC,
          .buffer.object = compute->uniformBuffer,
          .buffer.extent = compute->shader->uniformSize
        }, 1);
      }

      gpu_bind_bundles(stream, compute->shader->gpu, &uniformBundle, 1, 1, &compute->uniformOffset, 1);
      uniformBuffer = compute->uniformBuffer;
      uniformOffset = compute->uniformOffset;
    }

    if (compute->flags & COMPUTE_INDIRECT) {
      gpu_compute_indirect(stream, compute->indirect.buffer, compute->indirect.offset);
    } else {
      gpu_compute(stream, compute->x, compute->y, compute->z);
    }

    if ((compute->flags & COMPUTE_BARRIER) && i < pass->computeCount - 1) {
      gpu_sync(stream, &(gpu_barrier) {
        .prev = GPU_PHASE_SHADER_COMPUTE,
        .next = GPU_PHASE_INDIRECT | GPU_PHASE_SHADER_COMPUTE,
        .flush = GPU_CACHE_STORAGE_WRITE,
        .clear = GPU_CACHE_INDIRECT | GPU_CACHE_UNIFORM | GPU_CACHE_TEXTURE | GPU_CACHE_STORAGE_READ
      }, 1);
    }
  }

  gpu_compute_end(stream);
}

static void recordRenderPass(Pass* pass, gpu_stream* stream) {
  Canvas* canvas = &pass->canvas;

  if (canvas->count == 0 && !canvas->depth.texture) {
    return;
  }

  // Canvas
  gpu_canvas target = { 0 };
  Texture* texture = NULL;

  // Read each attachment's texture inside the loop body so the final loop
  // increment doesn't index one past the end of canvas->color
  for (uint32_t i = 0; i < canvas->count; i++) {
    texture = canvas->color[i].texture;
    target.color[i] = (gpu_color_attachment) {
      .texture = canvas->resolve ? getScratchTexture(stream, canvas, texture->info.format, texture->info.srgb) : texture->renderView,
      .resolve = canvas->resolve ? texture->renderView : NULL,
      .clear[0] = canvas->color[i].clear[0],
      .clear[1] = canvas->color[i].clear[1],
      .clear[2] = canvas->color[i].clear[2],
      .clear[3] = canvas->color[i].clear[3]
    };
  }

  if ((texture = canvas->depth.texture) != NULL || canvas->depth.format) {
    target.depth = (gpu_depth_attachment) {
      .texture = canvas->resolve || !texture ? getScratchTexture(stream, canvas, canvas->depth.format, false) : texture->renderView,
      .resolve = canvas->resolve && texture ? texture->renderView : NULL,
      .clear = canvas->depth.clear
    };
  }

  target.pass = pass->gpu;
  target.width = canvas->width;
  target.height = canvas->height;
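  // When rendering with MSAA (canvas->resolve), draws land in transient
  // scratch textures and each user texture only receives the resolved result,
  // which is why the scratch texture is the attachment above and
  // texture->renderView is wired up as the resolve target instead.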
  // Cameras
  Camera* camera = pass->cameras;
  for (uint32_t c = 0; c < pass->cameraCount; c++) {
    for (uint32_t v = 0; v < canvas->views; v++, camera++) {
      mat4_init(camera->viewProjection, camera->projection);
      mat4_init(camera->inverseProjection, camera->projection);
      mat4_mul(camera->viewProjection, camera->viewMatrix);
      mat4_invert(camera->inverseProjection);
    }
  }

  // Frustum Culling
  uint32_t activeDrawCount = 0;
  uint16_t* activeDraws = tempAlloc(&state.allocator, pass->drawCount * sizeof(uint16_t));

  if (pass->flags & NEEDS_VIEW_CULL) {
    typedef struct { float planes[6][4]; } Frustum;
    Frustum* frusta = tempAlloc(&state.allocator, canvas->views * sizeof(Frustum));
    uint32_t drawIndex = 0;

    for (uint32_t c = 0; c < pass->cameraCount; c++) {
      for (uint32_t v = 0; v < canvas->views; v++) {
        float* m = pass->cameras[c * canvas->views + v].viewProjection;
        memcpy(frusta[v].planes, (float[6][4]) {
          { (m[3] + m[0]), (m[7] + m[4]), (m[11] + m[8]), (m[15] + m[12]) }, // Left
          { (m[3] - m[0]), (m[7] - m[4]), (m[11] - m[8]), (m[15] - m[12]) }, // Right
          { (m[3] + m[1]), (m[7] + m[5]), (m[11] + m[9]), (m[15] + m[13]) }, // Bottom
          { (m[3] - m[1]), (m[7] - m[5]), (m[11] - m[9]), (m[15] - m[13]) }, // Top
          { m[2], m[6], m[10], m[14] }, // Near
          { (m[3] - m[2]), (m[7] - m[6]), (m[11] - m[10]), (m[15] - m[14]) } // Far
        }, sizeof(Frustum));
      }

      while (drawIndex < pass->drawCount) {
        Draw* draw = &pass->draws[drawIndex];

        if (draw->camera != c) {
          break;
        }

        if (~draw->flags & DRAW_HAS_BOUNDS) {
          activeDraws[activeDrawCount++] = drawIndex++;
          continue;
        }

        float* center = draw->bounds + 0;
        float* extent = draw->bounds + 3;

        float corners[8][3] = {
          { center[0] - extent[0], center[1] - extent[1], center[2] - extent[2] },
          { center[0] - extent[0], center[1] - extent[1], center[2] + extent[2] },
          { center[0] - extent[0], center[1] + extent[1], center[2] - extent[2] },
          { center[0] - extent[0], center[1] + extent[1], center[2] + extent[2] },
          { center[0] + extent[0], center[1] - extent[1], center[2] - extent[2] },
          { center[0] + extent[0], center[1] - extent[1], center[2] + extent[2] },
          { center[0] + extent[0], center[1] + extent[1], center[2] - extent[2] },
          { center[0] + extent[0], center[1] + extent[1], center[2] + extent[2] }
        };

        for (uint32_t i = 0; i < COUNTOF(corners); i++) {
          mat4_mulPoint(draw->transform, corners[i]);
        }

        uint32_t visible = canvas->views;
        for (uint32_t v = 0; v < canvas->views; v++) {
          for (uint32_t p = 0; p < 6; p++) {
            bool inside = false;

            // (j, not c, so the camera loop index isn't shadowed)
            for (uint32_t j = 0; j < COUNTOF(corners); j++) {
              if (vec3_dot(corners[j], frusta[v].planes[p]) + frusta[v].planes[p][3] > 0.f) {
                inside = true;
                break;
              }
            }

            if (!inside) {
              visible--;
              break;
            }
          }
        }

        if (visible) {
          activeDraws[activeDrawCount++] = drawIndex;
        }

        drawIndex++;
      }
    }
  } else {
    for (uint32_t i = 0; i < pass->drawCount; i++) {
      activeDraws[activeDrawCount++] = i;
    }
  }

  pass->stats.drawsCulled = pass->drawCount - activeDrawCount;

  if (activeDrawCount == 0) {
    gpu_render_begin(stream, &target);
    gpu_render_end(stream, &target);
    return;
  }
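  // The frustum planes above come straight from each view-projection matrix
  // (the Gribb-Hartmann extraction): every plane is a combination of matrix
  // rows (e.g. left = row3 + row0), and a draw survives culling if, for some
  // view, every plane has at least one of the 8 transformed box corners on
  // its positive side.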
  // Builtins
  gpu_binding builtins[] = {
    { 0, GPU_SLOT_UNIFORM_BUFFER, .buffer = { 0 } },
    { 1, GPU_SLOT_UNIFORM_BUFFER_DYNAMIC, .buffer = { 0 } },
    { 2, GPU_SLOT_UNIFORM_BUFFER_DYNAMIC, .buffer = { 0 } },
    { 3, GPU_SLOT_SAMPLER, .sampler = pass->sampler ? pass->sampler->gpu : state.defaultSamplers[FILTER_LINEAR]->gpu }
  };

  BufferView view;
  size_t align = state.limits.uniformBufferAlign;

  // Globals
  view = getBuffer(GPU_BUFFER_STREAM, sizeof(Globals), align);
  builtins[0].buffer = (gpu_buffer_binding) { view.buffer, view.offset, view.extent };
  Globals* global = view.pointer;
  global->resolution[0] = canvas->width;
  global->resolution[1] = canvas->height;
  global->time = lovrHeadsetInterface ? lovrHeadsetInterface->getDisplayTime() : os_get_time();

  // Cameras
  view = getBuffer(GPU_BUFFER_STREAM, pass->cameraCount * canvas->views * sizeof(Camera), align);
  builtins[1].buffer = (gpu_buffer_binding) { view.buffer, view.offset, view.extent };
  memcpy(view.pointer, pass->cameras, pass->cameraCount * canvas->views * sizeof(Camera));

  // DrawData
  uint32_t alignedDrawCount = activeDrawCount <= 256 ? activeDrawCount : ALIGN(activeDrawCount, 256);
  view = getBuffer(GPU_BUFFER_STREAM, alignedDrawCount * sizeof(DrawData), align);
  builtins[2].buffer = (gpu_buffer_binding) { view.buffer, view.offset, MIN(activeDrawCount, 256) * sizeof(DrawData) };
  DrawData* data = view.pointer;

  for (uint32_t i = 0; i < activeDrawCount; i++, data++) {
    Draw* draw = &pass->draws[activeDraws[i]];
    // transform is provided as 4x3 row-major matrix for packing reasons, need to transpose
    data->transform[0] = draw->transform[0];
    data->transform[1] = draw->transform[4];
    data->transform[2] = draw->transform[8];
    data->transform[3] = draw->transform[12];
    data->transform[4] = draw->transform[1];
    data->transform[5] = draw->transform[5];
    data->transform[6] = draw->transform[9];
    data->transform[7] = draw->transform[13];
    data->transform[8] = draw->transform[2];
    data->transform[9] = draw->transform[6];
    data->transform[10] = draw->transform[10];
    data->transform[11] = draw->transform[14];
    data->color[0] = draw->color[0];
    data->color[1] = draw->color[1];
    data->color[2] = draw->color[2];
    data->color[3] = draw->color[3];
  }

  gpu_bundle* builtinBundle = getBundle(LAYOUT_BUILTINS, builtins, COUNTOF(builtins));

  // Pipelines
  if (!pass->draws[pass->drawCount - 1].pipeline) {
    uint32_t first = 0;
    while (pass->draws[first].pipeline) {
      first++; // TODO could binary search or cache
    }

    for (uint32_t i = first; i < pass->drawCount; i++) {
      Draw* prev = &pass->draws[i - 1];
      Draw* draw = &pass->draws[i];

      if (i > 0 && draw->pipelineInfo == prev->pipelineInfo) {
        draw->pipeline = prev->pipeline;
        continue;
      }

      uint64_t hash = hash64(draw->pipelineInfo, sizeof(gpu_pipeline_info));
      uint64_t index = map_get(&state.pipelineLookup, hash);

      if (index == MAP_NIL) {
        lovrAssert(state.pipelineCount < MAX_PIPELINES, "Too many pipelines!");
        index = state.pipelineCount++;
        os_vm_commit(state.pipelines, state.pipelineCount * gpu_sizeof_pipeline());
        gpu_pipeline_init_graphics(getPipeline(index), draw->pipelineInfo);
        map_set(&state.pipelineLookup, hash, index);
      }

      draw->pipeline = getPipeline(index);
    }
  }

  // Bundles
  Draw* prev = NULL;
  for (uint32_t i = 0; i < activeDrawCount; i++) {
    Draw* draw = &pass->draws[activeDraws[i]];

    if (i > 0 && draw->bundleInfo == prev->bundleInfo) {
      draw->bundle = prev->bundle;
      continue;
    }

    if (draw->bundleInfo) {
      draw->bundle = getBundle(draw->shader->layout, draw->bundleInfo->bindings, draw->bundleInfo->count);
    } else {
      draw->bundle = NULL;
    }

    prev = draw;
  }
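  // Pipelines are deduplicated by hashing the entire gpu_pipeline_info; the
  // lookup maps each hash to an index into a virtually-allocated pipeline
  // array that is committed page by page and never moves, so gpu_pipeline
  // pointers stay valid for the lifetime of the module.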
  // Tally
  if (pass->tally.active) {
    lovrPassFinishTally(pass);
  }

  if (pass->tally.buffer && pass->tally.count > 0) {
    if (!pass->tally.gpu) {
      pass->tally.gpu = lovrMalloc(gpu_sizeof_tally());
      gpu_tally_init(pass->tally.gpu, &(gpu_tally_info) {
        .type = GPU_TALLY_PIXEL,
        .count = MAX_TALLIES * state.limits.renderSize[2]
      });

      BufferInfo info = { .size = MAX_TALLIES * state.limits.renderSize[2] * sizeof(uint32_t) };
      pass->tally.tempBuffer = lovrBufferCreate(&info, NULL);
    }

    gpu_clear_tally(stream, pass->tally.gpu, 0, pass->tally.count * canvas->views);
  }

  // Do the thing!
  gpu_render_begin(stream, &target);

  float defaultViewport[6] = { 0.f, 0.f, (float) canvas->width, (float) canvas->height, 0.f, 1.f };
  uint32_t defaultScissor[4] = { 0, 0, canvas->width, canvas->height };
  float* viewport = pass->viewport[2] == 0.f && pass->viewport[3] == 0.f ? defaultViewport : pass->viewport;
  uint32_t* scissor = pass->scissor[2] == 0 && pass->scissor[3] == 0 ? defaultScissor : pass->scissor;

  gpu_set_viewport(stream, viewport, viewport + 4);
  gpu_set_scissor(stream, scissor);

  uint16_t cameraIndex = 0xffff;
  uint32_t tally = ~0u;
  gpu_pipeline* pipeline = NULL;
  gpu_bundle* bundle = NULL;
  Material* material = NULL;
  gpu_buffer* vertexBuffer = NULL;
  uint32_t vertexBufferOffset = 0;
  gpu_buffer* indexBuffer = NULL;
  gpu_buffer* uniformBuffer = NULL;
  uint32_t uniformOffset = 0;
  gpu_bundle* uniformBundle = NULL;

  gpu_bind_vertex_buffers(stream, &state.defaultBuffer->gpu, &state.defaultBuffer->base, 1, 1);

  for (uint32_t i = 0; i < activeDrawCount; i++) {
    Draw* draw = &pass->draws[activeDraws[i]];

    if (pass->tally.buffer && draw->tally != tally) {
      if (tally != ~0u) gpu_tally_finish(stream, pass->tally.gpu, tally * canvas->views);
      if (draw->tally != ~0u) gpu_tally_begin(stream, pass->tally.gpu, draw->tally * canvas->views);
      tally = draw->tally;
    }

    if (draw->pipeline != pipeline) {
      gpu_bind_pipeline(stream, draw->pipeline, GPU_PIPELINE_GRAPHICS);
      pipeline = draw->pipeline;
    }

    if ((i & 0xff) == 0 || draw->camera != cameraIndex) {
      uint32_t dynamicOffsets[] = { draw->camera * canvas->views * sizeof(Camera), (i >> 8) * 256 * sizeof(DrawData) };
      gpu_bind_bundles(stream, draw->shader->gpu, &builtinBundle, 0, 1, dynamicOffsets, COUNTOF(dynamicOffsets));
      cameraIndex = draw->camera;
    }

    if (draw->material != material) {
      gpu_bind_bundles(stream, draw->shader->gpu, &draw->material->bundle, 1, 1, NULL, 0);
      material = draw->material;
    }

    if (draw->bundle && (draw->bundle != bundle)) {
      gpu_bind_bundles(stream, draw->shader->gpu, &draw->bundle, 2, 1, NULL, 0);
      bundle = draw->bundle;
    }

    if (draw->uniformBuffer != uniformBuffer || draw->uniformOffset != uniformOffset) {
      if (draw->uniformBuffer != uniformBuffer) {
        uniformBundle = getBundle(LAYOUT_UNIFORMS, &(gpu_binding) {
          .number = 0,
          .type = GPU_SLOT_UNIFORM_BUFFER_DYNAMIC,
          .buffer.object = draw->uniformBuffer,
          .buffer.extent = draw->shader->uniformSize
        }, 1);
      }

      gpu_bind_bundles(stream, draw->shader->gpu, &uniformBundle, 3, 1, &draw->uniformOffset, 1);
      uniformBuffer = draw->uniformBuffer;
      uniformOffset = draw->uniformOffset;
    }

    if (draw->vertexBuffer && (draw->vertexBuffer != vertexBuffer || draw->vertexBufferOffset != vertexBufferOffset)) {
      gpu_bind_vertex_buffers(stream, &draw->vertexBuffer, &draw->vertexBufferOffset, 0, 1);
      vertexBuffer = draw->vertexBuffer;
      vertexBufferOffset = draw->vertexBufferOffset;
    }

    if (draw->indexBuffer && draw->indexBuffer != indexBuffer) {
      gpu_index_type indexType = (draw->flags & DRAW_INDEX32) ? GPU_INDEX_U32 : GPU_INDEX_U16;
      gpu_bind_index_buffer(stream, draw->indexBuffer, 0, indexType);
      indexBuffer = draw->indexBuffer;
    }

    uint32_t DrawID = i & 0xff;
    gpu_push_constants(stream, draw->shader->gpu, &DrawID, sizeof(DrawID));

    if (draw->flags & DRAW_INDIRECT) {
      if (draw->indexBuffer) {
        gpu_draw_indirect_indexed(stream, draw->indirect.buffer, draw->indirect.offset, draw->indirect.count, draw->indirect.stride);
      } else {
        gpu_draw_indirect(stream, draw->indirect.buffer, draw->indirect.offset, draw->indirect.count, draw->indirect.stride);
      }
    } else {
      if (draw->indexBuffer) {
        gpu_draw_indexed(stream, draw->count, draw->instances, draw->start, draw->baseVertex, 0);
      } else {
        gpu_draw(stream, draw->count, draw->instances, draw->start, 0);
      }
    }
  }
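  // Note on the loop above: each draw passes an 8-bit ID via push constants,
  // and the builtin bundle is rebound with a new DrawData dynamic offset every
  // 256 draws, so shaders index per-draw transforms/colors as drawData[DrawID]
  // without needing a separate binding per draw.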
  if (tally != ~0u) {
    gpu_tally_finish(stream, pass->tally.gpu, tally * canvas->views);
  }

  gpu_render_end(stream, &target);

  // Automipmap
  bool synchronized = false;
  for (uint32_t t = 0; t < canvas->count; t++) {
    if (canvas->color[t].texture->info.mipmaps > 1) {
      if (!synchronized) {
        synchronized = true;
        gpu_sync(stream, &(gpu_barrier) {
          .prev = GPU_PHASE_COLOR,
          .next = GPU_PHASE_BLIT,
          .flush = GPU_CACHE_COLOR_WRITE,
          .clear = GPU_CACHE_TRANSFER_READ
        }, 1);
      }
      mipmapTexture(stream, canvas->color[t].texture, 0, ~0u);
    }
  }

  texture = canvas->depth.texture;
  if (canvas->depth.texture && canvas->depth.texture->info.mipmaps > 1) {
    gpu_sync(stream, &(gpu_barrier) {
      .prev = GPU_PHASE_DEPTH_EARLY | GPU_PHASE_DEPTH_LATE,
      .next = GPU_PHASE_BLIT,
      .flush = GPU_CACHE_DEPTH_WRITE,
      .clear = GPU_CACHE_TRANSFER_READ
    }, 1);
    mipmapTexture(stream, canvas->depth.texture, 0, ~0u);
  }

  // Tally copy
  if (pass->tally.buffer && pass->tally.count > 0) {
    Tally* tally = &pass->tally;
    uint32_t count = MIN(tally->count, (tally->buffer->info.size - tally->bufferOffset) / 4);
    Buffer* tempBuffer = pass->tally.tempBuffer;
    gpu_copy_tally_buffer(stream, tally->gpu, tempBuffer->gpu, 0, tempBuffer->base, count * canvas->views);

    gpu_barrier barrier = {
      .prev = GPU_PHASE_COPY,
      .next = GPU_PHASE_SHADER_COMPUTE,
      .flush = GPU_CACHE_TRANSFER_WRITE,
      .clear = GPU_CACHE_STORAGE_READ
    };

    Access access = {
      .sync = &tally->buffer->sync,
      .object = tally->buffer,
      .phase = GPU_PHASE_SHADER_COMPUTE,
      .cache = GPU_CACHE_STORAGE_WRITE
    };

    syncResource(&access, &barrier);
    gpu_sync(stream, &barrier, 1);

    gpu_binding bindings[] = {
      { 0, GPU_SLOT_STORAGE_BUFFER, .buffer = { tempBuffer->gpu, tempBuffer->base, count * canvas->views * sizeof(uint32_t) } },
      { 1, GPU_SLOT_STORAGE_BUFFER, .buffer = { tally->buffer->gpu, tally->buffer->base + tally->bufferOffset, count * sizeof(uint32_t) } }
    };

    Shader* shader = lovrGraphicsGetDefaultShader(SHADER_TALLY_MERGE);
    gpu_bundle* bundle = getBundle(shader->layout, bindings, COUNTOF(bindings));
    uint32_t constants[2] = { count, canvas->views };

    gpu_compute_begin(stream);
    gpu_bind_pipeline(stream, shader->computePipeline, GPU_PIPELINE_COMPUTE);
    gpu_bind_bundles(stream, shader->gpu, &bundle, 0, 1, NULL, 0);
    gpu_push_constants(stream, shader->gpu, constants, sizeof(constants));
    gpu_compute(stream, (count + 31) / 32, 1, 1);
    gpu_compute_end(stream);
  }
}

static Readback* lovrReadbackCreateTimestamp(TimingInfo* passes, uint32_t count, BufferView view);
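// Submission assembles up to count + 3 streams: an optional stream that
// replays the pending transfer barrier, the frame's transfer stream
// (state.stream), one stream per pass, and a trailing stream used for the
// timestamp readback and OpenXR swapchain layout transitions.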
void lovrGraphicsSubmit(Pass** passes, uint32_t count) {
  beginFrame();

  bool xrCanvas = false;
  uint32_t streamCount = 0;
  uint32_t maxStreams = count + 3;
  gpu_stream** streams = tempAlloc(&state.allocator, maxStreams * sizeof(gpu_stream*));
  gpu_barrier* computeBarriers = tempAlloc(&state.allocator, count * sizeof(gpu_barrier));
  gpu_barrier* renderBarriers = tempAlloc(&state.allocator, count * sizeof(gpu_barrier));

  if (count > 0) {
    memset(computeBarriers, 0, count * sizeof(gpu_barrier));
    memset(renderBarriers, 0, count * sizeof(gpu_barrier));
  }

  if (state.transferBarrier.prev != 0 && state.transferBarrier.next != 0) {
    gpu_stream* stream = streams[streamCount++] = gpu_stream_begin(NULL);
    gpu_sync(stream, &state.transferBarrier, 1);
    gpu_stream_end(stream);
  }

  streams[streamCount++] = state.stream;

  // Synchronization
  for (uint32_t i = 0; i < count; i++) {
    Pass* pass = passes[i];
    Canvas* canvas = &pass->canvas;
    state.shouldPresent |= pass == state.windowPass;

    // Compute
    for (AccessBlock* block = pass->access[ACCESS_COMPUTE]; block != NULL; block = block->next) {
      for (uint64_t j = 0; j < block->count; j++) {
        Access* access = &block->list[j];
        if (access->sync->barrier != &computeBarriers[i] && syncResource(access, access->sync->barrier)) {
          access->sync->barrier = &computeBarriers[i];
        }
      }
    }

    // Color attachments
    for (uint32_t t = 0; t < canvas->count; t++) {
      if (canvas->color[t].texture == state.window) continue;
      Texture* texture = canvas->color[t].texture;

      Access access = {
        .sync = &texture->root->sync,
        .object = texture,
        .phase = GPU_PHASE_COLOR,
        .cache = GPU_CACHE_COLOR_WRITE | ((!canvas->resolve && canvas->color[t].load == LOAD_KEEP) ? GPU_CACHE_COLOR_READ : 0)
      };

      syncResource(&access, access.sync->barrier);
      access.sync->barrier = &renderBarriers[i];

      if (texture->info.mipmaps > 1) {
        access.sync->writePhase = GPU_PHASE_BLIT;
        access.sync->pendingWrite = GPU_CACHE_TRANSFER_WRITE;
      }

      if (texture->info.xr && !texture->xrAcquired) {
        gpu_xr_acquire(state.stream, texture->gpu);
        texture->xrAcquired = true;
        xrCanvas = true;
      }
    }

    // Depth attachment
    if (canvas->depth.texture) {
      Texture* texture = canvas->depth.texture;
      Access access = { .sync = &texture->root->sync, .object = texture };

      if (canvas->resolve) {
        // Depth resolve operations act like color resolves w.r.t. sync
        access.phase = GPU_PHASE_COLOR;
        access.cache = GPU_CACHE_COLOR_WRITE;
      } else {
        access.phase = canvas->depth.load == LOAD_KEEP ? GPU_PHASE_DEPTH_EARLY : GPU_PHASE_DEPTH_LATE;
        access.cache = GPU_CACHE_DEPTH_WRITE | (canvas->depth.load == LOAD_KEEP ? GPU_CACHE_DEPTH_READ : 0);
      }

      syncResource(&access, access.sync->barrier);
      access.sync->barrier = &renderBarriers[i];

      if (texture->info.mipmaps > 1) {
        access.sync->writePhase = GPU_PHASE_BLIT;
        access.sync->pendingWrite = GPU_CACHE_TRANSFER_WRITE;
      }

      if (texture->info.xr && !texture->xrAcquired) {
        gpu_xr_acquire(state.stream, texture->gpu);
        texture->xrAcquired = true;
        xrCanvas = true;
      }
    }
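    // Each resource's sync.barrier points at the barrier that will guard its
    // next use: compute accesses funnel into this pass's computeBarrier and
    // attachments into its renderBarrier. The cleanup phase below points them
    // all back at the global frame barrier.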
    // Render resources (all read-only)
    for (AccessBlock* block = pass->access[ACCESS_RENDER]; block != NULL; block = block->next) {
      for (uint64_t j = 0; j < block->count; j++) {
        syncResource(&block->list[j], block->list[j].sync->barrier);
      }
    }
  }

  TimingInfo* times = NULL;
  if (state.timingEnabled && count > 0) {
    times = lovrMalloc(count * sizeof(TimingInfo));

    for (uint32_t i = 0; i < count; i++) {
      times[i].pass = passes[i];
      lovrRetain(passes[i]);
    }

    uint32_t timestampCount = 2 * count;
    if (timestampCount > state.timestampCount) {
      if (state.timestamps) {
        gpu_tally_destroy(state.timestamps);
      } else {
        state.timestamps = lovrMalloc(gpu_sizeof_tally());
      }

      gpu_tally_info info = { .type = GPU_TALLY_TIME, .count = timestampCount };
      gpu_tally_init(state.timestamps, &info);
      state.timestampCount = timestampCount;
    }

    gpu_clear_tally(state.stream, state.timestamps, 0, timestampCount);
  }

  gpu_sync(state.stream, &state.barrier, 1);
  gpu_stream_end(state.stream);

  for (uint32_t i = 0; i < count; i++) {
    gpu_stream* stream = streams[streamCount++] = gpu_stream_begin(NULL);

    if (state.timingEnabled) {
      times[i].cpuTime = os_get_time();
      gpu_tally_mark(stream, state.timestamps, 2 * i + 0);
    }

    recordComputePass(passes[i], stream);
    gpu_sync(stream, &computeBarriers[i], 1);
    recordRenderPass(passes[i], stream);
    gpu_sync(stream, &renderBarriers[i], 1);

    if (state.timingEnabled) {
      times[i].cpuTime = os_get_time() - times[i].cpuTime;
      gpu_tally_mark(stream, state.timestamps, 2 * i + 1);
    }

    gpu_stream_end(stream);
  }

  if (xrCanvas || (state.timingEnabled && count > 0)) {
    gpu_stream* stream = streams[streamCount++] = gpu_stream_begin(NULL);

    // Timestamp Readback
    if (state.timingEnabled) {
      BufferView view = getBuffer(GPU_BUFFER_DOWNLOAD, 2 * count * sizeof(uint32_t), 4);
      gpu_copy_tally_buffer(stream, state.timestamps, view.buffer, 0, view.offset, 2 * count);
      Readback* readback = lovrReadbackCreateTimestamp(times, count, view);
      lovrRelease(readback, lovrReadbackDestroy); // It gets freed when it completes
    }

    // OpenXR Swapchain Layout Transitions
    for (uint32_t i = 0; i < count; i++) {
      Canvas* canvas = &passes[i]->canvas;

      for (uint32_t t = 0; t < canvas->count; t++) {
        Texture* texture = canvas->color[t].texture;
        if (texture->info.xr && texture->xrAcquired) {
          gpu_xr_release(stream, texture->gpu);
          texture->xrAcquired = false;
        }
      }

      if (canvas->depth.texture) {
        Texture* texture = canvas->depth.texture;
        if (texture->info.xr && texture->xrAcquired) {
          gpu_xr_release(stream, texture->gpu);
          texture->xrAcquired = false;
        }
      }
    }

    gpu_stream_end(stream);
  }
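  // Two timestamps bracket each pass (indices 2 * i and 2 * i + 1); the
  // Readback created above resolves them asynchronously and releases itself
  // on completion, pairing each GPU interval with the CPU time measured while
  // the pass was recorded.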
  // Cleanup
  for (uint32_t i = 0; i < count; i++) {
    Canvas* canvas = &passes[i]->canvas;

    // Reset barriers back to the default
    for (uint32_t t = 0; t < canvas->count; t++) {
      canvas->color[t].texture->sync.barrier = &state.barrier;
    }

    if (canvas->depth.texture) {
      canvas->depth.texture->sync.barrier = &state.barrier;
    }

    for (uint32_t j = 0; j < COUNTOF(passes[i]->access); j++) {
      for (AccessBlock* block = passes[i]->access[j]; block != NULL; block = block->next) {
        for (uint32_t k = 0; k < block->count; k++) {
          block->list[k].sync->barrier = &state.barrier;
        }
      }
    }

    // Mark the tick for any buffers that filled up, so we know when to recycle them
    for (BufferBlock* block = passes[i]->buffers.freelist; block; block = block->next) {
      block->tick = state.tick;
    }
  }

  gpu_submit(streams, streamCount);

  state.active = false;
  state.stream = NULL;
}

void lovrGraphicsPresent(void) {
  if (state.shouldPresent) {
    state.window->gpu = NULL;
    state.window->renderView = NULL;
    state.shouldPresent = false;
    gpu_surface_present();
  }
}

void lovrGraphicsWait(void) {
  if (state.active) {
    lovrGraphicsSubmit(NULL, 0);
  }
  gpu_wait_idle();
  processReadbacks();
}

// Buffer

uint32_t lovrGraphicsAlignFields(DataField* parent, DataLayout layout) {
  static const struct { uint32_t size, scalarAlign, baseAlign; } table[] = {
    [TYPE_I8x4] = { 4, 1, 4 }, [TYPE_U8x4] = { 4, 1, 4 }, [TYPE_SN8x4] = { 4, 1, 4 }, [TYPE_UN8x4] = { 4, 1, 4 },
    [TYPE_SN10x3] = { 4, 4, 4 }, [TYPE_UN10x3] = { 4, 4, 4 },
    [TYPE_I16] = { 2, 2, 2 }, [TYPE_I16x2] = { 4, 2, 4 }, [TYPE_I16x4] = { 8, 2, 8 },
    [TYPE_U16] = { 2, 2, 2 }, [TYPE_U16x2] = { 4, 2, 4 }, [TYPE_U16x4] = { 8, 2, 8 },
    [TYPE_SN16x2] = { 4, 2, 4 }, [TYPE_SN16x4] = { 8, 2, 8 },
    [TYPE_UN16x2] = { 4, 2, 4 }, [TYPE_UN16x4] = { 8, 2, 8 },
    [TYPE_I32] = { 4, 4, 4 }, [TYPE_I32x2] = { 8, 4, 8 }, [TYPE_I32x3] = { 12, 4, 16 }, [TYPE_I32x4] = { 16, 4, 16 },
    [TYPE_U32] = { 4, 4, 4 }, [TYPE_U32x2] = { 8, 4, 8 }, [TYPE_U32x3] = { 12, 4, 16 }, [TYPE_U32x4] = { 16, 4, 16 },
    [TYPE_F16x2] = { 4, 2, 4 }, [TYPE_F16x4] = { 8, 2, 8 },
    [TYPE_F32] = { 4, 4, 4 }, [TYPE_F32x2] = { 8, 4, 8 }, [TYPE_F32x3] = { 12, 4, 16 }, [TYPE_F32x4] = { 16, 4, 16 },
    [TYPE_MAT2] = { 16, 4, 8 }, [TYPE_MAT3] = { 48, 4, 16 }, [TYPE_MAT4] = { 64, 4, 16 },
    [TYPE_INDEX16] = { 2, 2, 2 }, [TYPE_INDEX32] = { 4, 4, 4 }
  };

  uint32_t cursor = 0;
  uint32_t extent = 0;
  uint32_t align = 1;

  for (uint32_t i = 0; i < parent->fieldCount; i++) {
    DataField* field = &parent->fields[i];
    uint32_t length = MAX(field->length, 1);
    uint32_t subalign;

    if (field->fieldCount > 0) {
      subalign = lovrGraphicsAlignFields(field, layout);
    } else {
      subalign = layout == LAYOUT_PACKED ? table[field->type].scalarAlign : table[field->type].baseAlign;
      if (field->length > 0) {
        subalign = layout == LAYOUT_STD140 ? MAX(subalign, 16) : subalign;
        field->stride = MAX(subalign, table[field->type].size);
      } else {
        field->stride = table[field->type].size;
      }
    }

    if (field->offset == 0) {
      field->offset = ALIGN(cursor, subalign);
      cursor = field->offset + length * field->stride;
    }

    align = MAX(align, subalign);
    extent = MAX(extent, field->offset + length * field->stride);
  }

  if (layout == LAYOUT_STD140) align = MAX(align, 16);
  if (parent->stride == 0) parent->stride = ALIGN(extent, align);
  return align;
}
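// Example: under std140/std430 a TYPE_F32x3 field occupies 12 bytes but uses
// a 16-byte "baseAlign" (vs the 4-byte scalarAlign of LAYOUT_PACKED), and
// std140 additionally pads array elements to 16-byte strides, which is the
// MAX(subalign, 16) clamp in the function above.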
Buffer* lovrBufferCreate(const BufferInfo* info, void** data) {
  uint32_t fieldCount = info->format ? MAX(info->fieldCount, info->format->fieldCount + 1) : 0;

  size_t charCount = 0;
  for (uint32_t i = 0; i < fieldCount; i++) {
    if (!info->format[i].name) continue;
    charCount += strlen(info->format[i].name) + 1;
  }

  charCount = ALIGN(charCount, 8);
  Buffer* buffer = lovrCalloc(sizeof(Buffer) + charCount + fieldCount * sizeof(DataField));
  buffer->ref = 1;
  buffer->info = *info;
  buffer->info.fieldCount = fieldCount;

  if (info->format) {
    lovrCheck(info->format->length > 0, "Buffer length can not be zero");
    char* names = (char*) buffer + sizeof(Buffer);
    DataField* format = buffer->info.format = (DataField*) (names + charCount);
    memcpy(format, info->format, fieldCount * sizeof(DataField));

    // Copy names, hash names, fixup children pointers
    for (uint32_t i = 0; i < fieldCount; i++) {
      if (format[i].name) {
        size_t length = strlen(format[i].name);
        memcpy(names, format[i].name, length);
        names[length] = '\0';
        format[i].name = names;
        format[i].hash = (uint32_t) hash64(format[i].name, length);
        names += length + 1;
      }

      if (format[i].fields) {
        format[i].fields = format + (format[i].fields - info->format);
      }
    }

    // Root child pointer is optional, and if absent it implicitly points to next field
    if (format->fieldCount > 0 && !format->fields) {
      format->fields = format + 1;
    }

    // Size is optional, and can be computed from format
    if (buffer->info.size == 0) {
      buffer->info.size = format->stride * MAX(format->length, 1);
    }

    // Formats with array/struct fields have extra restrictions, cache it
    for (uint32_t i = 0; i < format->fieldCount; i++) {
      if (format->fields[i].fieldCount > 0 || format->fields[i].length > 0) {
        buffer->info.complexFormat = true;
        break;
      }
    }
  }

  lovrCheck(buffer->info.size > 0, "Buffer size can not be zero");
  lovrCheck(buffer->info.size <= 1 << 30, "Max buffer size is 1GB");
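  // The buffer is suballocated from a shared block; its base offset must be a
  // multiple of both the format stride (so element 0 starts at the base) and
  // the uniform/storage buffer alignment limits, hence the lcm below.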
  size_t stride = buffer->info.format ? buffer->info.format->stride : 4;
  size_t align = lcm(stride, MAX(state.limits.storageBufferAlign, state.limits.uniformBufferAlign));
  BufferView view = getBuffer(GPU_BUFFER_STATIC, buffer->info.size, align);
  buffer->gpu = view.buffer;
  buffer->base = view.offset;
  buffer->block = view.block;
  atomic_fetch_add(&buffer->block->ref, 1);

  if (data) {
    if (view.pointer) {
      *data = view.pointer;
    } else {
      beginFrame();
      BufferView staging = getBuffer(GPU_BUFFER_UPLOAD, buffer->info.size, 4);
      gpu_copy_buffers(state.stream, staging.buffer, buffer->gpu, staging.offset, buffer->base, buffer->info.size);
      buffer->sync.writePhase = GPU_PHASE_COPY;
      buffer->sync.pendingWrite = GPU_CACHE_TRANSFER_WRITE;
      buffer->sync.lastTransferWrite = state.tick;
      *data = staging.pointer;
    }
  }

  buffer->sync.barrier = &state.barrier;
  return buffer;
}

void lovrBufferDestroy(void* ref) {
  Buffer* buffer = ref;
  BufferAllocator* allocator = &state.bufferAllocators[GPU_BUFFER_STATIC];
  if (buffer->block != allocator->current && atomic_fetch_sub(&buffer->block->ref, 1) == 1) {
    freeBlock(allocator, buffer->block);
  }
  lovrFree(buffer);
}

const BufferInfo* lovrBufferGetInfo(Buffer* buffer) {
  return &buffer->info;
}

void* lovrBufferGetData(Buffer* buffer, uint32_t offset, uint32_t extent) {
  beginFrame();
  if (extent == ~0u) extent = buffer->info.size - offset;
  lovrCheck(offset + extent <= buffer->info.size, "Buffer read range goes past the end of the Buffer");
  gpu_barrier barrier = syncTransfer(&buffer->sync, GPU_PHASE_COPY, GPU_CACHE_TRANSFER_READ);
  gpu_sync(state.stream, &barrier, 1);
  BufferView view = getBuffer(GPU_BUFFER_DOWNLOAD, extent, 4);
  gpu_copy_buffers(state.stream, buffer->gpu, view.buffer, buffer->base + offset, view.offset, extent);
  lovrGraphicsSubmit(NULL, 0);
  lovrGraphicsWait();
  return view.pointer;
}
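// Note that lovrBufferGetData is fully synchronous: it records a copy into a
// download buffer, then submits all pending work and blocks until the GPU is
// idle before returning the mapped pointer.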
buffer->info.size, "Buffer clear range goes past the end of the Buffer"); beginFrame(); gpu_barrier barrier = syncTransfer(&buffer->sync, GPU_PHASE_CLEAR, GPU_CACHE_TRANSFER_WRITE); gpu_sync(state.stream, &barrier, 1); gpu_clear_buffer(state.stream, buffer->gpu, buffer->base + offset, extent, value); } // Texture Texture* lovrGraphicsGetWindowTexture(void) { if (!state.window && os_window_is_open()) { uint32_t width, height; os_window_get_size(&width, &height); float density = os_window_get_pixel_density(); width *= density; height *= density; state.window = lovrCalloc(sizeof(Texture)); state.window->ref = 1; state.window->gpu = NULL; state.window->renderView = NULL; state.window->info = (TextureInfo) { .type = TEXTURE_2D, .format = GPU_FORMAT_SURFACE, .width = width, .height = height, .layers = 1, .mipmaps = 1, .usage = TEXTURE_RENDER, .srgb = true }; bool vsync = state.config.vsync; #ifndef LOVR_DISABLE_HEADSET if (lovrHeadsetInterface && lovrHeadsetInterface->driverType != DRIVER_SIMULATOR) { vsync = false; } #endif gpu_surface_info info = { .width = width, .height = height, .vsync = vsync, #if defined(_WIN32) .win32.window = os_get_win32_window(), .win32.instance = os_get_win32_instance() #elif defined(__APPLE__) .macos.layer = os_get_ca_metal_layer() #elif defined(__linux__) && !defined(__ANDROID__) .xcb.connection = os_get_xcb_connection(), .xcb.window = os_get_xcb_window() #endif }; gpu_surface_init(&info); os_on_resize(onResize); state.depthFormat = state.config.stencil ? FORMAT_D32FS8 : FORMAT_D32F; if (state.config.stencil && !lovrGraphicsGetFormatSupport(state.depthFormat, TEXTURE_FEATURE_RENDER)) { state.depthFormat = FORMAT_D24S8; // Guaranteed to be supported if the other one isn't } } if (state.window && !state.window->gpu) { beginFrame(); state.window->gpu = gpu_surface_acquire(); state.window->renderView = state.window->gpu; // Window texture may be unavailable during a resize if (!state.window->gpu) { return NULL; } } return state.window; } Texture* lovrTextureCreate(const TextureInfo* info) { uint32_t limits[] = { [TEXTURE_2D] = state.limits.textureSize2D, [TEXTURE_3D] = state.limits.textureSize3D, [TEXTURE_CUBE] = state.limits.textureSizeCube, [TEXTURE_ARRAY] = state.limits.textureSize2D }; uint32_t limit = limits[info->type]; uint32_t mipmapCap = log2(MAX(MAX(info->width, info->height), (info->type == TEXTURE_3D ? 
info->layers : 1))) + 1; uint32_t mipmaps = CLAMP(info->mipmaps, 1, mipmapCap); bool srgb = supportsSRGB(info->format) && info->srgb; uint8_t supports = state.features.formats[info->format][srgb]; uint8_t linearSupports = state.features.formats[info->format][false]; lovrCheck(info->width > 0, "Texture width must be greater than zero"); lovrCheck(info->height > 0, "Texture height must be greater than zero"); lovrCheck(info->layers > 0, "Texture layer count must be greater than zero"); lovrCheck(info->width <= limit, "Texture %s exceeds the limit for this texture type (%d)", "width", limit); lovrCheck(info->height <= limit, "Texture %s exceeds the limit for this texture type (%d)", "height", limit); lovrCheck(info->layers <= limit || info->type != TEXTURE_3D, "Texture %s exceeds the limit for this texture type (%d)", "layer count", limit); lovrCheck(info->layers <= state.limits.textureLayers || info->type == TEXTURE_3D, "Texture %s exceeds the limit for this texture type (%d)", "layer count", limit); lovrCheck(info->layers == 1 || info->type != TEXTURE_2D, "2D textures must have a layer count of 1"); lovrCheck(info->layers % 6 == 0 || info->type != TEXTURE_CUBE, "Cubemap layer count must be a multiple of 6"); lovrCheck(info->width == info->height || info->type != TEXTURE_CUBE, "Cubemaps must be square"); lovrCheck(measureTexture(info->format, info->width, info->height, info->layers) < 1 << 30, "Memory for a Texture can not exceed 1GB"); // TODO mip? lovrCheck(~info->usage & TEXTURE_SAMPLE || (supports & GPU_FEATURE_SAMPLE), "GPU does not support the 'sample' flag for this texture format/encoding"); lovrCheck(~info->usage & TEXTURE_RENDER || (supports & GPU_FEATURE_RENDER), "GPU does not support the 'render' flag for this texture format/encoding"); lovrCheck(~info->usage & TEXTURE_STORAGE || (linearSupports & GPU_FEATURE_STORAGE), "GPU does not support the 'storage' flag for this texture format"); lovrCheck(~info->usage & TEXTURE_RENDER || info->width <= state.limits.renderSize[0], "Texture has 'render' flag but its size exceeds the renderSize limit"); lovrCheck(~info->usage & TEXTURE_RENDER || info->height <= state.limits.renderSize[1], "Texture has 'render' flag but its size exceeds the renderSize limit"); lovrCheck(~info->usage & TEXTURE_RENDER || info->type != TEXTURE_3D || !isDepthFormat(info->format), "3D depth textures can not have the 'render' flag"); lovrCheck((info->format < FORMAT_BC1 || info->format > FORMAT_BC7) || state.features.textureBC, "%s textures are not supported on this GPU", "BC"); lovrCheck(info->format < FORMAT_ASTC_4x4 || state.features.textureASTC, "%s textures are not supported on this GPU", "ASTC"); Texture* texture = lovrCalloc(sizeof(Texture) + gpu_sizeof_texture()); texture->ref = 1; texture->gpu = (gpu_texture*) (texture + 1); texture->root = texture; texture->info = *info; texture->info.mipmaps = mipmaps; texture->info.srgb = srgb; uint32_t levelCount = 0; uint32_t levelOffsets[16]; uint32_t levelSizes[16]; BufferView view = { 0 }; beginFrame(); if (info->imageCount > 0) { levelCount = lovrImageGetLevelCount(info->images[0]); lovrCheck(info->type != TEXTURE_3D || levelCount == 1, "Images used to initialize 3D textures can not have mipmaps"); uint32_t total = 0; for (uint32_t level = 0; level < levelCount; level++) { levelOffsets[level] = total; uint32_t width = MAX(info->width >> level, 1); uint32_t height = MAX(info->height >> level, 1); levelSizes[level] = measureTexture(info->format, width, height, info->layers); total += levelSizes[level]; } view = 
getBuffer(GPU_BUFFER_UPLOAD, total, 64); char* data = view.pointer; for (uint32_t level = 0; level < levelCount; level++) { for (uint32_t layer = 0; layer < info->layers; layer++) { Image* image = info->imageCount == 1 ? info->images[0] : info->images[layer]; uint32_t slice = info->imageCount == 1 ? layer : 0; size_t size = lovrImageGetLayerSize(image, level); lovrCheck(size == levelSizes[level] / info->layers, "Texture/Image size mismatch!"); void* pixels = lovrImageGetLayerData(image, level, slice); memcpy(data, pixels, size); data += size; } levelOffsets[level] += view.offset; } } // Render targets with mipmaps get transfer usage for automipmapping bool transfer = (info->usage & TEXTURE_TRANSFER) || ((info->usage & TEXTURE_RENDER) && texture->info.mipmaps > 1); gpu_texture_init(texture->gpu, &(gpu_texture_info) { .type = (gpu_texture_type) info->type, .format = (gpu_texture_format) info->format, .size = { info->width, info->height, info->layers }, .mipmaps = texture->info.mipmaps, .usage = ((info->usage & TEXTURE_SAMPLE) ? GPU_TEXTURE_SAMPLE : 0) | ((info->usage & TEXTURE_RENDER) ? GPU_TEXTURE_RENDER : 0) | ((info->usage & TEXTURE_STORAGE) ? GPU_TEXTURE_STORAGE : 0) | (transfer ? GPU_TEXTURE_COPY_SRC | GPU_TEXTURE_COPY_DST : 0), .srgb = srgb, .handle = info->handle, .label = info->label, .upload = { .stream = state.stream, .buffer = view.buffer, .levelCount = levelCount, .levelOffsets = levelOffsets, .generateMipmaps = levelCount > 0 && levelCount < mipmaps } }); // Automatically create a renderable view for renderable non-volume textures if ((info->usage & TEXTURE_RENDER) && info->type != TEXTURE_3D && info->layers <= state.limits.renderSize[2]) { if (info->mipmaps == 1) { texture->renderView = texture->gpu; } else { gpu_texture_view_info view = { .source = texture->gpu, .type = GPU_TEXTURE_ARRAY, .usage = GPU_TEXTURE_RENDER, .srgb = srgb, .layerCount = info->layers, .levelCount = 1 }; texture->renderView = lovrMalloc(gpu_sizeof_texture()); gpu_texture_init_view(texture->renderView, &view); } } // Make a linear view of sRGB textures for storage bindings if (srgb && (info->usage & TEXTURE_STORAGE)) { gpu_texture_view_info view = { .source = texture->gpu, .type = (gpu_texture_type) info->type, .usage = GPU_TEXTURE_STORAGE, .srgb = false }; texture->storageView = lovrMalloc(gpu_sizeof_texture()); gpu_texture_init_view(texture->storageView, &view); } else { texture->storageView = texture->gpu; } // Sample-only textures are exempt from sync tracking to reduce overhead. Instead, they are // manually synchronized with a single barrier after the upload stream. if (info->usage == TEXTURE_SAMPLE) { state.barrier.prev |= GPU_PHASE_COPY | GPU_PHASE_BLIT; state.barrier.next |= GPU_PHASE_SHADER_VERTEX | GPU_PHASE_SHADER_FRAGMENT | GPU_PHASE_SHADER_COMPUTE; state.barrier.flush |= GPU_CACHE_TRANSFER_WRITE; state.barrier.clear |= GPU_CACHE_TEXTURE; } else if (levelCount > 0) { texture->sync.writePhase = GPU_PHASE_COPY | GPU_PHASE_BLIT; texture->sync.pendingWrite = GPU_CACHE_TRANSFER_WRITE; texture->sync.lastTransferWrite = state.tick; } texture->sync.barrier = &state.barrier; return texture; } Texture* lovrTextureCreateView(Texture* parent, const TextureViewInfo* info) { const TextureInfo* base = &parent->info; uint32_t maxLayers = base->type == TEXTURE_3D ? 
MAX(base->layers >> info->levelIndex, 1) : base->layers; lovrCheck(info->type != TEXTURE_3D, "Texture views can't be 3D textures"); lovrCheck(info->layerCount > 0, "Texture view must have at least one layer"); lovrCheck(info->levelCount > 0, "Texture view must have at least one mipmap"); lovrCheck(info->layerCount == ~0u || info->layerIndex + info->layerCount <= maxLayers, "Texture view layer range exceeds layer count of parent texture"); lovrCheck(info->levelCount == ~0u || info->levelIndex + info->levelCount <= base->mipmaps, "Texture view mipmap range exceeds mipmap count of parent texture"); lovrCheck(info->layerCount == 1 || info->type != TEXTURE_2D, "2D textures can only have a single layer"); lovrCheck(info->levelCount == 1 || base->type != TEXTURE_3D, "Views of volume textures may only have a single mipmap level"); lovrCheck(info->layerCount % 6 == 0 || info->type != TEXTURE_CUBE, "Cubemap layer count must be a multiple of 6"); Texture* texture = lovrCalloc(sizeof(Texture) + gpu_sizeof_texture()); texture->ref = 1; texture->gpu = (gpu_texture*) (texture + 1); texture->info = *base; texture->root = parent->root; texture->baseLayer = parent->baseLayer + info->layerIndex; texture->baseLevel = parent->baseLevel + info->levelIndex; texture->info.type = info->type; texture->info.width = MAX(base->width >> info->levelIndex, 1); texture->info.height = MAX(base->height >> info->levelIndex, 1); texture->info.layers = info->layerCount == ~0u ? base->layers : info->layerCount; texture->info.mipmaps = info->levelCount == ~0u ? base->mipmaps : info->levelCount; if (base->usage & (TEXTURE_SAMPLE | TEXTURE_RENDER)) { gpu_texture_init_view(texture->gpu, &(gpu_texture_view_info) { .source = texture->root->gpu, .type = (gpu_texture_type) info->type, .usage = base->usage, .srgb = base->srgb, .layerIndex = texture->baseLayer, .layerCount = info->layerCount, .levelIndex = texture->baseLevel, .levelCount = info->levelCount, .label = info->label }); } else { texture->gpu = NULL; } if ((base->usage & TEXTURE_RENDER) && info->layerCount <= state.limits.renderSize[2]) { if (info->levelCount == 1) { texture->renderView = texture->gpu; } else { gpu_texture_view_info subview = { .source = texture->root->gpu, .type = GPU_TEXTURE_ARRAY, .usage = GPU_TEXTURE_RENDER, .layerIndex = texture->baseLayer, .layerCount = info->layerCount, .levelIndex = texture->baseLevel, .levelCount = 1 }; texture->renderView = lovrMalloc(gpu_sizeof_texture()); gpu_texture_init_view(texture->renderView, &subview); } } if ((base->usage & TEXTURE_STORAGE) && base->srgb) { gpu_texture_view_info subview = { .source = texture->root->gpu, .type = (gpu_texture_type) base->type, .usage = GPU_TEXTURE_STORAGE, .srgb = false, .layerIndex = texture->baseLayer, .layerCount = info->layerCount, .levelIndex = texture->baseLevel, .levelCount = info->levelCount }; texture->storageView = lovrMalloc(gpu_sizeof_texture()); gpu_texture_init_view(texture->storageView, &subview); } else { texture->storageView = texture->gpu; } lovrRetain(texture->root); return texture; } void lovrTextureDestroy(void* ref) { Texture* texture = ref; if (texture != state.window) { flushTransfers(); lovrRelease(texture->material, lovrMaterialDestroy); if (texture->root != texture) lovrRelease(texture->root, lovrTextureDestroy); if (texture->renderView && texture->renderView != texture->gpu) gpu_texture_destroy(texture->renderView); if (texture->storageView && texture->storageView != texture->gpu) gpu_texture_destroy(texture->storageView); if (texture->gpu) 
gpu_texture_destroy(texture->gpu); } lovrFree(texture); } const TextureInfo* lovrTextureGetInfo(Texture* texture) { return &texture->info; } Texture* lovrTextureGetParent(Texture* texture) { return texture->root == texture ? NULL : texture->root; } Image* lovrTextureGetPixels(Texture* texture, uint32_t offset[4], uint32_t extent[3]) { beginFrame(); if (extent[0] == ~0u) extent[0] = texture->info.width - offset[0]; if (extent[1] == ~0u) extent[1] = texture->info.height - offset[1]; lovrCheck(extent[2] == 1, "Currently only a single layer can be read from a Texture"); lovrCheck(texture->info.usage & TEXTURE_TRANSFER, "Texture must be created with the 'transfer' usage to read from it"); checkTextureBounds(&texture->info, offset, extent); gpu_barrier barrier = syncTransfer(&texture->sync, GPU_PHASE_COPY, GPU_CACHE_TRANSFER_READ); gpu_sync(state.stream, &barrier, 1); uint32_t rootOffset[4] = { offset[0], offset[1], offset[2] + texture->baseLayer, offset[3] + texture->baseLevel }; BufferView view = getBuffer(GPU_BUFFER_DOWNLOAD, measureTexture(texture->info.format, extent[0], extent[1], 1), 64); gpu_copy_texture_buffer(state.stream, texture->root->gpu, view.buffer, rootOffset, view.offset, extent); lovrGraphicsSubmit(NULL, 0); lovrGraphicsWait(); Image* image = lovrImageCreateRaw(extent[0], extent[1], texture->info.format, texture->info.srgb); void* data = lovrImageGetLayerData(image, 0, 0); memcpy(data, view.pointer, view.extent); return image; } void lovrTextureSetPixels(Texture* texture, Image* image, uint32_t srcOffset[4], uint32_t dstOffset[4], uint32_t extent[3]) { beginFrame(); TextureFormat format = texture->info.format; if (extent[0] == ~0u) extent[0] = MIN(texture->info.width - dstOffset[0], lovrImageGetWidth(image, srcOffset[3]) - srcOffset[0]); if (extent[1] == ~0u) extent[1] = MIN(texture->info.height - dstOffset[1], lovrImageGetHeight(image, srcOffset[3]) - srcOffset[1]); if (extent[2] == ~0u) extent[2] = MIN(texture->info.layers - dstOffset[2], lovrImageGetLayerCount(image) - srcOffset[2]); lovrCheck(texture->info.usage & TEXTURE_TRANSFER, "Texture must be created with the 'transfer' usage to copy to it"); lovrCheck(lovrImageGetFormat(image) == format, "Image and Texture formats must match"); lovrCheck(srcOffset[0] + extent[0] <= lovrImageGetWidth(image, srcOffset[3]), "Image copy region exceeds its %s", "width"); lovrCheck(srcOffset[1] + extent[1] <= lovrImageGetHeight(image, srcOffset[3]), "Image copy region exceeds its %s", "height"); lovrCheck(srcOffset[2] + extent[2] <= lovrImageGetLayerCount(image), "Image copy region exceeds its %s", "layer count"); lovrCheck(srcOffset[3] < lovrImageGetLevelCount(image), "Image copy region exceeds its %s", "mipmap count"); checkTextureBounds(&texture->info, dstOffset, extent); uint32_t rowSize = measureTexture(format, extent[0], 1, 1); uint32_t totalSize = measureTexture(format, extent[0], extent[1], 1) * extent[2]; uint32_t layerOffset = measureTexture(format, lovrImageGetWidth(image, srcOffset[3]), srcOffset[1], 1); layerOffset += measureTexture(format, srcOffset[0], 1, 1); uint32_t pitch = measureTexture(format, lovrImageGetWidth(image, srcOffset[3]), 1, 1); BufferView view = getBuffer(GPU_BUFFER_UPLOAD, totalSize, 64); char* dst = view.pointer; for (uint32_t z = 0; z < extent[2]; z++) { const char* src = (char*) lovrImageGetLayerData(image, srcOffset[3], z) + layerOffset; for (uint32_t y = 0; y < extent[1]; y++) { memcpy(dst, src, rowSize); dst += rowSize; src += pitch; } } gpu_barrier barrier = syncTransfer(&texture->root->sync, GPU_PHASE_COPY,
GPU_CACHE_TRANSFER_WRITE); gpu_sync(state.stream, &barrier, 1); uint32_t rootOffset[4] = { dstOffset[0], dstOffset[1], dstOffset[2] + texture->baseLayer, dstOffset[3] + texture->baseLevel }; gpu_copy_buffer_texture(state.stream, view.buffer, texture->root->gpu, view.offset, rootOffset, extent); } void lovrTextureCopy(Texture* src, Texture* dst, uint32_t srcOffset[4], uint32_t dstOffset[4], uint32_t extent[3]) { beginFrame(); if (extent[0] == ~0u) extent[0] = MIN(src->info.width - srcOffset[0], dst->info.width - dstOffset[0]); if (extent[1] == ~0u) extent[1] = MIN(src->info.height - srcOffset[1], dst->info.height - dstOffset[1]); if (extent[2] == ~0u) extent[2] = MIN(src->info.layers - srcOffset[2], dst->info.layers - dstOffset[2]); lovrCheck(src->info.usage & TEXTURE_TRANSFER, "Texture must be created with the 'transfer' usage to copy %s it", "from"); lovrCheck(dst->info.usage & TEXTURE_TRANSFER, "Texture must be created with the 'transfer' usage to copy %s it", "to"); lovrCheck(src->info.format == dst->info.format, "Copying between Textures requires them to have the same format"); checkTextureBounds(&src->info, srcOffset, extent); checkTextureBounds(&dst->info, dstOffset, extent); gpu_barrier barriers[2]; barriers[0] = syncTransfer(&src->root->sync, GPU_PHASE_COPY, GPU_CACHE_TRANSFER_READ); barriers[1] = syncTransfer(&dst->root->sync, GPU_PHASE_COPY, GPU_CACHE_TRANSFER_WRITE); gpu_sync(state.stream, barriers, 2); uint32_t srcRootOffset[4] = { srcOffset[0], srcOffset[1], srcOffset[2] + src->baseLayer, srcOffset[3] + src->baseLevel }; uint32_t dstRootOffset[4] = { dstOffset[0], dstOffset[1], dstOffset[2] + dst->baseLayer, dstOffset[3] + dst->baseLevel }; gpu_copy_textures(state.stream, src->root->gpu, dst->root->gpu, srcRootOffset, dstRootOffset, extent); } void lovrTextureBlit(Texture* src, Texture* dst, uint32_t srcOffset[4], uint32_t dstOffset[4], uint32_t srcExtent[3], uint32_t dstExtent[3], FilterMode filter) { beginFrame(); if (srcExtent[0] == ~0u) srcExtent[0] = src->info.width - srcOffset[0]; if (srcExtent[1] == ~0u) srcExtent[1] = src->info.height - srcOffset[1]; if (srcExtent[2] == ~0u) srcExtent[2] = src->info.layers - srcOffset[2]; if (dstExtent[0] == ~0u) dstExtent[0] = dst->info.width - dstOffset[0]; if (dstExtent[1] == ~0u) dstExtent[1] = dst->info.height - dstOffset[1]; if (dstExtent[2] == ~0u) dstExtent[2] = dst->info.layers - dstOffset[2]; uint32_t supports = state.features.formats[src->info.format][src->info.srgb]; lovrCheck(src->info.usage & TEXTURE_TRANSFER, "Texture must be created with the 'transfer' usage to blit %s it", "from"); lovrCheck(dst->info.usage & TEXTURE_TRANSFER, "Texture must be created with the 'transfer' usage to blit %s it", "to"); lovrCheck(supports & GPU_FEATURE_BLIT, "This GPU does not support blitting this texture format/encoding"); lovrCheck(src->info.format == dst->info.format && src->info.srgb == dst->info.srgb, "Texture formats must match to blit between them"); lovrCheck(((src->info.type == TEXTURE_3D) ^ (dst->info.type == TEXTURE_3D)) == false, "3D textures can only be blitted with other 3D textures"); lovrCheck(src->info.type == TEXTURE_3D || srcExtent[2] == dstExtent[2], "When blitting between non-3D textures, blit layer counts must match"); checkTextureBounds(&src->info, srcOffset, srcExtent); checkTextureBounds(&dst->info, dstOffset, dstExtent); gpu_barrier barriers[2]; barriers[0] = syncTransfer(&src->root->sync, GPU_PHASE_BLIT, GPU_CACHE_TRANSFER_READ); barriers[1] = syncTransfer(&dst->root->sync, GPU_PHASE_BLIT,
GPU_CACHE_TRANSFER_WRITE); gpu_sync(state.stream, barriers, 2); uint32_t srcRootOffset[4] = { srcOffset[0], srcOffset[1], srcOffset[2] + src->baseLayer, srcOffset[3] + src->baseLevel }; uint32_t dstRootOffset[4] = { dstOffset[0], dstOffset[1], dstOffset[2] + dst->baseLayer, dstOffset[3] + dst->baseLevel }; gpu_blit(state.stream, src->root->gpu, dst->root->gpu, srcRootOffset, dstRootOffset, srcExtent, dstExtent, (gpu_filter) filter); } void lovrTextureClear(Texture* texture, float value[4], uint32_t layer, uint32_t layerCount, uint32_t level, uint32_t levelCount) { beginFrame(); if (layerCount == ~0u) layerCount = texture->info.layers - layer; if (levelCount == ~0u) levelCount = texture->info.mipmaps - level; lovrCheck(texture->info.usage & TEXTURE_TRANSFER, "Texture must be created with 'transfer' usage to clear it"); lovrCheck(texture->info.type == TEXTURE_3D || layer + layerCount <= texture->info.layers, "Texture clear range exceeds texture layer count"); lovrCheck(level + levelCount <= texture->info.mipmaps, "Texture clear range exceeds texture mipmap count"); gpu_barrier barrier = syncTransfer(&texture->root->sync, GPU_PHASE_CLEAR, GPU_CACHE_TRANSFER_WRITE); gpu_sync(state.stream, &barrier, 1); gpu_clear_texture(state.stream, texture->root->gpu, value, texture->baseLayer + layer, layerCount, texture->baseLevel + level, levelCount); } void lovrTextureGenerateMipmaps(Texture* texture, uint32_t base, uint32_t count) { beginFrame(); if (count == ~0u) count = texture->info.mipmaps - (base + 1); uint32_t supports = state.features.formats[texture->info.format][texture->info.srgb]; lovrCheck(texture->info.usage & TEXTURE_TRANSFER, "Texture must be created with the 'transfer' usage to mipmap it"); lovrCheck(supports & GPU_FEATURE_BLIT, "This GPU does not support mipmapping this texture format/encoding"); lovrCheck(base + count < texture->info.mipmaps, "Trying to generate too many mipmaps"); gpu_barrier barrier = syncTransfer(&texture->root->sync, GPU_PHASE_BLIT, GPU_CACHE_TRANSFER_READ | GPU_CACHE_TRANSFER_WRITE); gpu_sync(state.stream, &barrier, 1); mipmapTexture(state.stream, texture, texture->baseLevel + base, count); } Material* lovrTextureToMaterial(Texture* texture) { if (!texture->material) { texture->material = lovrMaterialCreate(&(MaterialInfo) { .data.color = { 1.f, 1.f, 1.f, 1.f }, .data.uvScale = { 1.f, 1.f }, .texture = texture }); // Since the Material refcounts the Texture, this creates a cycle. Release the texture to make // sure this is a weak relationship (the automaterial does not keep the texture refcounted). 
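// Clearing material->info.texture below also keeps lovrMaterialDestroy from releasing the Texture a second time.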
lovrRelease(texture, lovrTextureDestroy); texture->material->info.texture = NULL; } return texture->material; } // Sampler Sampler* lovrGraphicsGetDefaultSampler(FilterMode mode) { return state.defaultSamplers[mode]; } Sampler* lovrSamplerCreate(const SamplerInfo* info) { lovrCheck(info->range[1] < 0.f || info->range[1] >= info->range[0], "Invalid Sampler mipmap range"); lovrCheck(info->anisotropy <= state.limits.anisotropy, "Sampler anisotropy (%f) exceeds anisotropy limit (%f)", info->anisotropy, state.limits.anisotropy); Sampler* sampler = lovrCalloc(sizeof(Sampler) + gpu_sizeof_sampler()); sampler->ref = 1; sampler->gpu = (gpu_sampler*) (sampler + 1); sampler->info = *info; gpu_sampler_info gpu = { .min = (gpu_filter) info->min, .mag = (gpu_filter) info->mag, .mip = (gpu_filter) info->mip, .wrap[0] = (gpu_wrap) info->wrap[0], .wrap[1] = (gpu_wrap) info->wrap[1], .wrap[2] = (gpu_wrap) info->wrap[2], .compare = (gpu_compare_mode) info->compare, .anisotropy = MIN(info->anisotropy, state.limits.anisotropy), .lodClamp = { info->range[0], info->range[1] } }; gpu_sampler_init(sampler->gpu, &gpu); return sampler; } void lovrSamplerDestroy(void* ref) { Sampler* sampler = ref; gpu_sampler_destroy(sampler->gpu); lovrFree(sampler); } const SamplerInfo* lovrSamplerGetInfo(Sampler* sampler) { return &sampler->info; } // Shader #ifdef LOVR_USE_GLSLANG static glsl_include_result_t* includer(void* cb, const char* path, const char* includer, size_t depth) { if (!strcmp(path, includer)) { return NULL; } glsl_include_result_t* result = tempAlloc(&state.allocator, sizeof(*result)); lovrAssert(result, "Out of memory"); result->header_name = path; result->header_data = ((ShaderIncluder*) cb)(path, &result->header_length); if (!result->header_data) return NULL; return result; } #endif void lovrGraphicsCompileShader(ShaderSource* stages, ShaderSource* outputs, uint32_t stageCount, ShaderIncluder* io) { #ifdef LOVR_USE_GLSLANG const glslang_stage_t stageMap[] = { [STAGE_VERTEX] = GLSLANG_STAGE_VERTEX, [STAGE_FRAGMENT] = GLSLANG_STAGE_FRAGMENT, [STAGE_COMPUTE] = GLSLANG_STAGE_COMPUTE }; const char* stageNames[] = { [STAGE_VERTEX] = "vertex", [STAGE_FRAGMENT] = "fragment", [STAGE_COMPUTE] = "compute" }; const char* prefix = "" "#version 460\n" "#extension GL_EXT_multiview : require\n" "#extension GL_EXT_samplerless_texture_functions : require\n" "#extension GL_GOOGLE_include_directive : require\n"; glslang_program_t* program = NULL; glslang_shader_t* shaders[2] = { 0 }; if (stageCount > COUNTOF(shaders)) { lovrUnreachable(); } for (uint32_t i = 0; i < stageCount; i++) { ShaderSource* source = &stages[i]; // It's okay to pass precompiled SPIR-V here, and it will be returned unchanged. However, it's // dangerous to mix SPIR-V and GLSL because then glslang won't perform cross-stage linking, // which means that e.g. the default uniform block might be different for each stage. This // isn't a problem when using the default shaders since they don't use uniforms. 
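// 0x07230203 is the SPIR-V magic number: sources that already start with it are returned unchanged.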
uint32_t magic = 0x07230203; if (source->size % 4 == 0 && source->size >= 4 && !memcmp(source->code, &magic, 4)) { outputs[i] = stages[i]; continue; } else if (!program) { program = glslang_program_create(); } const char* strings[] = { prefix, (const char*) etc_shaders_lovr_glsl, "#line 1\n", source->code }; lovrCheck(source->size <= INT_MAX, "Shader is way too big"); int lengths[] = { -1, etc_shaders_lovr_glsl_len, -1, (int) source->size }; const glslang_resource_t* resource = glslang_default_resource(); glslang_input_t input = { .language = GLSLANG_SOURCE_GLSL, .stage = stageMap[source->stage], .client = GLSLANG_CLIENT_VULKAN, .client_version = GLSLANG_TARGET_VULKAN_1_1, .target_language = GLSLANG_TARGET_SPV, .target_language_version = GLSLANG_TARGET_SPV_1_3, .strings = strings, .lengths = lengths, .string_count = COUNTOF(strings), .default_version = 460, .default_profile = GLSLANG_NO_PROFILE, .forward_compatible = true, .resource = resource, .callbacks.include_local = includer, .callbacks_ctx = (void*) io }; shaders[i] = glslang_shader_create(&input); int options = 0; options |= GLSLANG_SHADER_AUTO_MAP_BINDINGS; options |= GLSLANG_SHADER_AUTO_MAP_LOCATIONS; options |= GLSLANG_SHADER_VULKAN_RULES_RELAXED; glslang_shader_set_options(shaders[i], options); if (!glslang_shader_preprocess(shaders[i], &input)) { lovrThrow("Could not preprocess %s shader:\n%s", stageNames[source->stage], glslang_shader_get_info_log(shaders[i])); } if (!glslang_shader_parse(shaders[i], &input)) { lovrThrow("Could not parse %s shader:\n%s", stageNames[source->stage], glslang_shader_get_info_log(shaders[i])); } glslang_program_add_shader(program, shaders[i]); } // We might not need to do anything if all the inputs were already SPIR-V if (!program) { return; } if (!glslang_program_link(program, 0)) { lovrThrow("Could not link shader:\n%s", glslang_program_get_info_log(program)); } glslang_program_map_io(program); glslang_spv_options_t spvOptions = { 0 }; if (state.config.debug && state.features.shaderDebug) { spvOptions.generate_debug_info = true; spvOptions.emit_nonsemantic_shader_debug_info = true; spvOptions.emit_nonsemantic_shader_debug_source = true; } for (uint32_t i = 0; i < stageCount; i++) { if (!shaders[i]) continue; ShaderSource* source = &stages[i]; if (state.config.debug && state.features.shaderDebug) { glslang_program_add_source_text(program, stageMap[source->stage], source->code, source->size); } glslang_program_SPIRV_generate_with_options(program, stageMap[source->stage], &spvOptions); void* words = glslang_program_SPIRV_get_ptr(program); size_t size = glslang_program_SPIRV_get_size(program) * 4; void* data = lovrMalloc(size); memcpy(data, words, size); outputs[i].stage = source->stage; outputs[i].code = data; outputs[i].size = size; glslang_shader_delete(shaders[i]); } glslang_program_delete(program); #else lovrThrow("Could not compile shader: No shader compiler available"); #endif } static void lovrShaderInit(Shader* shader) { // Shaders store the full list of their flags so clones can override them, but they are reordered // to put overridden (active) ones first, so a contiguous list can be used to create pipelines for (uint32_t i = 0; i < shader->info.flagCount; i++) { ShaderFlag* flag = &shader->info.flags[i]; uint32_t hash = flag->name ? (uint32_t) hash64(flag->name, strlen(flag->name)) : 0; for (uint32_t j = 0; j < shader->flagCount; j++) { if (hash ? 
(hash != shader->flagLookup[j]) : (flag->id != shader->flags[j].id)) continue; uint32_t index = shader->overrideCount++; if (index != j) { gpu_shader_flag temp = shader->flags[index]; shader->flags[index] = shader->flags[j]; shader->flags[j] = temp; uint32_t tempHash = shader->flagLookup[index]; shader->flagLookup[index] = shader->flagLookup[j]; shader->flagLookup[j] = tempHash; } shader->flags[index].value = flag->value; } } if (shader->info.type == SHADER_COMPUTE) { gpu_compute_pipeline_info pipelineInfo = { .shader = shader->gpu, .flags = shader->flags, .flagCount = shader->overrideCount }; lovrAssert(state.pipelineCount < MAX_PIPELINES, "Too many pipelines!"); shader->computePipeline = getPipeline(state.pipelineCount++); os_vm_commit(state.pipelines, state.pipelineCount * gpu_sizeof_pipeline()); gpu_pipeline_init_compute(shader->computePipeline, &pipelineInfo); } } ShaderSource lovrGraphicsGetDefaultShaderSource(DefaultShader type, ShaderStage stage) { const ShaderSource sources[][3] = { [SHADER_UNLIT] = { [STAGE_VERTEX] = { STAGE_VERTEX, lovr_shader_unlit_vert, sizeof(lovr_shader_unlit_vert) }, [STAGE_FRAGMENT] = { STAGE_FRAGMENT, lovr_shader_unlit_frag, sizeof(lovr_shader_unlit_frag) } }, [SHADER_NORMAL] = { [STAGE_VERTEX] = { STAGE_VERTEX, lovr_shader_unlit_vert, sizeof(lovr_shader_unlit_vert) }, [STAGE_FRAGMENT] = { STAGE_FRAGMENT, lovr_shader_normal_frag, sizeof(lovr_shader_normal_frag) } }, [SHADER_FONT] = { [STAGE_VERTEX] = { STAGE_VERTEX, lovr_shader_unlit_vert, sizeof(lovr_shader_unlit_vert) }, [STAGE_FRAGMENT] = { STAGE_FRAGMENT, lovr_shader_font_frag, sizeof(lovr_shader_font_frag) } }, [SHADER_CUBEMAP] = { [STAGE_VERTEX] = { STAGE_VERTEX, lovr_shader_cubemap_vert, sizeof(lovr_shader_cubemap_vert) }, [STAGE_FRAGMENT] = { STAGE_FRAGMENT, lovr_shader_cubemap_frag, sizeof(lovr_shader_cubemap_frag) } }, [SHADER_EQUIRECT] = { [STAGE_VERTEX] = { STAGE_VERTEX, lovr_shader_cubemap_vert, sizeof(lovr_shader_cubemap_vert) }, [STAGE_FRAGMENT] = { STAGE_FRAGMENT, lovr_shader_equirect_frag, sizeof(lovr_shader_equirect_frag) } }, [SHADER_FILL_2D] = { [STAGE_VERTEX] = { STAGE_VERTEX, lovr_shader_fill_vert, sizeof(lovr_shader_fill_vert) }, [STAGE_FRAGMENT] = { STAGE_FRAGMENT, lovr_shader_unlit_frag, sizeof(lovr_shader_unlit_frag) } }, [SHADER_FILL_ARRAY] = { [STAGE_VERTEX] = { STAGE_VERTEX, lovr_shader_fill_vert, sizeof(lovr_shader_fill_vert) }, [STAGE_FRAGMENT] = { STAGE_FRAGMENT, lovr_shader_fill_array_frag, sizeof(lovr_shader_fill_array_frag) } }, [SHADER_ANIMATOR] = { [STAGE_COMPUTE] = { STAGE_COMPUTE, lovr_shader_animator_comp, sizeof(lovr_shader_animator_comp) } }, [SHADER_BLENDER] = { [STAGE_COMPUTE] = { STAGE_COMPUTE, lovr_shader_blender_comp, sizeof(lovr_shader_blender_comp) } }, [SHADER_TALLY_MERGE] = { [STAGE_COMPUTE] = { STAGE_COMPUTE, lovr_shader_tallymerge_comp, sizeof(lovr_shader_tallymerge_comp) } } }; return sources[type][stage]; } Shader* lovrGraphicsGetDefaultShader(DefaultShader type) { if (state.defaultShaders[type]) { return state.defaultShaders[type]; } switch (type) { case SHADER_ANIMATOR: case SHADER_BLENDER: case SHADER_TALLY_MERGE: return state.defaultShaders[type] = lovrShaderCreate(&(ShaderInfo) { .type = SHADER_COMPUTE, .stages = (ShaderSource[1]) { lovrGraphicsGetDefaultShaderSource(type, STAGE_COMPUTE) }, .stageCount = 1, .flags = &(ShaderFlag) { NULL, 0, state.device.subgroupSize }, .flagCount = 1, .isDefault = true }); default: return state.defaultShaders[type] = lovrShaderCreate(&(ShaderInfo) { .type = SHADER_GRAPHICS, .stages = (ShaderSource[2]) { 
lovrGraphicsGetDefaultShaderSource(type, STAGE_VERTEX), lovrGraphicsGetDefaultShaderSource(type, STAGE_FRAGMENT) }, .stageCount = 2, .isDefault = true }); } } Shader* lovrShaderCreate(const ShaderInfo* info) { Shader* shader = lovrCalloc(sizeof(Shader) + gpu_sizeof_shader()); shader->ref = 1; shader->gpu = (gpu_shader*) (shader + 1); shader->info = *info; // Validate stage combinations for (uint32_t i = 0; i < info->stageCount; i++) { shader->stageMask |= (1 << info->stages[i].stage); } if (info->type == SHADER_GRAPHICS) { lovrCheck(shader->stageMask == (FLAG_VERTEX | FLAG_FRAGMENT), "Graphics shaders must have a vertex and a pixel stage"); } else if (info->type == SHADER_COMPUTE) { lovrCheck(shader->stageMask == FLAG_COMPUTE, "Compute shaders can only have a compute stage"); } size_t stack = tempPush(&state.allocator); // Copy the source to temp memory (we perform edits on the SPIR-V and the input might be readonly) void* source[2]; for (uint32_t i = 0; i < info->stageCount; i++) { source[i] = tempAlloc(&state.allocator, info->stages[i].size); memcpy(source[i], info->stages[i].code, info->stages[i].size); } // Parse SPIR-V spv_result result; spv_info spv[2] = { 0 }; uint32_t maxResources = 0; uint32_t maxSpecConstants = 0; uint32_t maxFields = 0; uint32_t maxChars = 0; for (uint32_t i = 0; i < info->stageCount; i++) { result = spv_parse(source[i], info->stages[i].size, &spv[i]); lovrCheck(result == SPV_OK, "Failed to load Shader: %s\n", spv_result_to_string(result)); lovrCheck(spv[i].version <= 0x00010300, "Invalid SPIR-V version (up to 1.3 is supported)"); spv[i].features = tempAlloc(&state.allocator, spv[i].featureCount * sizeof(uint32_t)); spv[i].specConstants = tempAlloc(&state.allocator, spv[i].specConstantCount * sizeof(spv_spec_constant)); spv[i].attributes = tempAlloc(&state.allocator, spv[i].attributeCount * sizeof(spv_attribute)); spv[i].resources = tempAlloc(&state.allocator, spv[i].resourceCount * sizeof(spv_resource)); spv[i].fields = tempAlloc(&state.allocator, spv[i].fieldCount * sizeof(spv_field)); memset(spv[i].fields, 0, spv[i].fieldCount * sizeof(spv_field)); result = spv_parse(source[i], info->stages[i].size, &spv[i]); lovrCheck(result == SPV_OK, "Failed to load Shader: %s\n", spv_result_to_string(result)); checkShaderFeatures(spv[i].features, spv[i].featureCount); maxResources += spv[i].resourceCount; maxSpecConstants += spv[i].specConstantCount; maxFields += spv[i].fieldCount; for (uint32_t j = 0; j < spv[i].fieldCount; j++) { spv_field* field = &spv[i].fields[j]; maxChars += field->name ? 
strlen(field->name) + 1 : 0; } } // Allocate memory shader->resources = lovrMalloc(maxResources * sizeof(ShaderResource)); shader->fields = lovrMalloc(maxFields * sizeof(DataField)); shader->names = lovrMalloc(maxChars); shader->flags = lovrMalloc(maxSpecConstants * sizeof(gpu_shader_flag)); shader->flagLookup = lovrMalloc(maxSpecConstants * sizeof(uint32_t)); // Workgroup size if (info->type == SHADER_COMPUTE) { uint32_t* workgroupSize = spv[0].workgroupSize; uint32_t totalWorkgroupSize = workgroupSize[0] * workgroupSize[1] * workgroupSize[2]; lovrCheck(workgroupSize[0] <= state.limits.workgroupSize[0], "Shader workgroup size exceeds the 'workgroupSize' limit"); lovrCheck(workgroupSize[1] <= state.limits.workgroupSize[1], "Shader workgroup size exceeds the 'workgroupSize' limit"); lovrCheck(workgroupSize[2] <= state.limits.workgroupSize[2], "Shader workgroup size exceeds the 'workgroupSize' limit"); lovrCheck(totalWorkgroupSize <= state.limits.totalWorkgroupSize, "Shader workgroup size exceeds the 'totalWorkgroupSize' limit"); memcpy(shader->workgroupSize, workgroupSize, 3 * sizeof(uint32_t)); } // Vertex attributes if (info->type == SHADER_GRAPHICS && spv[0].attributeCount > 0) { shader->attributeCount = spv[0].attributeCount; shader->attributes = lovrMalloc(shader->attributeCount * sizeof(ShaderAttribute)); for (uint32_t i = 0; i < shader->attributeCount; i++) { shader->attributes[i].location = spv[0].attributes[i].location; shader->attributes[i].hash = (uint32_t) hash64(spv[0].attributes[i].name, strlen(spv[0].attributes[i].name)); shader->hasCustomAttributes |= shader->attributes[i].location < 10; } } uint32_t resourceSet = info->type == SHADER_COMPUTE ? 0 : 2; uint32_t uniformSet = info->type == SHADER_COMPUTE ? 1 : 3; // Resources for (uint32_t s = 0, lastResourceCount = 0; s < info->stageCount; s++, lastResourceCount = shader->resourceCount) { ShaderStage stage = info->stages[s].stage; for (uint32_t i = 0; i < spv[s].resourceCount; i++) { spv_resource* resource = &spv[s].resources[i]; // It's safe to cast away const because we are operating on a copy of the input uint32_t* set = (uint32_t*) resource->set; uint32_t* binding = (uint32_t*) resource->binding; // glslang outputs gl_DefaultUniformBlock, there's also the Constants macro which defines a DefaultUniformBlock UBO if (!strcmp(resource->name, "gl_DefaultUniformBlock") || !strcmp(resource->name, "DefaultUniformBlock")) { spv_field* block = resource->bufferFields; shader->uniformSize = block->elementSize; shader->uniformCount = block->fieldCount; shader->uniforms = shader->fields + ((s == 1 ? 
spv[0].fieldCount : 0) + (block->fields - spv[s].fields)); *set = uniformSet; *binding = 0; continue; } // Skip builtin resources if (info->type == SHADER_GRAPHICS && ((*set == 0 && *binding <= LAST_BUILTIN_BINDING) || *set == 1)) { continue; } static const gpu_slot_type types[] = { [SPV_UNIFORM_BUFFER] = GPU_SLOT_UNIFORM_BUFFER, [SPV_STORAGE_BUFFER] = GPU_SLOT_STORAGE_BUFFER, [SPV_SAMPLED_TEXTURE] = GPU_SLOT_SAMPLED_TEXTURE, [SPV_STORAGE_TEXTURE] = GPU_SLOT_STORAGE_TEXTURE, [SPV_SAMPLER] = GPU_SLOT_SAMPLER }; gpu_phase phases[] = { [STAGE_VERTEX] = GPU_PHASE_SHADER_VERTEX, [STAGE_FRAGMENT] = GPU_PHASE_SHADER_FRAGMENT, [STAGE_COMPUTE] = GPU_PHASE_SHADER_COMPUTE }; gpu_slot_type type = types[resource->type]; gpu_phase phase = phases[stage]; // Merge resources between shader stages, by name bool merged = false; uint32_t hash = (uint32_t) hash64(resource->name, strlen(resource->name)); for (uint32_t j = 0; j < lastResourceCount; j++) { ShaderResource* other = &shader->resources[j]; if (other->hash == hash) { lovrCheck(other->type == type, "Shader variable '%s' is declared in multiple shader stages with different types", resource->name); *set = resourceSet; *binding = shader->resources[j].binding; shader->resources[j].phase |= phase; merged = true; break; } } if (merged) { continue; } uint32_t index = shader->resourceCount++; lovrCheck(index < MAX_SHADER_RESOURCES, "Shader resource count exceeds resourcesPerShader limit (%d)", MAX_SHADER_RESOURCES); lovrCheck(resource->type != SPV_COMBINED_TEXTURE_SAMPLER, "Shader variable '%s' is a%s, which is not supported%s", resource->name, " combined texture sampler", " (use e.g. texture2D instead of sampler2D)"); lovrCheck(resource->type != SPV_UNIFORM_TEXEL_BUFFER, "Shader variable '%s' is a%s, which is not supported%s", resource->name, " uniform texel buffer", ""); lovrCheck(resource->type != SPV_STORAGE_TEXEL_BUFFER, "Shader variable '%s' is a%s, which is not supported%s", resource->name, " storage texel buffer", ""); lovrCheck(resource->type != SPV_INPUT_ATTACHMENT, "Shader variable '%s' is a%s, which is not supported%s", resource->name, "n input attachment", ""); lovrCheck(resource->arraySize == 0, "Arrays of resources in shaders are not currently supported"); // Move resources into set #2 and give them auto-incremented binding numbers starting at zero // Compute shaders don't need remapping since everything's in set #0 and there are no builtins if (!info->isDefault && info->type == SHADER_GRAPHICS && *set == 0 && *binding > LAST_BUILTIN_BINDING) { *set = resourceSet; *binding = index; } bool buffer = resource->type == SPV_UNIFORM_BUFFER || resource->type == SPV_STORAGE_BUFFER; bool texture = resource->type == SPV_SAMPLED_TEXTURE || resource->type == SPV_STORAGE_TEXTURE; bool sampler = resource->type == SPV_SAMPLER; bool storage = resource->type == SPV_STORAGE_BUFFER || resource->type == SPV_STORAGE_TEXTURE; shader->bufferMask |= (buffer << index); shader->textureMask |= (texture << index); shader->samplerMask |= (sampler << index); shader->storageMask |= (storage << index); gpu_cache cache; if (storage) { cache = info->type == SHADER_COMPUTE ? GPU_CACHE_STORAGE_WRITE : GPU_CACHE_STORAGE_READ; } else { cache = texture ? 
GPU_CACHE_TEXTURE : GPU_CACHE_UNIFORM; } shader->resources[index] = (ShaderResource) { .hash = hash, .binding = *binding, .type = type, .phase = phase, .cache = cache }; if (buffer && resource->bufferFields) { spv_field* field = &resource->bufferFields[0]; // The following conversions take place, for convenience and to better match Buffer formats: // - Struct containing either single struct or single array of structs gets unwrapped // - Struct containing single array of non-structs gets converted to array of single-field structs if (field->fieldCount == 1 && field->totalFieldCount > 1) { field = &field->fields[0]; } else if (field->totalFieldCount == 1 && field->fields[0].arrayLength > 0) { spv_field* child = &field->fields[0]; field->arrayLength = child->arrayLength; field->arrayStride = child->arrayStride; field->elementSize = child->elementSize; field->type = child->type; child->arrayLength = 0; child->arrayStride = 0; } shader->resources[index].fieldCount = field->totalFieldCount + 1; shader->resources[index].format = shader->fields + ((s == 1 ? spv[0].fieldCount : 0) + (field - spv[s].fields)); } } } // Fields char* name = shader->names; for (uint32_t s = 0; s < info->stageCount; s++) { for (uint32_t i = 0; i < spv[s].fieldCount; i++) { static const DataType dataTypes[] = { [SPV_B32] = TYPE_U32, [SPV_I32] = TYPE_I32, [SPV_I32x2] = TYPE_I32x2, [SPV_I32x3] = TYPE_I32x3, [SPV_I32x4] = TYPE_I32x4, [SPV_U32] = TYPE_U32, [SPV_U32x2] = TYPE_U32x2, [SPV_U32x3] = TYPE_U32x3, [SPV_U32x4] = TYPE_U32x4, [SPV_F32] = TYPE_F32, [SPV_F32x2] = TYPE_F32x2, [SPV_F32x3] = TYPE_F32x3, [SPV_F32x4] = TYPE_F32x4, [SPV_MAT2x2] = TYPE_MAT2, [SPV_MAT2x3] = ~0u, [SPV_MAT2x4] = ~0u, [SPV_MAT3x2] = ~0u, [SPV_MAT3x3] = TYPE_MAT3, [SPV_MAT3x4] = ~0u, [SPV_MAT4x2] = ~0u, [SPV_MAT4x3] = ~0u, [SPV_MAT4x4] = TYPE_MAT4, [SPV_STRUCT] = ~0u }; spv_field* field = &spv[s].fields[i]; uint32_t base = s == 1 ? spv[0].fieldCount : 0; shader->fields[base + i] = (DataField) { .type = dataTypes[field->type], .offset = field->offset, .length = field->arrayLength, .stride = field->arrayLength > 0 ? field->arrayStride : field->elementSize, // Use stride as element size for non-arrays .fieldCount = field->fieldCount, .fields = field->fields ? 
shader->fields + base + (field->fields - spv[s].fields) : NULL }; if (field->name) { size_t length = strlen(field->name); memcpy(name, field->name, length); shader->fields[base + i].hash = (uint32_t) hash64(name, length); shader->fields[base + i].name = name; name[length] = '\0'; name += length + 1; } } } // Specialization constants for (uint32_t s = 0; s < info->stageCount; s++) { for (uint32_t i = 0; i < spv[s].specConstantCount; i++) { spv_spec_constant* constant = &spv[s].specConstants[i]; bool append = true; if (s > 0) { for (uint32_t j = 0; j < spv[0].specConstantCount; j++) { spv_spec_constant* other = &spv[0].specConstants[j]; if (other->id == constant->id) { lovrCheck(other->type == constant->type, "Shader flag (%d) does not use a consistent type", constant->id); lovrCheck(!strcmp(constant->name, other->name), "Shader flag (%d) does not use a consistent name", constant->id); append = false; break; } } } if (!append) { continue; } static const gpu_flag_type flagTypes[] = { [SPV_B32] = GPU_FLAG_B32, [SPV_I32] = GPU_FLAG_I32, [SPV_U32] = GPU_FLAG_U32, [SPV_F32] = GPU_FLAG_F32 }; uint32_t index = shader->flagCount++; // Flag names can start with flag_ which will be ignored for matching purposes if (constant->name) { size_t length = strlen(constant->name); size_t offset = length > 5 && !memcmp(constant->name, "flag_", 5) ? 5 : 0; shader->flagLookup[index] = (uint32_t) hash64(constant->name + offset, length - offset); } else { shader->flagLookup[index] = 0; } shader->flags[index] = (gpu_shader_flag) { .id = constant->id, .type = flagTypes[constant->type] }; } } // Layout gpu_slot* slots = tempAlloc(&state.allocator, shader->resourceCount * sizeof(gpu_slot)); for (uint32_t i = 0; i < shader->resourceCount; i++) { ShaderResource* resource = &shader->resources[i]; slots[i] = (gpu_slot) { .number = resource->binding, .type = resource->type, .stages = ((resource->phase & GPU_PHASE_SHADER_VERTEX) ? GPU_STAGE_VERTEX : 0) | ((resource->phase & GPU_PHASE_SHADER_FRAGMENT) ? GPU_STAGE_FRAGMENT : 0) | ((resource->phase & GPU_PHASE_SHADER_COMPUTE) ? GPU_STAGE_COMPUTE : 0) }; } shader->layout = getLayout(slots, shader->resourceCount); gpu_shader_info gpu = { .stageCount = info->stageCount, .stages = tempAlloc(&state.allocator, info->stageCount * sizeof(gpu_shader_source)), .label = info->label }; for (uint32_t i = 0; i < info->stageCount; i++) { const uint32_t stageMap[] = { [STAGE_VERTEX] = GPU_STAGE_VERTEX, [STAGE_FRAGMENT] = GPU_STAGE_FRAGMENT, [STAGE_COMPUTE] = GPU_STAGE_COMPUTE }; gpu.stages[i] = (gpu_shader_source) { .stage = stageMap[info->stages[i].stage], .code = source[i], .length = info->stages[i].size }; } for (uint32_t i = 0; i < info->stageCount; i++) { if (spv[i].pushConstants) { gpu.pushConstantSize = MAX(gpu.pushConstantSize, spv[i].pushConstants->elementSize); } } gpu_layout* resourceLayout = state.layouts.data[shader->layout].gpu; gpu_layout* uniformsLayout = shader->uniformSize > 0 ?
state.layouts.data[LAYOUT_UNIFORMS].gpu : NULL; if (info->type == SHADER_GRAPHICS) { gpu.layouts[0] = state.layouts.data[LAYOUT_BUILTINS].gpu; gpu.layouts[1] = state.layouts.data[LAYOUT_MATERIAL].gpu; gpu.layouts[2] = resourceLayout; gpu.layouts[3] = uniformsLayout; } else { gpu.layouts[0] = resourceLayout; gpu.layouts[1] = uniformsLayout; } gpu_shader_init(shader->gpu, &gpu); lovrShaderInit(shader); tempPop(&state.allocator, stack); return shader; } Shader* lovrShaderClone(Shader* parent, ShaderFlag* flags, uint32_t count) { Shader* shader = lovrCalloc(sizeof(Shader) + gpu_sizeof_shader()); shader->ref = 1; lovrRetain(parent); shader->parent = parent; shader->gpu = parent->gpu; shader->info = parent->info; shader->info.flags = flags; shader->info.flagCount = count; shader->layout = parent->layout; shader->stageMask = parent->stageMask; shader->bufferMask = parent->bufferMask; shader->textureMask = parent->textureMask; shader->samplerMask = parent->samplerMask; shader->storageMask = parent->storageMask; shader->uniformSize = parent->uniformSize; shader->uniformCount = parent->uniformCount; shader->resourceCount = parent->resourceCount; shader->flagCount = parent->flagCount; shader->attributes = parent->attributes; shader->resources = parent->resources; shader->uniforms = parent->uniforms; shader->fields = parent->fields; shader->names = parent->names; shader->flags = lovrMalloc(shader->flagCount * sizeof(gpu_shader_flag)); shader->flagLookup = lovrMalloc(shader->flagCount * sizeof(uint32_t)); memcpy(shader->flags, parent->flags, shader->flagCount * sizeof(gpu_shader_flag)); memcpy(shader->flagLookup, parent->flagLookup, shader->flagCount * sizeof(uint32_t)); lovrShaderInit(shader); return shader; } void lovrShaderDestroy(void* ref) { Shader* shader = ref; if (shader->parent) { lovrRelease(shader->parent, lovrShaderDestroy); } else { gpu_shader_destroy(shader->gpu); lovrFree(shader->attributes); lovrFree(shader->resources); lovrFree(shader->fields); lovrFree(shader->names); } lovrFree(shader->flags); lovrFree(shader->flagLookup); lovrFree(shader); } const ShaderInfo* lovrShaderGetInfo(Shader* shader) { return &shader->info; } bool lovrShaderHasStage(Shader* shader, ShaderStage stage) { return shader->stageMask & (1 << stage); } bool lovrShaderHasAttribute(Shader* shader, const char* name, uint32_t location) { if (name) { uint32_t hash = (uint32_t) hash64(name, strlen(name)); for (uint32_t i = 0; i < shader->attributeCount; i++) { if (shader->attributes[i].hash == hash) { return true; } } } else { for (uint32_t i = 0; i < shader->attributeCount; i++) { if (shader->attributes[i].location == location) { return true; } } } return false; } void lovrShaderGetWorkgroupSize(Shader* shader, uint32_t size[3]) { memcpy(size, shader->workgroupSize, 3 * sizeof(uint32_t)); } const DataField* lovrShaderGetBufferFormat(Shader* shader, const char* name, uint32_t* fieldCount) { uint32_t hash = (uint32_t) hash64(name, strlen(name)); ShaderResource* resource = shader->resources; for (uint32_t i = 0; i < shader->resourceCount; i++, resource++) { if (resource->hash == hash && (shader->bufferMask & (1u << resource->binding))) { *fieldCount = resource->fieldCount; return resource->format; } } return NULL; } // Material Material* lovrMaterialCreate(const MaterialInfo* info) { MaterialBlock* block = state.materialBlocks.length > 0 ? 
&state.materialBlocks.data[state.materialBlock] : NULL; const uint32_t MATERIALS_PER_BLOCK = 256; if (!block || block->head == ~0u || !gpu_is_complete(block->list[block->head].tick)) { bool found = false; for (size_t i = 0; i < state.materialBlocks.length; i++) { block = &state.materialBlocks.data[i]; if (block->head != ~0u && gpu_is_complete(block->list[block->head].tick)) { state.materialBlock = i; found = true; break; } } if (!found) { arr_expand(&state.materialBlocks, 1); lovrAssert(state.materialBlocks.length < UINT16_MAX, "Out of memory"); state.materialBlock = state.materialBlocks.length++; block = &state.materialBlocks.data[state.materialBlock]; block->list = lovrMalloc(MATERIALS_PER_BLOCK * sizeof(Material)); block->bundlePool = lovrMalloc(gpu_sizeof_bundle_pool()); block->bundles = lovrMalloc(MATERIALS_PER_BLOCK * gpu_sizeof_bundle()); for (uint32_t i = 0; i < MATERIALS_PER_BLOCK; i++) { block->list[i].next = i + 1; block->list[i].tick = state.tick - 4; block->list[i].block = (uint16_t) state.materialBlock; block->list[i].index = i; block->list[i].bundle = (gpu_bundle*) ((char*) block->bundles + i * gpu_sizeof_bundle()); block->list[i].hasWritableTexture = false; } block->list[MATERIALS_PER_BLOCK - 1].next = ~0u; block->tail = MATERIALS_PER_BLOCK - 1; block->head = 0; size_t align = state.limits.uniformBufferAlign; size_t bufferSize = MATERIALS_PER_BLOCK * ALIGN(sizeof(MaterialData), align); block->view = getBuffer(GPU_BUFFER_STATIC, bufferSize, align); atomic_fetch_add(&block->view.block->ref, 1); gpu_bundle_pool_info poolInfo = { .bundles = block->bundles, .layout = state.layouts.data[LAYOUT_MATERIAL].gpu, .count = MATERIALS_PER_BLOCK }; gpu_bundle_pool_init(block->bundlePool, &poolInfo); } } Material* material = &block->list[block->head]; block->head = material->next; material->next = ~0u; material->ref = 1; material->info = *info; MaterialData* data; uint32_t stride = ALIGN(sizeof(MaterialData), state.limits.uniformBufferAlign); if (block->view.pointer) { data = (MaterialData*) ((char*) block->view.pointer + material->index * stride); } else { beginFrame(); BufferView staging = getBuffer(GPU_BUFFER_UPLOAD, sizeof(MaterialData), 4); gpu_copy_buffers(state.stream, staging.buffer, block->view.buffer, staging.offset, block->view.offset + stride * material->index, sizeof(MaterialData)); state.barrier.prev |= GPU_PHASE_COPY; state.barrier.next |= GPU_PHASE_SHADER_VERTEX | GPU_PHASE_SHADER_FRAGMENT; state.barrier.flush |= GPU_CACHE_TRANSFER_WRITE; state.barrier.clear |= GPU_CACHE_UNIFORM; data = staging.pointer; } memcpy(data, info, sizeof(MaterialData)); gpu_buffer_binding buffer = { .object = block->view.buffer, .offset = block->view.offset + material->index * stride, .extent = stride }; gpu_binding bindings[8] = { { 0, GPU_SLOT_UNIFORM_BUFFER, .buffer = buffer } }; Texture* textures[] = { info->texture, info->glowTexture, info->metalnessTexture, info->roughnessTexture, info->clearcoatTexture, info->occlusionTexture, info->normalTexture }; for (uint32_t i = 0; i < COUNTOF(textures); i++) { lovrRetain(textures[i]); Texture* texture = textures[i] ? 
textures[i] : state.defaultTexture; lovrCheck(i == 0 || texture->info.type == TEXTURE_2D, "Material textures must be 2D"); lovrCheck(texture->info.usage & TEXTURE_SAMPLE, "Textures must be created with the 'sample' usage to use them in Materials"); bindings[i + 1] = (gpu_binding) { i + 1, GPU_SLOT_SAMPLED_TEXTURE, .texture = texture->gpu }; material->hasWritableTexture |= texture->info.usage != TEXTURE_SAMPLE; } gpu_bundle_info bundleInfo = { .layout = state.layouts.data[LAYOUT_MATERIAL].gpu, .bindings = bindings, .count = COUNTOF(bindings) }; gpu_bundle_write(&material->bundle, &bundleInfo, 1); return material; } void lovrMaterialDestroy(void* ref) { Material* material = ref; MaterialBlock* block = &state.materialBlocks.data[material->block]; material->tick = state.tick; block->tail = material->index; if (block->head == ~0u) block->head = block->tail; lovrRelease(material->info.texture, lovrTextureDestroy); lovrRelease(material->info.glowTexture, lovrTextureDestroy); lovrRelease(material->info.metalnessTexture, lovrTextureDestroy); lovrRelease(material->info.roughnessTexture, lovrTextureDestroy); lovrRelease(material->info.clearcoatTexture, lovrTextureDestroy); lovrRelease(material->info.occlusionTexture, lovrTextureDestroy); lovrRelease(material->info.normalTexture, lovrTextureDestroy); } const MaterialInfo* lovrMaterialGetInfo(Material* material) { return &material->info; } // Font Font* lovrGraphicsGetDefaultFont(void) { if (!state.defaultFont) { Rasterizer* rasterizer = lovrRasterizerCreate(NULL, 32); state.defaultFont = lovrFontCreate(&(FontInfo) { .rasterizer = rasterizer, .spread = 4. }); lovrRelease(rasterizer, lovrRasterizerDestroy); } return state.defaultFont; } Font* lovrFontCreate(const FontInfo* info) { Font* font = lovrCalloc(sizeof(Font)); font->ref = 1; font->info = *info; lovrRetain(info->rasterizer); arr_init(&font->glyphs, realloc); map_init(&font->glyphLookup, 36); map_init(&font->kerning, 36); font->pixelDensity = lovrRasterizerGetLeading(info->rasterizer); font->lineSpacing = 1.f; font->padding = (uint32_t) ceil(info->spread / 2.); // Initial atlas size must be big enough to hold any of the glyphs float box[4]; font->atlasWidth = 1; font->atlasHeight = 1; lovrRasterizerGetBoundingBox(info->rasterizer, box); uint32_t maxWidth = (uint32_t) ceilf(box[2] - box[0]) + 2 * font->padding; uint32_t maxHeight = (uint32_t) ceilf(box[3] - box[1]) + 2 * font->padding; while (font->atlasWidth < 2 * maxWidth || font->atlasHeight < 2 * maxHeight) { font->atlasWidth <<= 1; font->atlasHeight <<= 1; } return font; } void lovrFontDestroy(void* ref) { Font* font = ref; lovrRelease(font->info.rasterizer, lovrRasterizerDestroy); lovrRelease(font->material, lovrMaterialDestroy); lovrRelease(font->atlas, lovrTextureDestroy); arr_free(&font->glyphs); map_free(&font->glyphLookup); map_free(&font->kerning); lovrFree(font); } const FontInfo* lovrFontGetInfo(Font* font) { return &font->info; } float lovrFontGetPixelDensity(Font* font) { return font->pixelDensity; } void lovrFontSetPixelDensity(Font* font, float pixelDensity) { font->pixelDensity = pixelDensity; } float lovrFontGetLineSpacing(Font* font) { return font->lineSpacing; } void lovrFontSetLineSpacing(Font* font, float spacing) { font->lineSpacing = spacing; } static Glyph* lovrFontGetGlyph(Font* font, uint32_t codepoint, bool* resized) { uint64_t hash = hash64(&codepoint, 4); uint64_t index = map_get(&font->glyphLookup, hash); if (index != MAP_NIL) { if (resized) *resized = false; return &font->glyphs.data[index]; } 
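// Cache miss: rasterize the glyph and pack it into the atlas. Packing is row-based: glyphs fill the
// current row left to right, overflowing to a new row below, and when the rows overflow the atlas
// doubles in size (alternating width and height), with packing continuing in the newly added half.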
arr_expand(&font->glyphs, 1); map_set(&font->glyphLookup, hash, font->glyphs.length); Glyph* glyph = &font->glyphs.data[font->glyphs.length++]; glyph->codepoint = codepoint; glyph->advance = lovrRasterizerGetAdvance(font->info.rasterizer, codepoint); if (lovrRasterizerIsGlyphEmpty(font->info.rasterizer, codepoint)) { memset(glyph->box, 0, sizeof(glyph->box)); if (resized) *resized = false; return glyph; } lovrRasterizerGetGlyphBoundingBox(font->info.rasterizer, codepoint, glyph->box); float width = glyph->box[2] - glyph->box[0]; float height = glyph->box[3] - glyph->box[1]; uint32_t pixelWidth = 2 * font->padding + (uint32_t) ceilf(width); uint32_t pixelHeight = 2 * font->padding + (uint32_t) ceilf(height); // If the glyph exceeds the width, start a new row if (font->atlasX + pixelWidth > font->atlasWidth) { font->atlasX = font->atlasWidth == font->atlasHeight ? 0 : font->atlasWidth >> 1; font->atlasY += font->rowHeight; } // If the glyph exceeds the height, expand the atlas if (font->atlasY + pixelHeight > font->atlasHeight) { if (font->atlasWidth == font->atlasHeight) { font->atlasX = font->atlasWidth; font->atlasY = 0; font->atlasWidth <<= 1; font->rowHeight = 0; } else { font->atlasX = 0; font->atlasY = font->atlasHeight; font->atlasHeight <<= 1; font->rowHeight = 0; } } glyph->x = font->atlasX + font->padding; glyph->y = font->atlasY + font->padding; glyph->uv[0] = (uint16_t) ((float) glyph->x / font->atlasWidth * 65535.f + .5f); glyph->uv[1] = (uint16_t) ((float) (glyph->y + height) / font->atlasHeight * 65535.f + .5f); glyph->uv[2] = (uint16_t) ((float) (glyph->x + width) / font->atlasWidth * 65535.f + .5f); glyph->uv[3] = (uint16_t) ((float) glyph->y / font->atlasHeight * 65535.f + .5f); font->atlasX += pixelWidth; font->rowHeight = MAX(font->rowHeight, pixelHeight); beginFrame(); // Atlas resize if (!font->atlas || font->atlasWidth > font->atlas->info.width || font->atlasHeight > font->atlas->info.height) { lovrCheck(font->atlasWidth <= 65536, "Font atlas is way too big!"); Texture* atlas = lovrTextureCreate(&(TextureInfo) { .type = TEXTURE_2D, .format = FORMAT_RGBA8, .width = font->atlasWidth, .height = font->atlasHeight, .layers = 1, .mipmaps = 1, .usage = TEXTURE_SAMPLE | TEXTURE_TRANSFER, .label = "Font Atlas" }); float clear[4] = { 0.f, 0.f, 0.f, 0.f }; gpu_clear_texture(state.stream, atlas->gpu, clear, 0, ~0u, 0, ~0u); // This barrier serves 2 purposes: // - Ensure new atlas clear is finished/flushed before copying to it // - Ensure any unsynchronized pending uploads to old atlas finish before copying to new atlas gpu_barrier barrier; barrier.prev = GPU_PHASE_COPY | GPU_PHASE_CLEAR; barrier.next = GPU_PHASE_COPY; barrier.flush = GPU_CACHE_TRANSFER_WRITE; barrier.clear = GPU_CACHE_TRANSFER_READ | GPU_CACHE_TRANSFER_WRITE; gpu_sync(state.stream, &barrier, 1); if (font->atlas) { uint32_t srcOffset[4] = { 0, 0, 0, 0 }; uint32_t dstOffset[4] = { 0, 0, 0, 0 }; uint32_t extent[3] = { font->atlas->info.width, font->atlas->info.height, 1 }; gpu_copy_textures(state.stream, font->atlas->gpu, atlas->gpu, srcOffset, dstOffset, extent); lovrRelease(font->atlas, lovrTextureDestroy); } font->atlas = atlas; // Material lovrRelease(font->material, lovrMaterialDestroy); font->material = lovrMaterialCreate(&(MaterialInfo) { .data.color = { 1.f, 1.f, 1.f, 1.f }, .data.uvScale = { 1.f, 1.f }, .data.sdfRange = { font->info.spread / font->atlasWidth, font->info.spread / font->atlasHeight }, .texture = font->atlas }); // Recompute all glyph uvs after atlas resize for (size_t i = 0; i < 
font->glyphs.length; i++) { Glyph* g = &font->glyphs.data[i]; if (g->box[2] - g->box[0] > 0.f) { g->uv[0] = (uint16_t) ((float) g->x / font->atlasWidth * 65535.f + .5f); g->uv[1] = (uint16_t) ((float) (g->y + g->box[3] - g->box[1]) / font->atlasHeight * 65535.f + .5f); g->uv[2] = (uint16_t) ((float) (g->x + g->box[2] - g->box[0]) / font->atlasWidth * 65535.f + .5f); g->uv[3] = (uint16_t) ((float) g->y / font->atlasHeight * 65535.f + .5f); } } if (resized) *resized = true; } size_t stack = tempPush(&state.allocator); float* pixels = tempAlloc(&state.allocator, pixelWidth * pixelHeight * 4 * sizeof(float)); lovrRasterizerGetPixels(font->info.rasterizer, glyph->codepoint, pixels, pixelWidth, pixelHeight, font->info.spread); BufferView view = getBuffer(GPU_BUFFER_UPLOAD, pixelWidth * pixelHeight * 4 * sizeof(uint8_t), 64); float* src = pixels; uint8_t* dst = view.pointer; for (uint32_t y = 0; y < pixelHeight; y++) { for (uint32_t x = 0; x < pixelWidth; x++) { for (uint32_t c = 0; c < 4; c++) { float f = *src++; // CLAMP would evaluate this multiple times *dst++ = (uint8_t) (CLAMP(f, 0.f, 1.f) * 255.f + .5f); } } } uint32_t dstOffset[4] = { glyph->x - font->padding, glyph->y - font->padding, 0, 0 }; uint32_t extent[3] = { pixelWidth, pixelHeight, 1 }; gpu_copy_buffer_texture(state.stream, view.buffer, font->atlas->gpu, view.offset, dstOffset, extent); tempPop(&state.allocator, stack); state.barrier.prev |= GPU_PHASE_COPY; state.barrier.next |= GPU_PHASE_SHADER_FRAGMENT; state.barrier.flush |= GPU_CACHE_TRANSFER_WRITE; state.barrier.clear |= GPU_CACHE_TEXTURE; return glyph; } float lovrFontGetKerning(Font* font, uint32_t first, uint32_t second) { uint32_t codepoints[] = { first, second }; uint64_t hash = hash64(codepoints, sizeof(codepoints)); union { float f32; uint64_t u64; } kerning = { .u64 = map_get(&font->kerning, hash) }; if (kerning.u64 == MAP_NIL) { kerning.f32 = lovrRasterizerGetKerning(font->info.rasterizer, first, second); map_set(&font->kerning, hash, kerning.u64); } return kerning.f32; } float lovrFontGetWidth(Font* font, ColoredString* strings, uint32_t count) { float x = 0.f; float maxWidth = 0.f; float space = lovrFontGetGlyph(font, ' ', NULL)->advance; for (uint32_t i = 0; i < count; i++) { size_t bytes; uint32_t codepoint; uint32_t previous = '\0'; const char* str = strings[i].string; const char* end = strings[i].string + strings[i].length; while ((bytes = utf8_decode(str, end, &codepoint)) > 0) { if (codepoint == ' ' || codepoint == '\t') { x += codepoint == '\t' ? 
space * 4.f : space;
        previous = '\0';
        str += bytes;
        continue;
      } else if (codepoint == '\n') {
        maxWidth = MAX(maxWidth, x);
        x = 0.f;
        previous = '\0';
        str += bytes;
        continue;
      } else if (codepoint == '\r') {
        str += bytes;
        continue;
      }

      Glyph* glyph = lovrFontGetGlyph(font, codepoint, NULL);
      if (previous) x += lovrFontGetKerning(font, previous, codepoint);
      previous = codepoint;
      x += glyph->advance;
      str += bytes;
    }
  }
  return MAX(maxWidth, x) / font->pixelDensity;
}

void lovrFontGetLines(Font* font, ColoredString* strings, uint32_t count, float wrap, void (*callback)(void* context, const char* string, size_t length), void* context) {
  size_t totalLength = 0;
  for (uint32_t i = 0; i < count; i++) {
    totalLength += strings[i].length;
  }

  beginFrame();
  size_t stack = tempPush(&state.allocator);
  char* string = tempAlloc(&state.allocator, totalLength + 1);
  string[totalLength] = '\0';

  size_t cursor = 0;
  for (uint32_t i = 0; i < count; cursor += strings[i].length, i++) {
    memcpy(string + cursor, strings[i].string, strings[i].length);
  }

  float x = 0.f;
  float nextWordStartX = 0.f;
  wrap *= font->pixelDensity;

  size_t bytes;
  uint32_t codepoint;
  uint32_t previous = '\0';
  const char* lineStart = string;
  const char* wordStart = string;
  const char* end = string + totalLength;
  float space = lovrFontGetGlyph(font, ' ', NULL)->advance;
  while ((bytes = utf8_decode(string, end, &codepoint)) > 0) {
    if (codepoint == ' ' || codepoint == '\t') {
      x += codepoint == '\t' ? space * 4.f : space;
      nextWordStartX = x;
      previous = '\0';
      string += bytes;
      wordStart = string;
      continue;
    } else if (codepoint == '\n') {
      size_t length = string - lineStart;
      while (length > 0 && (lineStart[length - 1] == ' ' || lineStart[length - 1] == '\t')) length--;
      callback(context, lineStart, length);
      nextWordStartX = 0.f;
      x = 0.f;
      previous = '\0';
      string += bytes;
      lineStart = string;
      wordStart = string;
      continue;
    } else if (codepoint == '\r') {
      string += bytes;
      continue;
    }

    Glyph* glyph = lovrFontGetGlyph(font, codepoint, NULL);

    // Kerning
    if (previous) x += lovrFontGetKerning(font, previous, codepoint);
    previous = codepoint;

    // Wrap
    if (wordStart != lineStart && x + glyph->advance > wrap) {
      size_t length = wordStart - lineStart;
      while (length > 0 && (lineStart[length - 1] == ' ' || lineStart[length - 1] == '\t')) length--;
      callback(context, lineStart, length);
      lineStart = wordStart;
      x -= nextWordStartX;
      nextWordStartX = 0.f;
      previous = '\0';
    }

    // Advance
    x += glyph->advance;
    string += bytes;
  }

  if (end - lineStart > 0) {
    callback(context, lineStart, end - lineStart);
  }

  tempPop(&state.allocator, stack);
}

static void aline(GlyphVertex* vertices, uint32_t head, uint32_t tail, float width, HorizontalAlign align) {
  if (align == ALIGN_LEFT) return;
  float shift = align / 2.f * width;
  for (uint32_t i = head; i < tail; i++) {
    vertices[i].position.x -= shift;
  }
}

void lovrFontGetVertices(Font* font, ColoredString* strings, uint32_t count, float wrap, HorizontalAlign halign, VerticalAlign valign, GlyphVertex* vertices, uint32_t* glyphCount, uint32_t* lineCount, Material** material, bool flip) {
  uint32_t vertexCount = 0;
  uint32_t lineStart = 0;
  uint32_t wordStart = 0;
  *glyphCount = 0;
  *lineCount = 1;

  float x = 0.f;
  float y = 0.f;
  float wordStartX = 0.f;
  float prevWordEndX = 0.f;
  float leading = lovrRasterizerGetLeading(font->info.rasterizer) * font->lineSpacing;
  float space = lovrFontGetGlyph(font, ' ', NULL)->advance;

  for (uint32_t i = 0; i < count; i++) {
    size_t bytes;
    uint32_t codepoint;
    uint32_t previous = '\0';
    const char* str = strings[i].string;
    const char* end = strings[i].string + strings[i].length;
    float rf = lovrMathGammaToLinear(strings[i].color[0]);
    float gf = lovrMathGammaToLinear(strings[i].color[1]);
    float bf = lovrMathGammaToLinear(strings[i].color[2]);
    uint8_t r = (uint8_t) (CLAMP(rf, 0.f, 1.f) * 255.f);
    uint8_t g = (uint8_t) (CLAMP(gf, 0.f, 1.f) * 255.f);
    uint8_t b = (uint8_t) (CLAMP(bf, 0.f, 1.f) * 255.f);
    uint8_t a = (uint8_t) (CLAMP(strings[i].color[3], 0.f, 1.f) * 255.f);

    while ((bytes = utf8_decode(str, end, &codepoint)) > 0) {
      if (codepoint == ' ' || codepoint == '\t') {
        if (previous) prevWordEndX = x;
        wordStart = vertexCount;
        x += codepoint == '\t' ? space * 4.f : space;
        wordStartX = x;
        previous = '\0';
        str += bytes;
        continue;
      } else if (codepoint == '\n') {
        aline(vertices, lineStart, vertexCount, x, halign);
        lineStart = vertexCount;
        wordStart = vertexCount;
        x = 0.f;
        y -= leading;
        wordStartX = 0.f;
        prevWordEndX = 0.f;
        (*lineCount)++;
        previous = '\0';
        str += bytes;
        continue;
      } else if (codepoint == '\r') {
        str += bytes;
        continue;
      }

      bool resized;
      Glyph* glyph = lovrFontGetGlyph(font, codepoint, &resized);

      if (resized) {
        lovrFontGetVertices(font, strings, count, wrap, halign, valign, vertices, glyphCount, lineCount, material, flip);
        return;
      }

      // Kerning
      if (previous) x += lovrFontGetKerning(font, previous, codepoint);
      previous = codepoint;

      // Wrap
      if (wrap > 0.f && x + glyph->advance > wrap && wordStart != lineStart) {
        float dx = wordStartX;
        float dy = leading;

        // Shift the vertices of the overflowing word down a line and back to the beginning
        for (uint32_t v = wordStart; v < vertexCount; v++) {
          vertices[v].position.x -= dx;
          vertices[v].position.y += flip ? dy : -dy;
        }

        aline(vertices, lineStart, wordStart, prevWordEndX, halign);
        lineStart = wordStart;
        wordStartX = 0.f;
        (*lineCount)++;
        x -= dx;
        y -= dy;
      }

      // Vertices
      float* bb = glyph->box;
      uint16_t* uv = glyph->uv;
      if (flip) {
        vertices[vertexCount++] = (GlyphVertex) { { x + bb[0], -(y + bb[1]) }, { uv[0], uv[3] }, { r, g, b, a } };
        vertices[vertexCount++] = (GlyphVertex) { { x + bb[2], -(y + bb[1]) }, { uv[2], uv[3] }, { r, g, b, a } };
        vertices[vertexCount++] = (GlyphVertex) { { x + bb[0], -(y + bb[3]) }, { uv[0], uv[1] }, { r, g, b, a } };
        vertices[vertexCount++] = (GlyphVertex) { { x + bb[2], -(y + bb[3]) }, { uv[2], uv[1] }, { r, g, b, a } };
      } else {
        vertices[vertexCount++] = (GlyphVertex) { { x + bb[0], y + bb[3] }, { uv[0], uv[1] }, { r, g, b, a } };
        vertices[vertexCount++] = (GlyphVertex) { { x + bb[2], y + bb[3] }, { uv[2], uv[1] }, { r, g, b, a } };
        vertices[vertexCount++] = (GlyphVertex) { { x + bb[0], y + bb[1] }, { uv[0], uv[3] }, { r, g, b, a } };
        vertices[vertexCount++] = (GlyphVertex) { { x + bb[2], y + bb[1] }, { uv[2], uv[3] }, { r, g, b, a } };
      }
      (*glyphCount)++;

      // Advance
      x += glyph->advance;
      str += bytes;
    }
  }

  // Align last line
  aline(vertices, lineStart, vertexCount, x, halign);
  *material = font->material;
}

// Mesh

Mesh* lovrMeshCreate(const MeshInfo* info, void** vertices) {
  Buffer* buffer = info->vertexBuffer;
  if (buffer) {
    lovrCheck(buffer->info.format, "Mesh vertex buffer must have format information");
    lovrCheck(!buffer->info.complexFormat, "Mesh vertex buffer must use a format without nested types or arrays");
    lovrCheck(info->storage == MESH_GPU, "Mesh storage must be 'gpu' when created from a Buffer");
    lovrRetain(buffer);
  } else {
    lovrCheck(info->vertexFormat->length > 0, "Mesh must have at least one vertex");
    BufferInfo bufferInfo = { .format = info->vertexFormat };
    buffer = lovrBufferCreate(&bufferInfo, info->storage == MESH_GPU ?
vertices : NULL);
    if (!vertices) lovrBufferClear(buffer, 0, ~0u, 0);
  }

  DataField* format = buffer->info.format;
  lovrCheck(format->stride <= state.limits.vertexBufferStride, "Mesh vertex buffer stride exceeds the vertexBufferStride limit of this GPU");
  lovrCheck(format->fieldCount <= state.limits.vertexAttributes, "Mesh attribute count exceeds the vertexAttributes limit of this GPU");

  for (uint32_t i = 0; i < format->fieldCount; i++) {
    const DataField* attribute = &format->fields[i];
    lovrCheck(attribute->offset < 256, "Max Mesh attribute offset is 255"); // Limited by u8 gpu_attribute offset
    lovrCheck(attribute->type < TYPE_MAT2 || attribute->type > TYPE_MAT4, "Currently, Mesh attributes can not use matrix types");
    lovrCheck(attribute->type < TYPE_INDEX16 || attribute->type > TYPE_INDEX32, "Mesh attributes can not use index types");
  }

  Mesh* mesh = lovrCalloc(sizeof(Mesh));
  mesh->ref = 1;
  mesh->vertexBuffer = buffer;
  mesh->storage = info->storage;
  mesh->mode = DRAW_TRIANGLES;

  // The vertex buffer was already retained (or created) above; a second retain here would
  // leak it.  CPU storage implies the Mesh was not created from an existing Buffer.
  if (mesh->storage == MESH_CPU) {
    mesh->vertices = vertices ? lovrMalloc(buffer->info.size) : lovrCalloc(buffer->info.size);
    if (vertices) {
      *vertices = mesh->vertices;
      mesh->dirtyVertices[0] = 0;
      mesh->dirtyVertices[1] = format->length;
    } else {
      mesh->dirtyVertices[0] = ~0u;
      mesh->dirtyVertices[1] = 0;
    }
  }

  return mesh;
}

void lovrMeshDestroy(void* ref) {
  Mesh* mesh = ref;
  lovrRelease(mesh->vertexBuffer, lovrBufferDestroy);
  lovrRelease(mesh->indexBuffer, lovrBufferDestroy);
  lovrRelease(mesh->material, lovrMaterialDestroy);
  lovrFree(mesh->vertices);
  lovrFree(mesh->indices);
  lovrFree(mesh);
}

const DataField* lovrMeshGetVertexFormat(Mesh* mesh) {
  return mesh->vertexBuffer->info.format;
}

const DataField* lovrMeshGetIndexFormat(Mesh* mesh) {
  return mesh->indexCount > 0 && mesh->indexBuffer ? mesh->indexBuffer->info.format : NULL;
}

Buffer* lovrMeshGetVertexBuffer(Mesh* mesh) {
  return mesh->storage == MESH_CPU ? NULL : mesh->vertexBuffer;
}

Buffer* lovrMeshGetIndexBuffer(Mesh* mesh) {
  return mesh->storage == MESH_CPU ? NULL : mesh->indexBuffer;
}

void lovrMeshSetIndexBuffer(Mesh* mesh, Buffer* buffer) {
  lovrCheck(mesh->storage == MESH_GPU, "Mesh can only use a Buffer for indices if it was created with 'gpu' storage mode");
  DataField* format = buffer->info.format;
  lovrCheck(format, "Mesh index buffer must have been created with a format");
  DataType type = format[1].type;
  if (format->fieldCount > 1 || (type != TYPE_U16 && type != TYPE_U32 && type != TYPE_INDEX16 && type != TYPE_INDEX32)) {
    lovrThrow("Mesh index buffer must use the u16, u32, index16, or index32 type");
  } else {
    uint32_t stride = (type == TYPE_U16 || type == TYPE_INDEX16) ?
2 : 4; lovrCheck(format->stride == stride && format[1].offset == 0, "Mesh index buffer must be tightly packed"); } lovrRelease(mesh->indexBuffer, lovrBufferDestroy); mesh->indexBuffer = buffer; mesh->indexCount = format->length; lovrRetain(buffer); } void* lovrMeshGetVertices(Mesh* mesh, uint32_t index, uint32_t count) { const DataField* format = lovrMeshGetVertexFormat(mesh); if (count == ~0u) count = format->length - index; lovrCheck(index < format->length && count <= format->length - index, "Mesh vertex range [%d,%d] overflows mesh capacity", index + 1, index + 1 + count - 1); if (mesh->storage == MESH_CPU) { return (char*) mesh->vertices + index * format->stride; } else { return lovrBufferGetData(mesh->vertexBuffer, index * format->stride, count * format->stride); } } void* lovrMeshSetVertices(Mesh* mesh, uint32_t index, uint32_t count) { const DataField* format = lovrMeshGetVertexFormat(mesh); if (count == ~0u) count = format->length - index; lovrCheck(index < format->length && count <= format->length - index, "Mesh vertex range [%d,%d] overflows mesh capacity", index + 1, index + 1 + count - 1); if (mesh->storage == MESH_CPU) { mesh->dirtyVertices[0] = MIN(mesh->dirtyVertices[0], index); mesh->dirtyVertices[1] = MAX(mesh->dirtyVertices[1], index + count); return (char*) mesh->vertices + index * format->stride; } else { return lovrBufferSetData(mesh->vertexBuffer, index * format->stride, count * format->stride); } } void* lovrMeshGetIndices(Mesh* mesh, uint32_t* count, DataType* type) { if (mesh->indexCount == 0 || !mesh->indexBuffer) { return NULL; } *count = mesh->indexCount; *type = mesh->indexBuffer->info.format[1].type; if (mesh->storage == MESH_CPU) { return mesh->indices; } else { return lovrBufferGetData(mesh->indexBuffer, 0, mesh->indexCount * mesh->indexBuffer->info.format->stride); } } void* lovrMeshSetIndices(Mesh* mesh, uint32_t count, DataType type) { const DataField* format = mesh->indexBuffer ? mesh->indexBuffer->info.format : NULL; mesh->indexCount = count; mesh->dirtyIndices = true; if (!mesh->indexBuffer || count > format->length || type != format[1].type) { lovrRelease(mesh->indexBuffer, lovrBufferDestroy); uint32_t stride = (type == TYPE_U16 || type == TYPE_INDEX16) ? 
2 : 4;
    DataField format[2] = { { .length = count, .stride = stride, .fieldCount = 1 }, { .type = type } };
    BufferInfo info = { .format = format };
    if (mesh->storage == MESH_CPU) {
      mesh->indexBuffer = lovrBufferCreate(&info, NULL);
      mesh->indices = realloc(mesh->indices, count * stride);
      lovrAssert(mesh->indices, "Out of memory");
      return mesh->indices;
    } else {
      void* data = NULL;
      mesh->indexBuffer = lovrBufferCreate(&info, &data);
      return data;
    }
  } else if (mesh->storage == MESH_CPU) {
    return mesh->indices;
  } else {
    return lovrBufferSetData(mesh->indexBuffer, 0, count * format->stride);
  }
}

static float* lovrMeshGetPositions(Mesh* mesh) {
  if (mesh->storage == MESH_GPU) return NULL;
  const DataField* format = lovrMeshGetVertexFormat(mesh);
  uint32_t positionHash = (uint32_t) hash64("VertexPosition", strlen("VertexPosition"));
  for (uint32_t i = 0; i < format->fieldCount; i++) {
    const DataField* attribute = &format->fields[i];
    if (attribute->type != TYPE_F32x3) continue;
    if (attribute->hash == LOCATION_POSITION || attribute->hash == positionHash) {
      return (float*) ((char*) mesh->vertices + attribute->offset);
    }
  }
  return NULL;
}

void lovrMeshGetTriangles(Mesh* mesh, float** vertices, uint32_t** indices, uint32_t* vertexCount, uint32_t* indexCount) {
  float* position = lovrMeshGetPositions(mesh);
  lovrCheck(mesh->storage == MESH_CPU, "Mesh storage mode must be 'cpu'");
  lovrCheck(mesh->mode == DRAW_TRIANGLES, "Mesh draw mode must be 'triangles'");
  lovrCheck(position, "Mesh has no VertexPosition attribute with vec3 type");
  const DataField* format = lovrMeshGetVertexFormat(mesh);

  // Write through a local cursor so the caller's pointer keeps referencing the start of the array
  *vertexCount = format->length;
  *vertices = lovrMalloc(format->length * 3 * sizeof(float));
  float* vertex = *vertices;
  for (uint32_t i = 0; i < format->length; i++) {
    vec3_init(vertex, position);
    position = (float*) ((char*) position + format->stride);
    vertex += 3;
  }

  if (mesh->indexCount > 0) {
    *indexCount = mesh->indexCount;
    *indices = lovrMalloc(*indexCount * sizeof(uint32_t));
    if (mesh->indexBuffer->info.format[1].type == TYPE_U16 || mesh->indexBuffer->info.format[1].type == TYPE_INDEX16) {
      for (uint32_t i = 0; i < mesh->indexCount; i++) {
        (*indices)[i] = (uint32_t) ((uint16_t*) mesh->indices)[i];
      }
    } else {
      memcpy(*indices, mesh->indices, mesh->indexCount * sizeof(uint32_t));
    }
  } else {
    lovrCheck(format->length >= 3 && format->length % 3 == 0, "Mesh vertex count must be divisible by 3");
    *indexCount = format->length;
    *indices = lovrMalloc(*indexCount * sizeof(uint32_t));
    for (uint32_t i = 0; i < format->length; i++) {
      (*indices)[i] = i;
    }
  }
}

bool lovrMeshGetBoundingBox(Mesh* mesh, float box[6]) {
  box[0] = mesh->bounds[0] - mesh->bounds[3];
  box[1] = mesh->bounds[0] + mesh->bounds[3];
  box[2] = mesh->bounds[1] - mesh->bounds[4];
  box[3] = mesh->bounds[1] + mesh->bounds[4];
  box[4] = mesh->bounds[2] - mesh->bounds[5];
  box[5] = mesh->bounds[2] + mesh->bounds[5];
  return mesh->hasBounds;
}

void lovrMeshSetBoundingBox(Mesh* mesh, float box[6]) {
  if (box) {
    mesh->bounds[0] = (box[0] + box[1]) / 2.f;
    mesh->bounds[1] = (box[2] + box[3]) / 2.f;
    mesh->bounds[2] = (box[4] + box[5]) / 2.f;
    mesh->bounds[3] = (box[1] - box[0]) / 2.f;
    mesh->bounds[4] = (box[3] - box[2]) / 2.f;
    mesh->bounds[5] = (box[5] - box[4]) / 2.f;
    mesh->hasBounds = true;
  } else {
    mesh->hasBounds = false;
  }
}

bool lovrMeshComputeBoundingBox(Mesh* mesh) {
  const DataField* format = lovrMeshGetVertexFormat(mesh);
  float* position = lovrMeshGetPositions(mesh);

  if (!position) {
    return false;
  }

  // FLT_MIN is the smallest positive float; the running maxima must start at -FLT_MAX
  float box[6] = { FLT_MAX, -FLT_MAX, FLT_MAX, -FLT_MAX, FLT_MAX, -FLT_MAX };

  for (uint32_t i = 0; i < format->length; i++, position
= (float*) ((char*) position + format->stride)) {
    box[0] = MIN(box[0], position[0]);
    box[1] = MAX(box[1], position[0]);
    box[2] = MIN(box[2], position[1]);
    box[3] = MAX(box[3], position[1]);
    box[4] = MIN(box[4], position[2]);
    box[5] = MAX(box[5], position[2]);
  }

  lovrMeshSetBoundingBox(mesh, box);
  return true;
}

DrawMode lovrMeshGetDrawMode(Mesh* mesh) {
  return mesh->mode;
}

void lovrMeshSetDrawMode(Mesh* mesh, DrawMode mode) {
  mesh->mode = mode;
}

void lovrMeshGetDrawRange(Mesh* mesh, uint32_t* start, uint32_t* count, uint32_t* offset) {
  *start = mesh->drawStart;
  *count = mesh->drawCount;
  *offset = mesh->baseVertex;
}

void lovrMeshSetDrawRange(Mesh* mesh, uint32_t start, uint32_t count, uint32_t offset) {
  uint32_t vertexCount = mesh->vertexBuffer->info.format->length;
  uint32_t extent = mesh->indexCount > 0 ? mesh->indexCount : vertexCount;
  lovrCheck(start < extent && count <= extent - start, "Invalid draw range [%d,%d]", start + 1, start + count);
  lovrCheck(offset < vertexCount, "Mesh vertex offset must be less than the vertex count");
  mesh->drawStart = start;
  mesh->drawCount = count;
  mesh->baseVertex = offset;
}

Material* lovrMeshGetMaterial(Mesh* mesh) {
  return mesh->material;
}

void lovrMeshSetMaterial(Mesh* mesh, Material* material) {
  lovrRelease(mesh->material, lovrMaterialDestroy);
  mesh->material = material;
  lovrRetain(material);
}

static void lovrMeshFlush(Mesh* mesh) {
  if (mesh->storage == MESH_GPU) {
    return;
  }

  if (mesh->dirtyVertices[1] > mesh->dirtyVertices[0]) {
    uint32_t stride = mesh->vertexBuffer->info.format->stride;
    uint32_t offset = mesh->dirtyVertices[0] * stride;
    uint32_t extent = (mesh->dirtyVertices[1] - mesh->dirtyVertices[0]) * stride;
    void* data = lovrBufferSetData(mesh->vertexBuffer, offset, extent);
    memcpy(data, (char*) mesh->vertices + offset, extent);
    mesh->dirtyVertices[0] = ~0u;
    mesh->dirtyVertices[1] = 0;
  }

  if (mesh->dirtyIndices) {
    uint32_t stride = mesh->indexBuffer->info.format->stride;
    void* data = lovrBufferSetData(mesh->indexBuffer, 0, mesh->indexCount * stride);
    memcpy(data, mesh->indices, mesh->indexCount * stride);
    mesh->dirtyIndices = false;
  }
}

// Model

Model* lovrModelCreate(const ModelInfo* info) {
  ModelData* data = info->data;
  Model* model = lovrCalloc(sizeof(Model));
  model->ref = 1;
  model->info = *info;
  lovrRetain(info->data);

  for (uint32_t i = 0; i < data->skinCount; i++) {
    lovrCheck(data->skins[i].jointCount <= 256, "Currently, the max number of joints per skin is 256");
  }

  // Materials and Textures
  if (info->materials) {
    model->textures = lovrCalloc(data->imageCount * sizeof(Texture*));
    model->materials = lovrMalloc(data->materialCount * sizeof(Material*));
    for (uint32_t i = 0; i < data->materialCount; i++) {
      MaterialInfo material;
      ModelMaterial* properties = &data->materials[i];
      memcpy(&material.data, properties, sizeof(MaterialData));

      struct { uint32_t index; Texture** texture; } textures[] = {
        { properties->texture, &material.texture },
        { properties->glowTexture, &material.glowTexture },
        { properties->metalnessTexture, &material.metalnessTexture },
        { properties->roughnessTexture, &material.roughnessTexture },
        { properties->clearcoatTexture, &material.clearcoatTexture },
        { properties->occlusionTexture, &material.occlusionTexture },
        { properties->normalTexture, &material.normalTexture }
      };

      for (uint32_t t = 0; t < COUNTOF(textures); t++) {
        uint32_t index = textures[t].index;
        Texture** texture = textures[t].texture;
        if (index == ~0u) {
          *texture = NULL;
        } else {
          if (!model->textures[index]) {
            model->textures[index] =
lovrTextureCreate(&(TextureInfo) { .type = TEXTURE_2D, .usage = TEXTURE_SAMPLE, .format = lovrImageGetFormat(data->images[index]), .width = lovrImageGetWidth(data->images[index], 0), .height = lovrImageGetHeight(data->images[index], 0), .layers = 1, .mipmaps = info->mipmaps || lovrImageGetLevelCount(data->images[index]) > 1 ? ~0u : 1, .srgb = texture == &material.texture || texture == &material.glowTexture, .images = &data->images[index], .imageCount = 1 }); } *texture = model->textures[index]; } } model->materials[i] = lovrMaterialCreate(&material); } } // Buffers char* vertexData = NULL; char* indexData = NULL; char* blendData = NULL; char* skinData = NULL; BufferInfo vertexBufferInfo = { .format = (DataField[]) { { .length = data->vertexCount, .stride = sizeof(ModelVertex), .fieldCount = 5 }, { .type = TYPE_F32x3, .offset = offsetof(ModelVertex, position), .hash = LOCATION_POSITION }, { .type = TYPE_SN10x3, .offset = offsetof(ModelVertex, normal), .hash = LOCATION_NORMAL }, { .type = TYPE_F32x2, .offset = offsetof(ModelVertex, uv), .hash = LOCATION_UV }, { .type = TYPE_UN8x4, .offset = offsetof(ModelVertex, color), .hash = LOCATION_COLOR }, { .type = TYPE_SN10x3, .offset = offsetof(ModelVertex, tangent), .hash = LOCATION_TANGENT } } }; if (data->vertexCount > 0) { model->vertexBuffer = lovrBufferCreate(&vertexBufferInfo, (void**) &vertexData); } if (data->blendShapeVertexCount > 0) { model->blendBuffer = lovrBufferCreate(&(BufferInfo) { .format = (DataField[]) { { .length = data->blendShapeVertexCount, .stride = sizeof(BlendVertex), .fieldCount = 3 }, { .type = TYPE_F32x3, .offset = offsetof(BlendVertex, position) }, { .type = TYPE_F32x3, .offset = offsetof(BlendVertex, normal) }, { .type = TYPE_F32x3, .offset = offsetof(BlendVertex, tangent) } } }, (void**) &blendData); } if (data->skinnedVertexCount > 0) { model->skinBuffer = lovrBufferCreate(&(BufferInfo) { .format = (DataField[]) { { .length = data->skinnedVertexCount, .stride = 8, .fieldCount = 2 }, { .type = TYPE_UN8x4, .offset = 0 }, { .type = TYPE_U8x4, .offset = 4 } } }, (void**) &skinData); } // Dynamic vertices are ones that are blended or skinned. They need a copy of the original vertex if (data->dynamicVertexCount > 0) { vertexBufferInfo.format->length = data->dynamicVertexCount; model->rawVertexBuffer = lovrBufferCreate(&vertexBufferInfo, NULL); beginFrame(); // The vertex buffer may already have a pending copy if its memory was not host-visible, need to // wait for that to complete before copying to the raw vertex buffer gpu_barrier barrier = syncTransfer(&model->vertexBuffer->sync, GPU_PHASE_COPY, GPU_CACHE_TRANSFER_READ); gpu_sync(state.stream, &barrier, 1); Buffer* src = model->vertexBuffer; Buffer* dst = model->rawVertexBuffer; gpu_copy_buffers(state.stream, src->gpu, dst->gpu, src->base, dst->base, data->dynamicVertexCount * sizeof(ModelVertex)); gpu_sync(state.stream, &(gpu_barrier) { .prev = GPU_PHASE_COPY, .next = GPU_PHASE_SHADER_COMPUTE, .flush = GPU_CACHE_TRANSFER_WRITE, .clear = GPU_CACHE_STORAGE_READ | GPU_CACHE_STORAGE_WRITE }, 1); } uint32_t indexSize = data->indexType == U32 ? 4 : 2; if (data->indexCount > 0) { model->indexBuffer = lovrBufferCreate(&(BufferInfo) { .format = (DataField[]) { { .length = data->indexCount, .stride = indexSize, .fieldCount = 1 }, { .type = data->indexType == U32 ? 
TYPE_INDEX32 : TYPE_INDEX16 } } }, (void**) &indexData); } // Primitives are sorted to simplify animation: // - Skinned primitives come first, ordered by skin // - Primitives with blend shapes are next // - Then "non-dynamic" primitives follow // Within each section primitives are still sorted by their index. size_t stack = tempPush(&state.allocator); uint64_t* primitiveOrder = tempAlloc(&state.allocator, data->primitiveCount * sizeof(uint64_t)); uint32_t* baseVertex = tempAlloc(&state.allocator, data->primitiveCount * sizeof(uint32_t)); for (uint32_t i = 0; i < data->primitiveCount; i++) { uint32_t hi = data->primitives[i].skin; if (hi == ~0u && !!data->primitives[i].blendShapes) hi--; primitiveOrder[i] = ((uint64_t) hi << 32) | i; } qsort(primitiveOrder, data->primitiveCount, sizeof(uint64_t), u64cmp); // Draws model->draws = lovrCalloc(data->primitiveCount * sizeof(DrawInfo)); model->boundingBoxes = lovrMalloc(data->primitiveCount * 6 * sizeof(float)); for (uint32_t i = 0, vertexCursor = 0, indexCursor = 0; i < data->primitiveCount; i++) { ModelPrimitive* primitive = &data->primitives[primitiveOrder[i] & ~0u]; ModelAttribute* position = primitive->attributes[ATTR_POSITION]; DrawInfo* draw = &model->draws[primitiveOrder[i] & ~0u]; switch (primitive->mode) { case DRAW_POINT_LIST: draw->mode = DRAW_POINTS; break; case DRAW_LINE_LIST: draw->mode = DRAW_LINES; break; case DRAW_TRIANGLE_LIST: draw->mode = DRAW_TRIANGLES; break; default: lovrThrow("Model uses an unsupported draw mode (lineloop, linestrip, strip, fan)"); } draw->material = !info->materials || primitive->material == ~0u ? NULL: model->materials[primitive->material]; draw->vertex.buffer = model->vertexBuffer; if (primitive->indices) { draw->index.buffer = model->indexBuffer; draw->start = indexCursor; draw->count = primitive->indices->count; draw->baseVertex = vertexCursor; indexCursor += draw->count; } else { draw->start = vertexCursor; draw->count = position->count; } draw->bounds = model->boundingBoxes + i * 6; draw->bounds[0] = (position->min[0] + position->max[0]) / 2.f; draw->bounds[1] = (position->min[1] + position->max[1]) / 2.f; draw->bounds[2] = (position->min[2] + position->max[2]) / 2.f; draw->bounds[3] = (position->max[0] - position->min[0]) / 2.f; draw->bounds[4] = (position->max[1] - position->min[1]) / 2.f; draw->bounds[5] = (position->max[2] - position->min[2]) / 2.f; baseVertex[i] = vertexCursor; vertexCursor += position->count; } // Vertices for (uint32_t i = 0; i < data->primitiveCount; i++) { ModelPrimitive* primitive = &data->primitives[primitiveOrder[i] & ~0u]; ModelAttribute** attributes = primitive->attributes; uint32_t count = attributes[ATTR_POSITION]->count; size_t stride = sizeof(ModelVertex); lovrModelDataCopyAttribute(data, attributes[ATTR_POSITION], vertexData + 0, F32, 3, false, count, stride, 0); lovrModelDataCopyAttribute(data, attributes[ATTR_NORMAL], vertexData + 12, SN10x3, 1, false, count, stride, 0); lovrModelDataCopyAttribute(data, attributes[ATTR_UV], vertexData + 16, F32, 2, false, count, stride, 0); lovrModelDataCopyAttribute(data, attributes[ATTR_COLOR], vertexData + 24, U8, 4, true, count, stride, 255); lovrModelDataCopyAttribute(data, attributes[ATTR_TANGENT], vertexData + 28, SN10x3, 1, false, count, stride, 0); vertexData += count * stride; if (data->skinnedVertexCount > 0 && primitive->skin != ~0u) { lovrModelDataCopyAttribute(data, attributes[ATTR_JOINTS], skinData + 0, U8, 4, false, count, 8, 0); lovrModelDataCopyAttribute(data, attributes[ATTR_WEIGHTS], skinData + 4, U8, 4, 
true, count, 8, 0); skinData += count * 8; } if (primitive->indices) { char* indices = data->buffers[primitive->indices->buffer].data + primitive->indices->offset; memcpy(indexData, indices, primitive->indices->count * indexSize); indexData += primitive->indices->count * indexSize; } } // Blend shapes if (data->blendShapeCount > 0) { for (uint32_t i = 0; i < data->blendShapeCount; i++) { if (i == 0 || data->blendShapes[i - 1].node != data->blendShapes[i].node) { model->blendGroupCount++; } } model->blendGroups = lovrMalloc(model->blendGroupCount * sizeof(BlendGroup)); model->blendShapeWeights = lovrMalloc(data->blendShapeCount * sizeof(float)); BlendGroup* group = model->blendGroups; for (uint32_t i = 0; i < data->blendShapeCount; i++) { ModelBlendShape* blendShape = &data->blendShapes[i]; ModelNode* node = &data->nodes[blendShape->node]; uint32_t groupVertexCount = 0; for (uint32_t p = 0; p < node->primitiveCount; p++) { ModelPrimitive* primitive = &data->primitives[node->primitiveIndex + p]; uint32_t vertexCount = primitive->attributes[ATTR_POSITION]->count; size_t stride = sizeof(BlendVertex); ModelBlendData* blendAttributes = &primitive->blendShapes[i - node->blendShapeIndex]; lovrModelDataCopyAttribute(data, blendAttributes->positions, blendData + offsetof(BlendVertex, position), F32, 3, false, vertexCount, stride, 0); lovrModelDataCopyAttribute(data, blendAttributes->normals, blendData + offsetof(BlendVertex, normal), F32, 3, false, vertexCount, stride, 0); lovrModelDataCopyAttribute(data, blendAttributes->tangents, blendData + offsetof(BlendVertex, tangent), F32, 3, false, vertexCount, stride, 0); blendData += vertexCount * stride; groupVertexCount += vertexCount; } if (i == 0 || blendShape[-1].node != blendShape[0].node) { group->index = node->blendShapeIndex; group->count = node->blendShapeCount; group->vertexIndex = baseVertex[node->primitiveIndex]; group->vertexCount = groupVertexCount; group++; } } lovrModelResetBlendShapes(model); } // Transforms model->localTransforms = lovrMalloc(sizeof(NodeTransform) * data->nodeCount); model->globalTransforms = lovrMalloc(16 * sizeof(float) * data->nodeCount); lovrModelResetNodeTransforms(model); tempPop(&state.allocator, stack); return model; } Model* lovrModelClone(Model* parent) { ModelData* data = parent->info.data; Model* model = lovrCalloc(sizeof(Model)); model->ref = 1; model->parent = parent; model->info = parent->info; lovrRetain(parent); model->textures = parent->textures; model->materials = parent->materials; model->rawVertexBuffer = parent->rawVertexBuffer; model->indexBuffer = parent->indexBuffer; model->blendBuffer = parent->blendBuffer; model->skinBuffer = parent->skinBuffer; model->blendGroups = parent->blendGroups; model->blendGroupCount = parent->blendGroupCount; if (parent->vertexBuffer) { model->vertexBuffer = lovrBufferCreate(&parent->vertexBuffer->info, NULL); beginFrame(); gpu_barrier barrier = syncTransfer(&parent->vertexBuffer->sync, GPU_PHASE_COPY, GPU_CACHE_TRANSFER_READ); gpu_sync(state.stream, &barrier, 1); Buffer* src = parent->vertexBuffer; Buffer* dst = model->vertexBuffer; gpu_copy_buffers(state.stream, src->gpu, dst->gpu, src->base, dst->base, parent->vertexBuffer->info.size); gpu_sync(state.stream, &(gpu_barrier) { .prev = GPU_PHASE_COPY, .next = GPU_PHASE_SHADER_COMPUTE, .flush = GPU_CACHE_TRANSFER_WRITE, .clear = GPU_CACHE_STORAGE_READ | GPU_CACHE_STORAGE_WRITE }, 1); } model->draws = lovrMalloc(data->primitiveCount * sizeof(DrawInfo)); for (uint32_t i = 0; i < data->primitiveCount; i++) { 
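    // Each draw is copied from the parent and then retargeted at the clone's own
    // vertex buffer, so the clone can be animated independently of its parent.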
model->draws[i] = parent->draws[i]; model->draws[i].vertex.buffer = model->vertexBuffer; } model->blendShapeWeights = lovrMalloc(data->blendShapeCount * sizeof(float)); lovrModelResetBlendShapes(model); model->localTransforms = lovrMalloc(sizeof(NodeTransform) * data->nodeCount); model->globalTransforms = lovrMalloc(16 * sizeof(float) * data->nodeCount); lovrModelResetNodeTransforms(model); return model; } void lovrModelDestroy(void* ref) { Model* model = ref; if (model->parent) { lovrRelease(model->parent, lovrModelDestroy); lovrRelease(model->vertexBuffer, lovrBufferDestroy); lovrFree(model->localTransforms); lovrFree(model->globalTransforms); lovrFree(model->blendShapeWeights); lovrFree(model->meshes); lovrFree(model->draws); lovrFree(model); return; } ModelData* data = model->info.data; if (model->info.materials) { for (uint32_t i = 0; i < data->materialCount; i++) { lovrRelease(model->materials[i], lovrMaterialDestroy); } for (uint32_t i = 0; i < data->imageCount; i++) { lovrRelease(model->textures[i], lovrTextureDestroy); } lovrFree(model->materials); lovrFree(model->textures); } lovrRelease(model->rawVertexBuffer, lovrBufferDestroy); lovrRelease(model->vertexBuffer, lovrBufferDestroy); lovrRelease(model->indexBuffer, lovrBufferDestroy); lovrRelease(model->blendBuffer, lovrBufferDestroy); lovrRelease(model->skinBuffer, lovrBufferDestroy); lovrRelease(model->info.data, lovrModelDataDestroy); lovrFree(model->localTransforms); lovrFree(model->globalTransforms); lovrFree(model->boundingBoxes); lovrFree(model->blendShapeWeights); lovrFree(model->blendGroups); lovrFree(model->meshes); lovrFree(model->draws); lovrFree(model); } const ModelInfo* lovrModelGetInfo(Model* model) { return &model->info; } void lovrModelResetNodeTransforms(Model* model) { ModelData* data = model->info.data; for (uint32_t i = 0; i < data->nodeCount; i++) { NodeTransform* transform = &model->localTransforms[i]; if (data->nodes[i].hasMatrix) { mat4_getPosition(data->nodes[i].transform.matrix, transform->position); mat4_getOrientation(data->nodes[i].transform.matrix, transform->rotation); mat4_getScale(data->nodes[i].transform.matrix, transform->scale); } else { vec3_init(transform->position, data->nodes[i].transform.translation); quat_init(transform->rotation, data->nodes[i].transform.rotation); vec3_init(transform->scale, data->nodes[i].transform.scale); } } model->transformsDirty = true; } void lovrModelResetBlendShapes(Model* model) { ModelData* data = model->info.data; for (uint32_t i = 0; i < data->blendShapeCount; i++) { model->blendShapeWeights[i] = data->blendShapes[i].weight; } model->blendShapesDirty = true; } void lovrModelAnimate(Model* model, uint32_t animationIndex, float time, float alpha) { if (alpha <= 0.f) return; ModelData* data = model->info.data; lovrCheck(animationIndex < data->animationCount, "Invalid animation index '%d' (Model has %d animation%s)", animationIndex + 1, data->animationCount, data->animationCount == 1 ? 
"" : "s"); ModelAnimation* animation = &data->animations[animationIndex]; time = fmodf(time, animation->duration); size_t stack = tempPush(&state.allocator); for (uint32_t i = 0; i < animation->channelCount; i++) { ModelAnimationChannel* channel = &animation->channels[i]; uint32_t node = channel->nodeIndex; uint32_t keyframe = 0; while (keyframe < channel->keyframeCount && channel->times[keyframe] < time) { keyframe++; } size_t n; switch (channel->property) { case PROP_TRANSLATION: n = 3; break; case PROP_SCALE: n = 3; break; case PROP_ROTATION: n = 4; break; case PROP_WEIGHTS: n = data->nodes[node].blendShapeCount; break; } float* property = tempAlloc(&state.allocator, n * sizeof(float)); // Handle the first/last keyframe case (no interpolation) if (keyframe == 0 || keyframe >= channel->keyframeCount) { size_t index = MIN(keyframe, channel->keyframeCount - 1); // For cubic interpolation, each keyframe has 3 parts, and the actual data is in the middle if (channel->smoothing == SMOOTH_CUBIC) { index = 3 * index + 1; } memcpy(property, channel->data + index * n, n * sizeof(float)); } else { float t1 = channel->times[keyframe - 1]; float t2 = channel->times[keyframe]; float z = (time - t1) / (t2 - t1); switch (channel->smoothing) { case SMOOTH_STEP: memcpy(property, channel->data + (z >= .5f ? keyframe : keyframe - 1) * n, n * sizeof(float)); break; case SMOOTH_LINEAR: memcpy(property, channel->data + (keyframe - 1) * n, n * sizeof(float)); if (channel->property == PROP_ROTATION) { quat_slerp(property, channel->data + keyframe * n, z); } else { float* target = channel->data + keyframe * n; for (uint32_t i = 0; i < n; i++) { property[i] += (target[i] - property[i]) * z; } } break; case SMOOTH_CUBIC: { size_t stride = 3 * n; float* p0 = channel->data + (keyframe - 1) * stride + 1 * n; float* m0 = channel->data + (keyframe - 1) * stride + 2 * n; float* p1 = channel->data + (keyframe - 0) * stride + 1 * n; float* m1 = channel->data + (keyframe - 0) * stride + 0 * n; float dt = t2 - t1; float z2 = z * z; float z3 = z2 * z; float a = 2.f * z3 - 3.f * z2 + 1.f; float b = 2.f * z3 - 3.f * z2 + 1.f; float c = -2.f * z3 + 3.f * z2; float d = (z3 * -z2) * dt; for (size_t j = 0; j < n; j++) { property[j] = a * p0[j] + b * m0[j] + c * p1[j] + d * m1[j]; } break; } default: break; } } if (channel->property == PROP_WEIGHTS) { model->blendShapesDirty = true; } else { model->transformsDirty = true; } float* dst; switch (channel->property) { case PROP_TRANSLATION: dst = model->localTransforms[node].position; break; case PROP_SCALE: dst = model->localTransforms[node].scale; break; case PROP_ROTATION: dst = model->localTransforms[node].rotation; break; case PROP_WEIGHTS: dst = &model->blendShapeWeights[data->nodes[node].blendShapeIndex]; break; } if (alpha >= 1.f) { memcpy(dst, property, n * sizeof(float)); } else { for (uint32_t i = 0; i < n; i++) { dst[i] += (property[i] - dst[i]) * alpha; } } } tempPop(&state.allocator, stack); } float lovrModelGetBlendShapeWeight(Model* model, uint32_t index) { return model->blendShapeWeights[index]; } void lovrModelSetBlendShapeWeight(Model* model, uint32_t index, float weight) { model->blendShapeWeights[index] = weight; model->blendShapesDirty = true; } void lovrModelGetNodeTransform(Model* model, uint32_t node, float position[3], float scale[3], float rotation[4], OriginType origin) { if (origin == ORIGIN_PARENT) { vec3_init(position, model->localTransforms[node].position); vec3_init(scale, model->localTransforms[node].scale); quat_init(rotation, 
model->localTransforms[node].rotation); } else { if (model->transformsDirty) { updateModelTransforms(model, model->info.data->rootNode, (float[]) MAT4_IDENTITY); model->transformsDirty = false; } mat4_getPosition(model->globalTransforms + 16 * node, position); mat4_getScale(model->globalTransforms + 16 * node, scale); mat4_getOrientation(model->globalTransforms + 16 * node, rotation); } } void lovrModelSetNodeTransform(Model* model, uint32_t node, float position[3], float scale[3], float rotation[4], float alpha) { if (alpha <= 0.f) return; NodeTransform* transform = &model->localTransforms[node]; if (alpha >= 1.f) { if (position) vec3_init(transform->position, position); if (scale) vec3_init(transform->scale, scale); if (rotation) quat_init(transform->rotation, rotation); } else { if (position) vec3_lerp(transform->position, position, alpha); if (scale) vec3_lerp(transform->scale, scale, alpha); if (rotation) quat_slerp(transform->rotation, rotation, alpha); } model->transformsDirty = true; } Buffer* lovrModelGetVertexBuffer(Model* model) { return model->rawVertexBuffer; } Buffer* lovrModelGetIndexBuffer(Model* model) { return model->indexBuffer; } Mesh* lovrModelGetMesh(Model* model, uint32_t index) { ModelData* data = model->info.data; lovrCheck(index < data->primitiveCount, "Invalid mesh index '%d' (Model has %d mesh%s)", index + 1, data->primitiveCount, data->primitiveCount == 1 ? "" : "es"); if (!model->meshes) { model->meshes = lovrCalloc(data->primitiveCount * sizeof(Mesh*)); } if (!model->meshes[index]) { DrawInfo* draw = &model->draws[index]; MeshInfo info = { .vertexBuffer = model->vertexBuffer, .storage = MESH_GPU }; Mesh* mesh = lovrMeshCreate(&info, NULL); if (draw->index.buffer) lovrMeshSetIndexBuffer(mesh, model->indexBuffer); lovrMeshSetDrawMode(mesh, draw->mode); lovrMeshSetDrawRange(mesh, draw->start, draw->count, draw->baseVertex); lovrMeshSetMaterial(mesh, draw->material); memcpy(mesh->bounds, draw->bounds, sizeof(mesh->bounds)); mesh->hasBounds = true; model->meshes[index] = mesh; } return model->meshes[index]; } Texture* lovrModelGetTexture(Model* model, uint32_t index) { ModelData* data = model->info.data; lovrCheck(index < data->imageCount, "Invalid texture index '%d' (Model has %d texture%s)", index + 1, data->imageCount, data->imageCount == 1 ? "" : "s"); return model->textures[index]; } Material* lovrModelGetMaterial(Model* model, uint32_t index) { ModelData* data = model->info.data; lovrCheck(index < data->materialCount, "Invalid material index '%d' (Model has %d material%s)", index + 1, data->materialCount, data->materialCount == 1 ? 
"" : "s"); return model->materials[index]; } static void lovrModelAnimateVertices(Model* model) { ModelData* data = model->info.data; bool blend = model->blendGroupCount > 0; bool skin = data->skinCount > 0; beginFrame(); if ((!blend && !skin) || (!model->transformsDirty && !model->blendShapesDirty) || model->lastVertexAnimation == state.tick) { return; } if (model->transformsDirty) { updateModelTransforms(model, model->info.data->rootNode, (float[]) MAT4_IDENTITY); model->transformsDirty = false; } if (blend) { Shader* shader = lovrGraphicsGetDefaultShader(SHADER_BLENDER); uint32_t vertexCount = data->dynamicVertexCount; uint32_t blendBufferCursor = 0; uint32_t chunkSize = 64; gpu_binding bindings[] = { { 0, GPU_SLOT_STORAGE_BUFFER, .buffer = { model->rawVertexBuffer->gpu, model->rawVertexBuffer->base, vertexCount * sizeof(ModelVertex) } }, { 1, GPU_SLOT_STORAGE_BUFFER, .buffer = { model->vertexBuffer->gpu, model->vertexBuffer->base, vertexCount * sizeof(ModelVertex) } }, { 2, GPU_SLOT_STORAGE_BUFFER, .buffer = { model->blendBuffer->gpu, model->blendBuffer->base, model->blendBuffer->info.size } }, { 3, GPU_SLOT_UNIFORM_BUFFER, .buffer = { NULL, 0, chunkSize * sizeof(float) } } }; gpu_compute_begin(state.stream); gpu_bind_pipeline(state.stream, shader->computePipeline, GPU_PIPELINE_COMPUTE); for (uint32_t i = 0; i < model->blendGroupCount; i++) { BlendGroup* group = &model->blendGroups[i]; for (uint32_t j = 0; j < group->count; j += chunkSize) { uint32_t count = MIN(group->count - j, chunkSize); bool first = j == 0; BufferView view = getBuffer(GPU_BUFFER_STREAM, chunkSize * sizeof(float), state.limits.uniformBufferAlign); memcpy(view.pointer, model->blendShapeWeights + group->index + j, count * sizeof(float)); bindings[3].buffer = (gpu_buffer_binding) { view.buffer, view.offset, view.extent }; gpu_bundle* bundle = getBundle(shader->layout, bindings, COUNTOF(bindings)); uint32_t constants[] = { group->vertexIndex, group->vertexCount, count, blendBufferCursor, first }; uint32_t subgroupSize = state.device.subgroupSize; gpu_bind_bundles(state.stream, shader->gpu, &bundle, 0, 1, NULL, 0); gpu_push_constants(state.stream, shader->gpu, constants, sizeof(constants)); gpu_compute(state.stream, (group->vertexCount + subgroupSize - 1) / subgroupSize, 1, 1); if (j + count < group->count) { gpu_sync(state.stream, &(gpu_barrier) { .prev = GPU_PHASE_SHADER_COMPUTE, .next = GPU_PHASE_SHADER_COMPUTE, .flush = GPU_CACHE_STORAGE_WRITE, .clear = GPU_CACHE_STORAGE_READ }, 1); } blendBufferCursor += group->vertexCount * count; } } model->blendShapesDirty = false; } if (skin) { if (blend) { gpu_sync(state.stream, &(gpu_barrier) { .prev = GPU_PHASE_SHADER_COMPUTE, .next = GPU_PHASE_SHADER_COMPUTE, .flush = GPU_CACHE_STORAGE_WRITE, .clear = GPU_CACHE_STORAGE_READ | GPU_CACHE_STORAGE_WRITE }, 1); } else { gpu_compute_begin(state.stream); } Shader* shader = lovrGraphicsGetDefaultShader(SHADER_ANIMATOR); Buffer* sourceBuffer = blend ? 
model->vertexBuffer : model->rawVertexBuffer; uint32_t count = data->skinnedVertexCount; gpu_binding bindings[] = { { 0, GPU_SLOT_STORAGE_BUFFER, .buffer = { sourceBuffer->gpu, sourceBuffer->base, count * sizeof(ModelVertex) } }, { 1, GPU_SLOT_STORAGE_BUFFER, .buffer = { model->vertexBuffer->gpu, model->vertexBuffer->base, count * sizeof(ModelVertex) } }, { 2, GPU_SLOT_STORAGE_BUFFER, .buffer = { model->skinBuffer->gpu, model->skinBuffer->base, count * 8 } }, { 3, GPU_SLOT_UNIFORM_BUFFER, .buffer = { NULL, 0, 0 } } // Filled in for each skin }; gpu_bind_pipeline(state.stream, shader->computePipeline, GPU_PIPELINE_COMPUTE); for (uint32_t i = 0, baseVertex = 0; i < data->skinCount; i++) { ModelSkin* skin = &data->skins[i]; uint32_t align = state.limits.uniformBufferAlign; BufferView view = getBuffer(GPU_BUFFER_STREAM, skin->jointCount * 16 * sizeof(float), align); bindings[3].buffer = (gpu_buffer_binding) { view.buffer, view.offset, view.extent }; float transform[16]; float* joints = view.pointer; for (uint32_t j = 0; j < skin->jointCount; j++) { mat4_init(transform, model->globalTransforms + 16 * skin->joints[j]); mat4_mul(transform, skin->inverseBindMatrices + 16 * j); memcpy(joints, transform, sizeof(transform)); joints += 16; } gpu_bundle* bundle = getBundle(shader->layout, bindings, COUNTOF(bindings)); gpu_bind_bundles(state.stream, shader->gpu, &bundle, 0, 1, NULL, 0); uint32_t subgroupSize = state.device.subgroupSize; uint32_t maxVerticesPerDispatch = state.limits.workgroupCount[0] * subgroupSize; uint32_t verticesRemaining = skin->vertexCount; while (verticesRemaining > 0) { uint32_t vertexCount = MIN(verticesRemaining, maxVerticesPerDispatch); gpu_push_constants(state.stream, shader->gpu, (uint32_t[2]) { baseVertex, vertexCount }, 8); gpu_compute(state.stream, (vertexCount + subgroupSize - 1) / subgroupSize, 1, 1); verticesRemaining -= vertexCount; baseVertex += vertexCount; } } } gpu_compute_end(state.stream); state.barrier.prev |= GPU_PHASE_SHADER_COMPUTE; state.barrier.next |= GPU_PHASE_INPUT_VERTEX; state.barrier.flush |= GPU_CACHE_STORAGE_WRITE; state.barrier.clear |= GPU_CACHE_VERTEX; model->lastVertexAnimation = state.tick; } // Readback static Readback* lovrReadbackCreate(ReadbackType type) { beginFrame(); Readback* readback = lovrCalloc(sizeof(Readback)); readback->ref = 1; readback->tick = state.tick; readback->type = type; if (!state.oldestReadback) state.oldestReadback = readback; if (state.newestReadback) state.newestReadback->next = readback; state.newestReadback = readback; lovrRetain(readback); return readback; } Readback* lovrReadbackCreateBuffer(Buffer* buffer, uint32_t offset, uint32_t extent) { if (extent == ~0u) extent = buffer->info.size - offset; lovrCheck(offset + extent <= buffer->info.size, "Tried to read past the end of the Buffer"); lovrCheck(!buffer->info.format || offset % buffer->info.format->stride == 0, "Readback offset must be a multiple of Buffer's stride"); lovrCheck(!buffer->info.format || extent % buffer->info.format->stride == 0, "Readback size must be a multiple of Buffer's stride"); Readback* readback = lovrReadbackCreate(READBACK_BUFFER); readback->buffer = buffer; void* data = lovrMalloc(extent); readback->blob = lovrBlobCreate(data, extent, "Readback"); readback->view = getBuffer(GPU_BUFFER_DOWNLOAD, extent, 4); lovrRetain(buffer); gpu_barrier barrier = syncTransfer(&buffer->sync, GPU_PHASE_COPY, GPU_CACHE_TRANSFER_READ); gpu_sync(state.stream, &barrier, 1); gpu_copy_buffers(state.stream, buffer->gpu, readback->view.buffer, buffer->base + 
offset, readback->view.offset, extent); return readback; } Readback* lovrReadbackCreateTexture(Texture* texture, uint32_t offset[4], uint32_t extent[3]) { if (extent[0] == ~0u) extent[0] = texture->info.width - offset[0]; if (extent[1] == ~0u) extent[1] = texture->info.height - offset[1]; lovrCheck(extent[2] == 1, "Currently, only one layer can be read from a Texture"); lovrCheck(texture->root == texture, "Can not read from a Texture view"); lovrCheck(texture->info.usage & TEXTURE_TRANSFER, "Texture must be created with the 'transfer' usage to read from it"); checkTextureBounds(&texture->info, offset, extent); Readback* readback = lovrReadbackCreate(READBACK_TEXTURE); readback->texture = texture; readback->image = lovrImageCreateRaw(extent[0], extent[1], texture->info.format, texture->info.srgb); readback->view = getBuffer(GPU_BUFFER_DOWNLOAD, measureTexture(texture->info.format, extent[0], extent[1], 1), 64); lovrRetain(texture); gpu_barrier barrier = syncTransfer(&texture->sync, GPU_PHASE_COPY, GPU_CACHE_TRANSFER_READ); gpu_sync(state.stream, &barrier, 1); gpu_copy_texture_buffer(state.stream, texture->gpu, readback->view.buffer, offset, readback->view.offset, extent); return readback; } static Readback* lovrReadbackCreateTimestamp(TimingInfo* times, uint32_t count, BufferView buffer) { Readback* readback = lovrReadbackCreate(READBACK_TIMESTAMP); readback->view = buffer; readback->times = times; readback->count = count; return readback; } void lovrReadbackDestroy(void* ref) { Readback* readback = ref; switch (readback->type) { case READBACK_BUFFER: lovrRelease(readback->buffer, lovrBufferDestroy); lovrRelease(readback->blob, lovrBlobDestroy); break; case READBACK_TEXTURE: lovrRelease(readback->texture, lovrTextureDestroy); lovrRelease(readback->image, lovrImageDestroy); break; case READBACK_TIMESTAMP: for (uint32_t i = 0; i < readback->count; i++) { lovrRelease(readback->times[i].pass, lovrPassDestroy); } lovrFree(readback->times); break; default: break; } lovrFree(readback); } bool lovrReadbackIsComplete(Readback* readback) { return gpu_is_complete(readback->tick); } bool lovrReadbackWait(Readback* readback) { if (lovrReadbackIsComplete(readback)) { return false; } if (readback->tick == state.tick && state.active) { lovrGraphicsSubmit(NULL, 0); } beginFrame(); bool waited = gpu_wait_tick(readback->tick); if (waited) { processReadbacks(); } return waited; } void* lovrReadbackGetData(Readback* readback, DataField** format, uint32_t* count) { if (!lovrReadbackIsComplete(readback)) return NULL; if (readback->type == READBACK_BUFFER && readback->buffer->info.format) { *format = readback->buffer->info.format; *count = (uint32_t) (readback->blob->size / readback->buffer->info.format->stride); return readback->blob->data; } return NULL; } Blob* lovrReadbackGetBlob(Readback* readback) { return lovrReadbackIsComplete(readback) ? readback->blob : NULL; } Image* lovrReadbackGetImage(Readback* readback) { return lovrReadbackIsComplete(readback) ? 
readback->image : NULL; } // Pass static void* lovrPassAllocate(Pass* pass, size_t size) { return tempAlloc(&pass->allocator, size); } static BufferView lovrPassGetBuffer(Pass* pass, uint32_t size, size_t align) { return allocateBuffer(&pass->buffers, GPU_BUFFER_STREAM, size, align); } static void lovrPassRelease(Pass* pass) { // Chain all of the Pass's full buffers onto the end of the global freelist if (pass->buffers.freelist) { BufferBlock** list = &state.bufferAllocators[GPU_BUFFER_STREAM].freelist; while (*list) list = (BufferBlock**) &(*list)->next; *list = pass->buffers.freelist; pass->buffers.freelist = NULL; } if (pass->pipeline) { for (uint32_t i = 0; i <= pass->pipelineIndex; i++) { lovrRelease(pass->pipeline->material, lovrMaterialDestroy); lovrRelease(pass->pipeline->shader, lovrShaderDestroy); lovrRelease(pass->pipeline->font, lovrFontDestroy); pass->pipeline--; } pass->pipelineIndex = 0; } lovrRelease(pass->sampler, lovrSamplerDestroy); for (uint32_t i = 0; i < pass->computeCount; i++) { lovrRelease(pass->computes[i].shader, lovrShaderDestroy); } for (uint32_t i = 0; i < pass->drawCount; i++) { Draw* draw = &pass->draws[i]; lovrRelease(draw->shader, lovrShaderDestroy); lovrRelease(draw->material, lovrMaterialDestroy); } for (uint32_t i = 0; i < COUNTOF(pass->access); i++) { for (AccessBlock* block = pass->access[i]; block != NULL; block = block->next) { for (uint32_t j = 0; j < block->count; j++) { bool texture = block->textureMask & (1ull << j); lovrRelease(block->list[j].object, texture ? lovrTextureDestroy : lovrBufferDestroy); } } } } Pass* lovrGraphicsGetWindowPass(void) { if (!state.windowPass) { state.windowPass = lovrPassCreate(); } Texture* window = lovrGraphicsGetWindowTexture(); if (!window) { return NULL; // The window may become unavailable during a resize } lovrPassReset(state.windowPass); Texture* textures[4] = { state.window }; memcpy(state.windowPass->canvas.color[0].clear, state.background, 4 * sizeof(float)); lovrPassSetCanvas(state.windowPass, textures, NULL, state.depthFormat, state.config.antialias ? 
4 : 1); return state.windowPass; } Pass* lovrPassCreate(void) { Pass* pass = lovrCalloc(sizeof(Pass)); pass->ref = 1; pass->allocator.limit = 1 << 28; pass->allocator.length = 1 << 12; pass->allocator.memory = os_vm_init(pass->allocator.limit); os_vm_commit(pass->allocator.memory, pass->allocator.length); lovrPassReset(pass); return pass; } void lovrPassDestroy(void* ref) { Pass* pass = ref; lovrPassRelease(pass); for (uint32_t i = 0; i < COUNTOF(pass->canvas.color); i++) { lovrRelease(pass->canvas.color[i].texture, lovrTextureDestroy); } lovrRelease(pass->canvas.depth.texture, lovrTextureDestroy); lovrRelease(pass->tally.buffer, lovrBufferDestroy); if (pass->tally.gpu) { gpu_tally_destroy(pass->tally.gpu); lovrRelease(pass->tally.tempBuffer, lovrBufferDestroy); } if (pass->buffers.current) { pass->buffers.current->tick = state.tick; freeBlock(&state.bufferAllocators[GPU_BUFFER_STREAM], pass->buffers.current); } os_vm_free(pass->allocator.memory, pass->allocator.limit); lovrFree(pass); } void lovrPassReset(Pass* pass) { lovrPassRelease(pass); pass->allocator.cursor = 0; pass->access[ACCESS_RENDER] = NULL; pass->access[ACCESS_COMPUTE] = NULL; pass->flags = DIRTY_BINDINGS; pass->transform = lovrPassAllocate(pass, TRANSFORM_STACK_SIZE * 16 * sizeof(float)); pass->pipeline = lovrPassAllocate(pass, PIPELINE_STACK_SIZE * sizeof(Pipeline)); pass->bindings = lovrPassAllocate(pass, 32 * sizeof(gpu_binding)); pass->uniforms = NULL; pass->computeCount = 0; pass->computes = NULL; pass->drawCount = 0; pass->draws = lovrPassAllocate(pass, pass->drawCapacity * sizeof(Draw)); memset(&pass->geocache, 0, sizeof(pass->geocache)); pass->tally.active = false; pass->tally.count = 0; pass->transformIndex = 0; mat4_identity(pass->transform); pass->pipelineIndex = 0; memset(pass->pipeline, 0, sizeof(Pipeline)); pass->pipeline->mode = DRAW_TRIANGLES; pass->pipeline->lastVertexFormat = ~0u; pass->pipeline->color[0] = 1.f; pass->pipeline->color[1] = 1.f; pass->pipeline->color[2] = 1.f; pass->pipeline->color[3] = 1.f; pass->pipeline->info.pass = pass->gpu; pass->pipeline->info.depth.test = GPU_COMPARE_GEQUAL; pass->pipeline->info.depth.write = true; pass->pipeline->info.stencil.testMask = 0xff; pass->pipeline->info.stencil.writeMask = 0xff; for (uint32_t i = 0; i < 4; i++) { lovrPassSetBlendMode(pass, i, BLEND_ALPHA, BLEND_ALPHA_MULTIPLY); pass->pipeline->info.colorMask[i] = 0xf; } pass->cameraCount = 0; if (pass->canvas.views > 0) { float viewMatrix[16]; float projection[16]; mat4_identity(viewMatrix); mat4_perspective(projection, 1.2f, (float) pass->canvas.width / pass->canvas.height, .01f, 0.f); for (uint32_t i = 0; i < pass->canvas.views; i++) { lovrPassSetViewMatrix(pass, i, viewMatrix); lovrPassSetProjection(pass, i, projection); } } memset(pass->viewport, 0, sizeof(pass->viewport)); memset(pass->scissor, 0, sizeof(pass->scissor)); pass->sampler = NULL; } const PassStats* lovrPassGetStats(Pass* pass) { pass->stats.draws = pass->drawCount; pass->stats.computes = pass->computeCount; pass->stats.cpuMemoryReserved = pass->allocator.length; pass->stats.cpuMemoryUsed = pass->allocator.cursor; return &pass->stats; } void lovrPassGetCanvas(Pass* pass, Texture* textures[4], Texture** depthTexture, uint32_t* depthFormat, uint32_t* samples) { for (uint32_t i = 0; i < COUNTOF(pass->canvas.color); i++) { textures[i] = pass->canvas.color[i].texture; } *depthTexture = pass->canvas.depth.texture; *depthFormat = pass->canvas.depth.format; *samples = pass->canvas.samples; } void lovrPassSetCanvas(Pass* pass, Texture* textures[4], 
Texture* depthTexture, uint32_t depthFormat, uint32_t samples) { Canvas* canvas = &pass->canvas; for (uint32_t i = 0; i < canvas->count; i++) { lovrRelease(canvas->color[i].texture, lovrTextureDestroy); canvas->color[i].texture = NULL; } canvas->count = 0; lovrRelease(canvas->depth.texture, lovrTextureDestroy); canvas->depth.texture = NULL; canvas->depth.format = 0; const TextureInfo* t = textures[0] ? &textures[0]->info : &depthTexture->info; if (textures[0] || depthTexture) { canvas->width = t->width; canvas->height = t->height; canvas->views = t->layers; lovrCheck(t->width <= state.limits.renderSize[0], "Pass canvas width (%d) exceeds the renderSize limit of this GPU (%d)", t->width, state.limits.renderSize[0]); lovrCheck(t->height <= state.limits.renderSize[1], "Pass canvas height (%d) exceeds the renderSize limit of this GPU (%d)", t->height, state.limits.renderSize[1]); lovrCheck(t->layers <= state.limits.renderSize[2], "Pass canvas layer count (%d) exceeds the renderSize limit of this GPU (%d)", t->layers, state.limits.renderSize[2]); lovrCheck(samples == 1 || samples == 4, "Currently MSAA must be 1 or 4"); canvas->samples = samples; canvas->resolve = samples > 1; } else { memset(canvas, 0, sizeof(Canvas)); } for (uint32_t i = 0; i < COUNTOF(canvas->color) && textures[i]; i++, canvas->count++) { const TextureInfo* texture = &textures[i]->info; bool renderable = texture->format == GPU_FORMAT_SURFACE || (state.features.formats[texture->format][texture->srgb] & GPU_FEATURE_RENDER); lovrCheck(!isDepthFormat(texture->format), "Unable to use a depth texture as a color target"); lovrCheck(renderable, "This GPU does not support rendering to the texture format/encoding used by canvas texture #%d", i + 1); lovrCheck(texture->usage & TEXTURE_RENDER, "Texture must be created with the 'render' flag to render to it"); lovrCheck(texture->width == t->width, "Canvas texture sizes must match"); lovrCheck(texture->height == t->height, "Canvas texture sizes must match"); lovrCheck(texture->layers == t->layers, "Canvas texture layer counts must match"); canvas->color[i].texture = textures[i]; lovrRetain(textures[i]); } if (depthTexture) { const TextureInfo* texture = &depthTexture->info; lovrCheck(isDepthFormat(texture->format), "Canvas depth textures must have a depth format"); lovrCheck(texture->usage & TEXTURE_RENDER, "Texture must be created with the 'render' flag to render to it"); lovrCheck(texture->width == t->width, "Canvas texture sizes must match"); lovrCheck(texture->height == t->height, "Canvas texture sizes must match"); lovrCheck(texture->layers == t->layers, "Canvas texture layer counts must match"); lovrCheck(samples == 1 || state.features.depthResolve, "This GPU does not support resolving depth textures, MSAA should be set to 1"); canvas->depth.texture = depthTexture; canvas->depth.format = texture->format; lovrRetain(depthTexture); } else if (depthFormat) { lovrCheck(isDepthFormat(depthFormat), "Expected depth format for canvas depth (received color format)"); lovrCheck(state.features.formats[depthFormat][0] & GPU_FEATURE_RENDER, "Canvas depth format is not supported by this GPU"); canvas->depth.format = depthFormat; } pass->gpu = getPass(canvas); lovrPassReset(pass); } void lovrPassGetClear(Pass* pass, LoadAction loads[4], float clears[4][4], LoadAction* depthLoad, float* depthClear) { for (uint32_t i = 0; i < pass->canvas.count; i++) { loads[i] = pass->canvas.color[i].load; if (pass->canvas.color[i].load == LOAD_CLEAR) { clears[i][0] = 
void lovrPassGetClear(Pass* pass, LoadAction loads[4], float clears[4][4], LoadAction* depthLoad, float* depthClear) {
  for (uint32_t i = 0; i < pass->canvas.count; i++) {
    loads[i] = pass->canvas.color[i].load;
    if (pass->canvas.color[i].load == LOAD_CLEAR) {
      clears[i][0] = lovrMathLinearToGamma(pass->canvas.color[i].clear[0]);
      clears[i][1] = lovrMathLinearToGamma(pass->canvas.color[i].clear[1]);
      clears[i][2] = lovrMathLinearToGamma(pass->canvas.color[i].clear[2]);
      clears[i][3] = pass->canvas.color[i].clear[3];
    }
  }
  *depthLoad = pass->canvas.depth.load;
  *depthClear = pass->canvas.depth.clear;
}

void lovrPassSetClear(Pass* pass, LoadAction loads[4], float clears[4][4], LoadAction depthLoad, float depthClear) {
  bool dirty = false;
  for (uint32_t i = 0; i < pass->canvas.count; i++) {
    dirty |= loads[i] != pass->canvas.color[i].load;
    pass->canvas.color[i].load = loads[i];
    if (loads[i] == LOAD_CLEAR) {
      pass->canvas.color[i].clear[0] = lovrMathGammaToLinear(clears[i][0]);
      pass->canvas.color[i].clear[1] = lovrMathGammaToLinear(clears[i][1]);
      pass->canvas.color[i].clear[2] = lovrMathGammaToLinear(clears[i][2]);
      pass->canvas.color[i].clear[3] = clears[i][3];
    } else {
      memset(pass->canvas.color[i].clear, 0, 4 * sizeof(float));
    }
  }
  dirty |= depthLoad != pass->canvas.depth.load;
  pass->canvas.depth.load = depthLoad;
  pass->canvas.depth.clear = depthLoad == LOAD_CLEAR ? depthClear : 0.f;
  if (dirty) pass->gpu = getPass(&pass->canvas);
}

uint32_t lovrPassGetAttachmentCount(Pass* pass, bool* depth) {
  if (depth) *depth = pass->canvas.depth.texture || pass->canvas.depth.format;
  return pass->canvas.count;
}

uint32_t lovrPassGetWidth(Pass* pass) {
  return pass->canvas.width;
}

uint32_t lovrPassGetHeight(Pass* pass) {
  return pass->canvas.height;
}

uint32_t lovrPassGetViewCount(Pass* pass) {
  return pass->canvas.views;
}
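// Cameras are stored as immutable snapshots: once a draw has been recorded, changing a view or
// projection matrix makes getCamera() below append a fresh copy of all views (seeded from the
// previous snapshot) instead of mutating in place, so earlier draws keep the camera that was
// active when they were issued.  Each Draw records the index of its snapshot.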
static Camera* getCamera(Pass* pass) {
  if (pass->flags & DIRTY_CAMERA) {
    return pass->cameras + (pass->cameraCount - 1) * pass->canvas.views;
  }
  uint32_t views = pass->canvas.views;
  uint32_t stride = sizeof(Camera) * views;
  uint32_t count = pass->cameraCount;
  Camera* cameras = lovrPassAllocate(pass, (count + 1) * stride);
  Camera* newCamera = cameras + count * views;
  if (pass->cameras) memcpy(cameras, pass->cameras, count * stride);
  memcpy(newCamera, newCamera - views, count > 0 ? stride : 0);
  pass->flags |= DIRTY_CAMERA;
  pass->cameras = cameras;
  pass->cameraCount++;
  return newCamera;
}

void lovrPassGetViewMatrix(Pass* pass, uint32_t index, float viewMatrix[16]) {
  lovrCheck(index < pass->canvas.views, "Invalid view index '%d'", index + 1);
  mat4_init(viewMatrix, getCamera(pass)[index].viewMatrix);
}

void lovrPassSetViewMatrix(Pass* pass, uint32_t index, float viewMatrix[16]) {
  lovrCheck(index < pass->canvas.views, "Invalid view index '%d'", index + 1);
  mat4_init(getCamera(pass)[index].viewMatrix, viewMatrix);
}

void lovrPassGetProjection(Pass* pass, uint32_t index, float projection[16]) {
  lovrCheck(index < pass->canvas.views, "Invalid view index '%d'", index + 1);
  mat4_init(projection, getCamera(pass)[index].projection);
}

void lovrPassSetProjection(Pass* pass, uint32_t index, float projection[16]) {
  lovrCheck(index < pass->canvas.views, "Invalid view index '%d'", index + 1);
  mat4_init(getCamera(pass)[index].projection, projection);
}

void lovrPassGetViewport(Pass* pass, float viewport[6]) {
  memcpy(viewport, pass->viewport, 6 * sizeof(float));
}

void lovrPassSetViewport(Pass* pass, float viewport[6]) {
  memcpy(pass->viewport, viewport, 6 * sizeof(float));
}

void lovrPassGetScissor(Pass* pass, uint32_t scissor[4]) {
  memcpy(scissor, pass->scissor, 4 * sizeof(uint32_t));
}

void lovrPassSetScissor(Pass* pass, uint32_t scissor[4]) {
  memcpy(pass->scissor, scissor, 4 * sizeof(uint32_t));
}

void lovrPassPush(Pass* pass, StackType stack) {
  switch (stack) {
    case STACK_TRANSFORM:
      lovrCheck(++pass->transformIndex < TRANSFORM_STACK_SIZE, "%s stack overflow (more pushes than pops?)", "Transform");
      mat4_init(pass->transform + 16, pass->transform);
      pass->transform += 16;
      break;
    case STACK_STATE:
      lovrCheck(++pass->pipelineIndex < PIPELINE_STACK_SIZE, "%s stack overflow (more pushes than pops?)", "Pipeline");
      memcpy(pass->pipeline + 1, pass->pipeline, sizeof(Pipeline));
      pass->pipeline++;
      lovrRetain(pass->pipeline->font);
      lovrRetain(pass->pipeline->shader);
      lovrRetain(pass->pipeline->material);
      break;
    default: break;
  }
}

void lovrPassPop(Pass* pass, StackType stack) {
  switch (stack) {
    case STACK_TRANSFORM:
      lovrCheck(--pass->transformIndex < TRANSFORM_STACK_SIZE, "%s stack underflow (more pops than pushes?)", "Transform");
      pass->transform -= 16;
      break;
    case STACK_STATE:
      lovrRelease(pass->pipeline->font, lovrFontDestroy);
      lovrRelease(pass->pipeline->shader, lovrShaderDestroy);
      lovrRelease(pass->pipeline->material, lovrMaterialDestroy);
      lovrCheck(--pass->pipelineIndex < PIPELINE_STACK_SIZE, "%s stack underflow (more pops than pushes?)", "Pipeline");
      pass->pipeline--;
      pass->pipeline->dirty = true;
      break;
    default: break;
  }
}

void lovrPassOrigin(Pass* pass) {
  mat4_identity(pass->transform);
}

void lovrPassTranslate(Pass* pass, vec3 translation) {
  mat4_translate(pass->transform, translation[0], translation[1], translation[2]);
}

void lovrPassRotate(Pass* pass, quat rotation) {
  mat4_rotateQuat(pass->transform, rotation);
}

void lovrPassScale(Pass* pass, vec3 scale) {
  mat4_scale(pass->transform, scale[0], scale[1], scale[2]);
}

void lovrPassTransform(Pass* pass, mat4 transform) {
  mat4_mul(pass->transform, transform);
}

void lovrPassSetAlphaToCoverage(Pass* pass, bool enabled) {
  pass->pipeline->dirty |= enabled != pass->pipeline->info.multisample.alphaToCoverage;
  pass->pipeline->info.multisample.alphaToCoverage = enabled;
}
void lovrPassSetBlendMode(Pass* pass, uint32_t index, BlendMode mode, BlendAlphaMode alphaMode) {
  if (mode == BLEND_NONE) {
    pass->pipeline->dirty |= pass->pipeline->info.blend[index].enabled;
    memset(&pass->pipeline->info.blend[index], 0, sizeof(gpu_blend_state));
    return;
  }

  gpu_blend_state* blend = &pass->pipeline->info.blend[index];

  static const gpu_blend_state table[] = {
    [BLEND_ALPHA] = {
      .color = { GPU_BLEND_SRC_ALPHA, GPU_BLEND_ONE_MINUS_SRC_ALPHA, GPU_BLEND_ADD },
      .alpha = { GPU_BLEND_ONE, GPU_BLEND_ONE_MINUS_SRC_ALPHA, GPU_BLEND_ADD }
    },
    [BLEND_ADD] = {
      .color = { GPU_BLEND_SRC_ALPHA, GPU_BLEND_ONE, GPU_BLEND_ADD },
      .alpha = { GPU_BLEND_ZERO, GPU_BLEND_ONE, GPU_BLEND_ADD }
    },
    [BLEND_SUBTRACT] = {
      .color = { GPU_BLEND_SRC_ALPHA, GPU_BLEND_ONE, GPU_BLEND_RSUB },
      .alpha = { GPU_BLEND_ZERO, GPU_BLEND_ONE, GPU_BLEND_RSUB }
    },
    [BLEND_MULTIPLY] = {
      .color = { GPU_BLEND_DST_COLOR, GPU_BLEND_ZERO, GPU_BLEND_ADD },
      .alpha = { GPU_BLEND_DST_COLOR, GPU_BLEND_ZERO, GPU_BLEND_ADD }
    },
    [BLEND_LIGHTEN] = {
      .color = { GPU_BLEND_SRC_ALPHA, GPU_BLEND_ZERO, GPU_BLEND_MAX },
      .alpha = { GPU_BLEND_ONE, GPU_BLEND_ZERO, GPU_BLEND_MAX }
    },
    [BLEND_DARKEN] = {
      .color = { GPU_BLEND_SRC_ALPHA, GPU_BLEND_ZERO, GPU_BLEND_MIN },
      .alpha = { GPU_BLEND_ONE, GPU_BLEND_ZERO, GPU_BLEND_MIN }
    },
    [BLEND_SCREEN] = {
      .color = { GPU_BLEND_SRC_ALPHA, GPU_BLEND_ONE_MINUS_SRC_COLOR, GPU_BLEND_ADD },
      .alpha = { GPU_BLEND_ONE, GPU_BLEND_ONE_MINUS_SRC_COLOR, GPU_BLEND_ADD }
    },
  };

  *blend = table[mode];
  blend->enabled = true;

  if (alphaMode == BLEND_PREMULTIPLIED && mode != BLEND_MULTIPLY) {
    blend->color.src = GPU_BLEND_ONE;
  }

  pass->pipeline->dirty = true;
}
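/* For reference, the BLEND_ALPHA entry in the table above implements standard "over"
 * compositing:
 *
 *   color = src.rgb * src.a + dst.rgb * (1 - src.a)
 *   alpha = src.a   * 1     + dst.a   * (1 - src.a)
 *
 * When alphaMode is BLEND_PREMULTIPLIED the color source factor is forced to ONE, since the
 * source color is assumed to already be multiplied by its alpha.
 */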
void lovrPassSetColor(Pass* pass, float color[4]) {
  pass->pipeline->color[0] = lovrMathGammaToLinear(color[0]);
  pass->pipeline->color[1] = lovrMathGammaToLinear(color[1]);
  pass->pipeline->color[2] = lovrMathGammaToLinear(color[2]);
  pass->pipeline->color[3] = color[3];
}

void lovrPassSetColorWrite(Pass* pass, uint32_t index, bool r, bool g, bool b, bool a) {
  uint8_t mask = (r << 0) | (g << 1) | (b << 2) | (a << 3);
  pass->pipeline->dirty |= pass->pipeline->info.colorMask[index] != mask;
  pass->pipeline->info.colorMask[index] = mask;
}

void lovrPassSetDepthTest(Pass* pass, CompareMode test) {
  pass->pipeline->dirty |= pass->pipeline->info.depth.test != (gpu_compare_mode) test;
  pass->pipeline->info.depth.test = (gpu_compare_mode) test;
}

void lovrPassSetDepthWrite(Pass* pass, bool write) {
  pass->pipeline->dirty |= pass->pipeline->info.depth.write != write;
  pass->pipeline->info.depth.write = write;
}

void lovrPassSetDepthOffset(Pass* pass, float offset, float sloped) {
  pass->pipeline->info.rasterizer.depthOffset = offset;
  pass->pipeline->info.rasterizer.depthOffsetSloped = sloped;
  pass->pipeline->dirty = true;
}

void lovrPassSetDepthClamp(Pass* pass, bool clamp) {
  if (state.features.depthClamp) {
    pass->pipeline->dirty |= pass->pipeline->info.rasterizer.depthClamp != clamp;
    pass->pipeline->info.rasterizer.depthClamp = clamp;
  }
}

void lovrPassSetFaceCull(Pass* pass, CullMode mode) {
  pass->pipeline->dirty |= pass->pipeline->info.rasterizer.cullMode != (gpu_cull_mode) mode;
  pass->pipeline->info.rasterizer.cullMode = (gpu_cull_mode) mode;
}

void lovrPassSetFont(Pass* pass, Font* font) {
  if (pass->pipeline->font != font) {
    lovrRetain(font);
    lovrRelease(pass->pipeline->font, lovrFontDestroy);
    pass->pipeline->font = font;
  }
}

void lovrPassSetMaterial(Pass* pass, Material* material) {
  if (!material) material = state.defaultMaterial;
  if (pass->pipeline->material != material) {
    lovrRetain(material);
    lovrRelease(pass->pipeline->material, lovrMaterialDestroy);
    pass->pipeline->material = material;
  }
}

void lovrPassSetMeshMode(Pass* pass, DrawMode mode) {
  pass->pipeline->mode = mode;
}

void lovrPassSetSampler(Pass* pass, Sampler* sampler) {
  if (sampler != pass->sampler) {
    lovrRetain(sampler);
    lovrRelease(pass->sampler, lovrSamplerDestroy);
    pass->sampler = sampler;
  }
}

void lovrPassSetShader(Pass* pass, Shader* shader) {
  Shader* old = pass->pipeline->shader;

  if (shader == old) {
    return;
  }

  if (shader) {
    gpu_binding bindings[32];

    // Ensure there's a valid binding for every resource in the new shader.  If the old shader had
    // a binding with the same name and type, then use that, otherwise use a "default" resource.
    for (uint32_t i = 0; i < shader->resourceCount; i++) {
      ShaderResource* resource = &shader->resources[i];
      bool useDefault = true;

      if (old) {
        ShaderResource* other = old->resources;
        for (uint32_t j = 0; j < old->resourceCount; j++, other++) {
          if (other->hash == resource->hash && other->type == resource->type) {
            bindings[resource->binding] = pass->bindings[other->binding];
            useDefault = false;
            break;
          }
        }
      }

      if (useDefault) {
        switch (resource->type) {
          case GPU_SLOT_UNIFORM_BUFFER:
          case GPU_SLOT_STORAGE_BUFFER:
            bindings[i].buffer.object = state.defaultBuffer->gpu;
            bindings[i].buffer.offset = state.defaultBuffer->base;
            bindings[i].buffer.extent = state.defaultBuffer->info.size;
            break;
          case GPU_SLOT_SAMPLED_TEXTURE:
          case GPU_SLOT_STORAGE_TEXTURE:
            bindings[i].texture = state.defaultTexture->gpu;
            break;
          case GPU_SLOT_SAMPLER:
            bindings[i].sampler = state.defaultSamplers[FILTER_LINEAR]->gpu;
            break;
          default: break;
        }
      }
    }

    memcpy(pass->bindings, bindings, shader->resourceCount * sizeof(gpu_binding));
    pass->flags |= DIRTY_BINDINGS;

    // Uniform data is preserved for uniforms with the same name/size (this might be slow...)
    if (shader->uniformCount > 0) {
      void* uniforms = lovrPassAllocate(pass, shader->uniformSize);

      if (old && old->uniformCount > 0) {
        for (uint32_t i = 0; i < shader->uniformCount; i++) {
          DataField* uniform = &shader->uniforms[i];
          DataField* other = old->uniforms;
          for (uint32_t j = 0; j < old->uniformCount; j++, other++) {
            if (uniform->hash == other->hash && uniform->stride == other->stride && uniform->length == other->length) {
              void* src = (char*) pass->uniforms + other->offset;
              void* dst = (char*) uniforms + uniform->offset;
              size_t size = uniform->stride * MAX(uniform->length, 1);
              memcpy(dst, src, size);
            }
          }
        }
      } else {
        memset(uniforms, 0, shader->uniformSize);
      }

      pass->uniforms = uniforms;
      pass->flags |= DIRTY_UNIFORMS;
    }

    // Custom vertex attributes must be reset: their locations may differ even if the names match
    if (shader->hasCustomAttributes) {
      pass->pipeline->lastVertexBuffer = NULL;
    }

    pass->pipeline->info.shader = shader->gpu;
    pass->pipeline->info.flags = shader->flags;
    pass->pipeline->info.flagCount = shader->overrideCount;
    lovrRetain(shader);
  }

  lovrRelease(old, lovrShaderDestroy);
  pass->pipeline->shader = shader;
  pass->pipeline->dirty = true;
}
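/* Illustrative sketch of the carry-over behavior above (the buffer name and shader variables are
 * hypothetical):
 *
 *   lovrPassSetShader(pass, shaderA);
 *   lovrPassSendBuffer(pass, "Transforms", strlen("Transforms"), buffer, 0, 0);
 *   lovrPassSetShader(pass, shaderB); // if shaderB declares a buffer with the same name and
 *                                     // type, the "Transforms" binding carries over by hash
 */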
void lovrPassSetStencilTest(Pass* pass, CompareMode test, uint8_t value, uint8_t mask) {
  TextureFormat depthFormat = pass->canvas.depth.texture ? pass->canvas.depth.texture->info.format : pass->canvas.depth.format;
  lovrCheck(depthFormat == FORMAT_D32FS8 || depthFormat == FORMAT_D24S8, "Trying to set stencil mode, but Pass depth texture does not use a stencil format");

  bool hasReplace = false;
  hasReplace |= pass->pipeline->info.stencil.failOp == GPU_STENCIL_REPLACE;
  hasReplace |= pass->pipeline->info.stencil.depthFailOp == GPU_STENCIL_REPLACE;
  hasReplace |= pass->pipeline->info.stencil.passOp == GPU_STENCIL_REPLACE;
  if (hasReplace && test != COMPARE_NONE) {
    lovrCheck(value == pass->pipeline->info.stencil.value, "When stencil write is 'replace' and stencil test is active, their values must match");
  }

  switch (test) { // (Reversed compare mode)
    case COMPARE_NONE: default: pass->pipeline->info.stencil.test = GPU_COMPARE_NONE; break;
    case COMPARE_EQUAL: pass->pipeline->info.stencil.test = GPU_COMPARE_EQUAL; break;
    case COMPARE_NEQUAL: pass->pipeline->info.stencil.test = GPU_COMPARE_NEQUAL; break;
    case COMPARE_LESS: pass->pipeline->info.stencil.test = GPU_COMPARE_GREATER; break;
    case COMPARE_LEQUAL: pass->pipeline->info.stencil.test = GPU_COMPARE_GEQUAL; break;
    case COMPARE_GREATER: pass->pipeline->info.stencil.test = GPU_COMPARE_LESS; break;
    case COMPARE_GEQUAL: pass->pipeline->info.stencil.test = GPU_COMPARE_LEQUAL; break;
  }

  pass->pipeline->info.stencil.testMask = mask;
  if (test != COMPARE_NONE) pass->pipeline->info.stencil.value = value;
  pass->pipeline->dirty = true;
}

void lovrPassSetStencilWrite(Pass* pass, StencilAction actions[3], uint8_t value, uint8_t mask) {
  TextureFormat depthFormat = pass->canvas.depth.texture ? pass->canvas.depth.texture->info.format : pass->canvas.depth.format;
  lovrCheck(depthFormat == FORMAT_D32FS8 || depthFormat == FORMAT_D24S8, "Trying to set stencil mode, but Pass depth texture does not use a stencil format");

  bool hasReplace = actions[0] == STENCIL_REPLACE || actions[1] == STENCIL_REPLACE || actions[2] == STENCIL_REPLACE;
  if (hasReplace && pass->pipeline->info.stencil.test != GPU_COMPARE_NONE) {
    lovrCheck(value == pass->pipeline->info.stencil.value, "When stencil write is 'replace' and stencil test is active, their values must match");
  }

  pass->pipeline->info.stencil.failOp = (gpu_stencil_op) actions[0];
  pass->pipeline->info.stencil.depthFailOp = (gpu_stencil_op) actions[1];
  pass->pipeline->info.stencil.passOp = (gpu_stencil_op) actions[2];
  pass->pipeline->info.stencil.writeMask = mask;
  if (hasReplace) pass->pipeline->info.stencil.value = value;
  pass->pipeline->dirty = true;
}

void lovrPassSetViewCull(Pass* pass, bool enable) {
  pass->pipeline->viewCull = enable;
}

void lovrPassSetWinding(Pass* pass, Winding winding) {
  pass->pipeline->dirty |= pass->pipeline->info.rasterizer.winding != (gpu_winding) winding;
  pass->pipeline->info.rasterizer.winding = (gpu_winding) winding;
}

void lovrPassSetWireframe(Pass* pass, bool wireframe) {
  if (state.features.wireframe) {
    pass->pipeline->dirty |= pass->pipeline->info.rasterizer.wireframe != wireframe;
    pass->pipeline->info.rasterizer.wireframe = wireframe;
  }
}

void lovrPassSendBuffer(Pass* pass, const char* name, size_t length, Buffer* buffer, uint32_t offset, uint32_t extent) {
  Shader* shader = pass->pipeline->shader;
  lovrCheck(shader, "A Shader must be active to send resources");
  ShaderResource* resource = findShaderResource(shader, name, length);
  uint32_t slot = resource->binding;

  lovrCheck(shader->bufferMask & (1u << slot), "Trying to send a Buffer to '%s', but the active Shader doesn't have a Buffer in that slot", name);
  lovrCheck(offset < buffer->info.size, "Buffer offset is past the end of the Buffer");

  uint32_t limit;

  if (shader->storageMask & (1u << slot)) {
    lovrCheck((offset & (state.limits.storageBufferAlign - 1)) == 0, "Storage buffer offset (%d) is not aligned to storageBufferAlign limit (%d)", offset, state.limits.storageBufferAlign);
    limit = state.limits.storageBufferRange;
  } else {
    lovrCheck((offset & (state.limits.uniformBufferAlign - 1)) == 0, "Uniform buffer offset (%d) is not aligned to uniformBufferAlign limit (%d)", offset, state.limits.uniformBufferAlign);
    limit = state.limits.uniformBufferRange;
  }

  if (extent == 0) {
    extent = MIN(buffer->info.size - offset, limit);
  } else {
    lovrCheck(offset + extent <= buffer->info.size, "Buffer range goes past the end of the Buffer");
    lovrCheck(extent <= limit, "Buffer range exceeds storageBufferRange/uniformBufferRange limit");
  }

  trackBuffer(pass, buffer, resource->phase, resource->cache);
  pass->bindings[slot].buffer.object = buffer->gpu;
  pass->bindings[slot].buffer.offset = buffer->base + offset;
  pass->bindings[slot].buffer.extent = extent;
  pass->flags |= DIRTY_BINDINGS;
}
void lovrPassSendTexture(Pass* pass, const char* name, size_t length, Texture* texture) {
  Shader* shader = pass->pipeline->shader;
  lovrCheck(shader, "A Shader must be active to send resources");
  ShaderResource* resource = findShaderResource(shader, name, length);
  uint32_t slot = resource->binding;
  lovrCheck(shader->textureMask & (1u << slot), "Trying to send a Texture to '%s', but the active Shader doesn't have a Texture in that slot", name);

  gpu_texture* view = texture->gpu;

  if (shader->storageMask & (1u << slot)) {
    lovrCheck(texture->info.usage & TEXTURE_STORAGE, "Textures must be created with the 'storage' usage to send them to image variables in shaders");
    view = texture->storageView;
  } else {
    lovrCheck(texture->info.usage & TEXTURE_SAMPLE, "Textures must be created with the 'sample' usage to send them to sampler variables in shaders");
  }

  trackTexture(pass, texture, resource->phase, resource->cache);
  pass->bindings[slot].texture = view;
  pass->flags |= DIRTY_BINDINGS;
}

void lovrPassSendSampler(Pass* pass, const char* name, size_t length, Sampler* sampler) {
  Shader* shader = pass->pipeline->shader;
  lovrCheck(shader, "A Shader must be active to send resources");
  ShaderResource* resource = findShaderResource(shader, name, length);
  uint32_t slot = resource->binding;
  lovrCheck(shader->samplerMask & (1u << slot), "Trying to send a Sampler to '%s', but the active Shader doesn't have a Sampler in that slot", name);

  pass->bindings[slot].sampler = sampler->gpu;
  pass->flags |= DIRTY_BINDINGS;
}

void lovrPassSendData(Pass* pass, const char* name, size_t length, void** data, DataField** format) {
  Shader* shader = pass->pipeline->shader;
  lovrCheck(shader, "A Shader must be active to send data to it");

  uint32_t hash = (uint32_t) hash64(name, length);
  for (uint32_t i = 0; i < shader->uniformCount; i++) {
    if (shader->uniforms[i].hash == hash) {
      *data = (char*) pass->uniforms + shader->uniforms[i].offset;
      *format = &shader->uniforms[i];
      pass->flags |= DIRTY_UNIFORMS;
      return;
    }
  }

  ShaderResource* resource = findShaderResource(shader, name, length);
  uint32_t slot = resource->binding;
  lovrCheck(shader->bufferMask & (1u << slot), "Trying to send data to '%s', but that slot isn't a Buffer", name);
  lovrCheck(~shader->storageMask & (1u << slot), "Unable to send table data to a storage buffer");

  uint32_t size = resource->format->stride * MAX(resource->format->length, 1);
  BufferView view = lovrPassGetBuffer(pass, size, state.limits.uniformBufferAlign);
  pass->bindings[slot].buffer = (gpu_buffer_binding) { view.buffer, view.offset, view.extent };
  pass->flags |= DIRTY_BINDINGS;

  *data = view.pointer;
  *format = resource->format;
}
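/* Illustrative use of lovrPassSendData (the uniform name here is hypothetical): the caller writes
 * through the returned pointer, using the returned DataField to describe the layout:
 *
 *   void* data;
 *   DataField* format;
 *   lovrPassSendData(pass, "color", strlen("color"), &data, &format);
 *   memcpy(data, (float[4]) { 1.f, 0.f, 0.f, 1.f }, 4 * sizeof(float));
 */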
static void lovrPassResolvePipeline(Pass* pass, DrawInfo* info, Draw* draw, Draw* prev) {
  Pipeline* pipeline = pass->pipeline;
  Shader* shader = draw->shader;

  if (pipeline->info.drawMode != (gpu_draw_mode) info->mode) {
    pipeline->info.drawMode = (gpu_draw_mode) info->mode;
    pipeline->dirty = true;
  }

  if (!pipeline->shader && pipeline->info.shader != shader->gpu) {
    pipeline->info.shader = shader->gpu;
    pipeline->info.flags = NULL;
    pipeline->info.flagCount = 0;
    pipeline->dirty = true;
  }

  // Vertex formats
  if (info->vertex.buffer && pipeline->lastVertexBuffer != info->vertex.buffer) {
    pipeline->lastVertexFormat = ~0u;
    pipeline->lastVertexBuffer = info->vertex.buffer;
    pipeline->dirty = true;

    const DataField* format = info->vertex.buffer->info.format;
    pipeline->info.vertex.bufferCount = 2;
    pipeline->info.vertex.attributeCount = shader->attributeCount;
    pipeline->info.vertex.bufferStrides[0] = format->stride;
    pipeline->info.vertex.bufferStrides[1] = 0;

    for (uint32_t i = 0; i < shader->attributeCount; i++) {
      ShaderAttribute* attribute = &shader->attributes[i];
      bool found = false;

      for (uint32_t j = 0; j < format->fieldCount; j++) {
        DataField* field = &format->fields[j];
        if (field->hash == attribute->hash || field->hash == attribute->location) {
          lovrCheck(field->type < TYPE_MAT2, "Currently vertex attributes can not use matrix or index types");
          pipeline->info.vertex.attributes[i] = (gpu_attribute) {
            .buffer = 0,
            .location = attribute->location,
            .offset = field->offset,
            .type = field->type
          };
          found = true;
          break;
        }
      }

      if (!found) {
        pipeline->info.vertex.attributes[i] = (gpu_attribute) {
          .buffer = 1,
          .location = attribute->location,
          .offset = attribute->location == LOCATION_COLOR ? 16 : 0,
          .type = GPU_TYPE_F32x4
        };
      }
    }
  } else if (!info->vertex.buffer && pipeline->lastVertexFormat != info->vertex.format) {
    pipeline->lastVertexFormat = info->vertex.format;
    pipeline->lastVertexBuffer = NULL;
    pipeline->info.vertex = state.vertexFormats[info->vertex.format];
    pipeline->dirty = true;

    if (shader->hasCustomAttributes) {
      for (uint32_t i = 0; i < shader->attributeCount; i++) {
        if (shader->attributes[i].location < 10) {
          pipeline->info.vertex.attributes[pipeline->info.vertex.attributeCount++] = (gpu_attribute) {
            .buffer = 1,
            .location = shader->attributes[i].location,
            .type = GPU_TYPE_F32x4,
            .offset = shader->attributes[i].location == LOCATION_COLOR ? 16 : 0
          };
        }
      }
    }
  }

  if (pipeline->dirty) {
    pipeline->dirty = false;
    draw->pipelineInfo = lovrPassAllocate(pass, sizeof(gpu_pipeline_info));
    memcpy(draw->pipelineInfo, &pipeline->info, sizeof(pipeline->info));
    draw->pipeline = NULL;
  } else {
    draw->pipelineInfo = prev->pipelineInfo;
    draw->pipeline = prev->pipeline;
  }
}
static void lovrPassResolveVertices(Pass* pass, DrawInfo* info, Draw* draw) {
  CachedShape* cached = info->hash ? &pass->geocache[info->hash & (COUNTOF(pass->geocache) - 1)] : NULL;

  if (cached && cached->hash == info->hash) {
    draw->vertexBuffer = cached->vertexBuffer;
    draw->indexBuffer = cached->indexBuffer;
    draw->start = cached->start;
    draw->baseVertex = cached->baseVertex;
    draw->vertexBufferOffset = cached->vertexBufferOffset;
    *info->vertex.pointer = NULL;
    *info->index.pointer = NULL;
    return;
  }

  if (!info->vertex.buffer && info->vertex.count > 0) {
    lovrCheck(info->vertex.count <= UINT16_MAX, "Shape has too many vertices (max is 65535)");
    uint32_t stride = state.vertexFormats[info->vertex.format].bufferStrides[0];
    BufferView view = lovrPassGetBuffer(pass, info->vertex.count * stride, stride);
    *info->vertex.pointer = view.pointer;
    draw->vertexBuffer = view.buffer;
    draw->vertexBufferOffset = view.offset;
  } else if (info->vertex.buffer) {
    Buffer* buffer = info->vertex.buffer;
    uint32_t stride = buffer->info.format->stride;
    lovrCheck(stride <= state.limits.vertexBufferStride, "Vertex buffer stride exceeds vertexBufferStride limit");
    trackBuffer(pass, buffer, GPU_PHASE_INPUT_VERTEX, GPU_CACHE_VERTEX);
    draw->vertexBuffer = buffer->gpu;
    draw->vertexBufferOffset = buffer->base;
  } else {
    draw->vertexBuffer = state.defaultBuffer->gpu;
    draw->vertexBufferOffset = state.defaultBuffer->base;
  }

  if (!info->index.buffer && info->index.count > 0) {
    BufferView view = lovrPassGetBuffer(pass, info->index.count * 2, 2);
    *info->index.pointer = view.pointer;
    draw->indexBuffer = view.buffer;
    draw->start = view.offset / 2;
  } else if (info->index.buffer) {
    trackBuffer(pass, info->index.buffer, GPU_PHASE_INPUT_INDEX, GPU_CACHE_INDEX);
    draw->indexBuffer = info->index.buffer->gpu;
    draw->flags |= info->index.buffer->info.format->stride == 4 ? DRAW_INDEX32 : 0;
    draw->start += info->index.buffer->base / info->index.buffer->info.format->stride;
  } else {
    draw->indexBuffer = NULL;
  }

  if (info->hash) {
    cached->hash = info->hash;
    cached->vertexBuffer = draw->vertexBuffer;
    cached->indexBuffer = draw->indexBuffer;
    cached->start = draw->start;
    cached->baseVertex = draw->baseVertex;
    cached->vertexBufferOffset = draw->vertexBufferOffset;
  }
}

static gpu_bundle_info* lovrPassResolveBindings(Pass* pass, Shader* shader, gpu_bundle_info* previous) {
  if (shader->resourceCount == 0) {
    return NULL;
  }

  if (~pass->flags & DIRTY_BINDINGS) {
    return previous;
  }

  gpu_bundle_info* bundle = lovrPassAllocate(pass, sizeof(gpu_bundle_info));
  bundle->bindings = lovrPassAllocate(pass, shader->resourceCount * sizeof(gpu_binding));
  bundle->layout = state.layouts.data[shader->layout].gpu;
  bundle->count = shader->resourceCount;

  for (uint32_t i = 0; i < bundle->count; i++) {
    bundle->bindings[i] = pass->bindings[shader->resources[i].binding];
    bundle->bindings[i].type = shader->resources[i].type;
    bundle->bindings[i].number = shader->resources[i].binding;
    bundle->bindings[i].count = 0;
  }

  pass->flags &= ~DIRTY_BINDINGS;
  return bundle;
}

static void lovrPassResolveUniforms(Pass* pass, Shader* shader, gpu_buffer** buffer, uint32_t* offset) {
  BufferView view = lovrPassGetBuffer(pass, shader->uniformSize, state.limits.uniformBufferAlign);
  memcpy(view.pointer, pass->uniforms, shader->uniformSize);
  *buffer = view.buffer;
  *offset = view.offset;
}
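// lovrPassDraw records a Draw by snapshotting the current pipeline/binding/uniform state.  The
// draw array grows geometrically (the capacity doubles once it fills, capped at 2^16 draws), and
// state that hasn't changed since the previous draw is shared with it rather than re-resolved.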
void lovrPassDraw(Pass* pass, DrawInfo* info) {
  if (pass->drawCount >= pass->drawCapacity) {
    lovrAssert(pass->drawCount < 1 << 16, "Pass has too many draws!");
    pass->drawCapacity = pass->drawCapacity > 0 ? pass->drawCapacity << 1 : 1;
    Draw* draws = lovrPassAllocate(pass, pass->drawCapacity * sizeof(Draw));
    if (pass->draws) memcpy(draws, pass->draws, pass->drawCount * sizeof(Draw));
    pass->draws = draws;
  }

  Draw* previous = pass->drawCount > 0 ? &pass->draws[pass->drawCount - 1] : NULL;
  Draw* draw = &pass->draws[pass->drawCount++];

  draw->flags = 0;
  draw->tally = pass->tally.active ? pass->tally.count : ~0u;
  draw->camera = pass->cameraCount - 1;
  pass->flags &= ~DIRTY_CAMERA;

  draw->shader = pass->pipeline->shader ? pass->pipeline->shader : lovrGraphicsGetDefaultShader(info->shader);
  lovrCheck(draw->shader->info.type == SHADER_GRAPHICS, "Tried to draw while a compute shader is active");
  lovrRetain(draw->shader);

  draw->material = info->material;
  if (!draw->material) draw->material = pass->pipeline->material;
  if (!draw->material) draw->material = state.defaultMaterial;
  trackMaterial(pass, draw->material);

  draw->start = info->start;
  draw->count = info->count > 0 ? info->count : (info->index.buffer || info->index.count > 0 ? info->index.count : info->vertex.count);
  draw->instances = MAX(info->instances, 1);
  draw->baseVertex = info->baseVertex;

  lovrPassResolvePipeline(pass, info, draw, previous);
  lovrPassResolveVertices(pass, info, draw);

  draw->bundleInfo = lovrPassResolveBindings(pass, draw->shader, previous ? previous->bundleInfo : NULL);

  if (draw->shader->uniformCount > 0 && pass->flags & DIRTY_UNIFORMS) {
    lovrPassResolveUniforms(pass, draw->shader, &draw->uniformBuffer, &draw->uniformOffset);
    pass->flags &= ~DIRTY_UNIFORMS;
  } else {
    draw->uniformBuffer = previous ? previous->uniformBuffer : NULL;
    draw->uniformOffset = previous ? previous->uniformOffset : 0;
  }

  if (pass->pipeline->viewCull && info->bounds) {
    memcpy(draw->bounds, info->bounds, sizeof(draw->bounds));
    draw->flags |= DRAW_HAS_BOUNDS;
    pass->flags |= NEEDS_VIEW_CULL;
  }

  mat4_init(draw->transform, pass->transform);
  if (info->transform) mat4_mul(draw->transform, info->transform);
  memcpy(draw->color, pass->pipeline->color, 4 * sizeof(float));
}

void lovrPassPoints(Pass* pass, uint32_t count, float** points) {
  lovrPassDraw(pass, &(DrawInfo) {
    .mode = DRAW_POINTS,
    .vertex.format = VERTEX_POINT,
    .vertex.pointer = (void**) points,
    .vertex.count = count
  });
}

void lovrPassLine(Pass* pass, uint32_t count, float** points) {
  lovrCheck(count >= 2, "Need at least 2 points to make a line");

  uint16_t* indices;

  lovrPassDraw(pass, &(DrawInfo) {
    .mode = DRAW_LINES,
    .vertex.format = VERTEX_POINT,
    .vertex.pointer = (void**) points,
    .vertex.count = count,
    .index.pointer = (void**) &indices,
    .index.count = 2 * (count - 1)
  });

  for (uint32_t i = 0; i < count - 1; i++) {
    indices[2 * i + 0] = i;
    indices[2 * i + 1] = i + 1;
  }
}
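// Grid triangulation used by lovrPassPlane below: vertices are laid out row-major, and each cell
// with corners a (top left), b (top right), c (bottom left), d (bottom right) emits the two
// triangles { a, c, b } and { b, c, d }.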
void lovrPassPlane(Pass* pass, float* transform, DrawStyle style, uint32_t cols, uint32_t rows) {
  uint32_t key[] = { SHAPE_PLANE, style, cols, rows };
  ShapeVertex* vertices;
  uint16_t* indices;

  uint32_t vertexCount = (cols + 1) * (rows + 1);
  uint32_t indexCount;

  if (style == STYLE_LINE) {
    indexCount = 2 * (rows + 1) + 2 * (cols + 1);

    lovrPassDraw(pass, &(DrawInfo) {
      .hash = hash64(key, sizeof(key)),
      .mode = DRAW_LINES,
      .transform = transform,
      .bounds = (float[6]) { 0.f, 0.f, 0.f, .5f, .5f, 0.f },
      .vertex.pointer = (void**) &vertices,
      .vertex.count = vertexCount,
      .index.pointer = (void**) &indices,
      .index.count = indexCount
    });
  } else {
    indexCount = (cols * rows) * 6;

    lovrPassDraw(pass, &(DrawInfo) {
      .hash = hash64(key, sizeof(key)),
      .mode = DRAW_TRIANGLES,
      .transform = transform,
      .bounds = (float[6]) { 0.f, 0.f, 0.f, .5f, .5f, 0.f },
      .vertex.pointer = (void**) &vertices,
      .vertex.count = vertexCount,
      .index.pointer = (void**) &indices,
      .index.count = indexCount
    });
  }

  if (!vertices) {
    return;
  }

  for (uint32_t y = 0; y <= rows; y++) {
    float v = y * (1.f / rows);
    for (uint32_t x = 0; x <= cols; x++) {
      float u = x * (1.f / cols);
      *vertices++ = (ShapeVertex) {
        .position = { u - .5f, .5f - v, 0.f },
        .normal = { 0.f, 0.f, 1.f },
        .uv = { u, v }
      };
    }
  }

  if (style == STYLE_LINE) {
    for (uint32_t y = 0; y <= rows; y++) {
      uint16_t a = y * (cols + 1);
      uint16_t b = a + cols;
      uint16_t line[] = { a, b };
      memcpy(indices, line, sizeof(line));
      indices += COUNTOF(line);
    }

    for (uint32_t x = 0; x <= cols; x++) {
      uint16_t a = x;
      uint16_t b = x + ((cols + 1) * rows);
      uint16_t line[] = { a, b };
      memcpy(indices, line, sizeof(line));
      indices += COUNTOF(line);
    }
  } else {
    for (uint32_t y = 0; y < rows; y++) {
      for (uint32_t x = 0; x < cols; x++) {
        uint16_t a = (y * (cols + 1)) + x;
        uint16_t b = a + 1;
        uint16_t c = a + cols + 1;
        uint16_t d = a + cols + 2;
        uint16_t cell[] = { a, c, b, b, c, d };
        memcpy(indices, cell, sizeof(cell));
        indices += COUNTOF(cell);
      }
    }
  }
}
void lovrPassRoundrect(Pass* pass, float* transform, float r, uint32_t segments) {
  bool thicc = vec3_length(transform + 8) > 0.f;
  float w = vec3_length(transform + 0);
  float h = vec3_length(transform + 4);
  r = MIN(MIN(r, w / 2.f), h / 2.f);
  float rx = MIN(r / w, .5f);
  float ry = MIN(r / h, .5f);
  uint32_t n = segments + 1;

  if (!thicc && (r <= 0.f || w == 0.f || h == 0.f)) {
    lovrPassPlane(pass, transform, STYLE_FILL, 1, 1);
    return;
  }

  uint32_t vertexCount;
  uint32_t indexCount;

  if (thicc) {
    vertexCount = 8 + (segments + 1) * 16;
    indexCount = 3 * 8 * segments + 6 * 4 * (segments + 1) + 60;
  } else {
    vertexCount = 4 + (segments + 1) * 4;
    indexCount = 3 * 4 * segments + 30;
  }

  ShapeVertex* vertices;
  uint16_t* indices;

  lovrPassDraw(pass, &(DrawInfo) {
    .mode = DRAW_TRIANGLES,
    .transform = transform,
    .bounds = (float[6]) { 0.f, 0.f, 0.f, .5f, .5f, .5f },
    .vertex.pointer = (void**) &vertices,
    .vertex.count = vertexCount,
    .index.pointer = (void**) &indices,
    .index.count = indexCount
  });

  uint32_t c = vertexCount - (thicc ? 8 : 4);
  ShapeVertex* corner = vertices + c;

  float angle = 0.f;
  float step = (float) M_PI / 2.f / segments;
  float x = .5f - rx;
  float y = .5f - ry;
  float z = .5f;
  float nz = 1.f;

  // If the rounded rectangle is thick, loop twice (front and back), otherwise do only a single side
  for (uint32_t side = 0; side <= (uint32_t) thicc; side++, z *= -1.f, nz *= -1.f, angle = 0.f) {
    for (uint32_t i = 0; i < n; i++, angle += step) {
      float c = cosf(angle);
      float s = sinf(angle);
      vertices[n * 0 + i] = (ShapeVertex) { { x + c * rx, y + s * ry, z }, { 0.f, 0.f, nz }, { .5f + x + c * rx, .5f - y - s * ry } };
      vertices[n * 1 + i] = (ShapeVertex) { { -x - s * rx, y + c * ry, z }, { 0.f, 0.f, nz }, { .5f - x - s * rx, .5f - y - c * ry } };
      vertices[n * 2 + i] = (ShapeVertex) { { -x - c * rx, -y - s * ry, z }, { 0.f, 0.f, nz }, { .5f - x - c * rx, .5f + y + s * ry } };
      vertices[n * 3 + i] = (ShapeVertex) { { x + s * rx, -y - c * ry, z }, { 0.f, 0.f, nz }, { .5f + x + s * rx, .5f + y + c * ry } };

      if (thicc) {
        vertices[n * 8 + i] = (ShapeVertex) { { x + c * rx, y + s * ry, z }, { c, s, 0.f }, { .5f + x + c * rx, .5f - y - s * ry } };
        vertices[n * 9 + i] = (ShapeVertex) { { -x - s * rx, y + c * ry, z }, { c, s, 0.f }, { .5f - x - s * rx, .5f - y - c * ry } };
        vertices[n * 10 + i] = (ShapeVertex) { { -x - c * rx, -y - s * ry, z }, { c, s, 0.f }, { .5f - x - c * rx, .5f + y + s * ry } };
        vertices[n * 11 + i] = (ShapeVertex) { { x + s * rx, -y - c * ry, z }, { c, s, 0.f }, { .5f + x + s * rx, .5f + y + c * ry } };
      }
    }

    vertices += 4 * n;

    // 4 extra corner vertices per-side, used for the triangle fans and 9-slice quads
    *corner++ = (ShapeVertex) { { x, y, z }, { 0.f, 0.f, nz }, { .5f + x, .5f - y } };
    *corner++ = (ShapeVertex) { { -x, y, z }, { 0.f, 0.f, nz }, { .5f - x, .5f - y } };
    *corner++ = (ShapeVertex) { { -x, -y, z }, { 0.f, 0.f, nz }, { .5f - x, .5f + y } };
    *corner++ = (ShapeVertex) { { x, -y, z }, { 0.f, 0.f, nz }, { .5f + x, .5f + y } };
  }

  uint32_t m = segments;

  uint16_t front[] = {
    n * 0 + m, n * 1, c + 0, c + 0, n * 1, c + 1, // top
    c + 1, n * 1 + m, c + 2, c + 2, n * 1 + m, n * 2, // left
    n * 0, c + 0, n * 3 + m, n * 3 + m, c + 0, c + 3, // right
    c + 3, c + 2, n * 3, n * 3, c + 2, 2 * n + m, // bot
    c + 0, c + 1, c + 3, c + 3, c + 1, c + 2 // center
  };

  memcpy(indices, front, sizeof(front));
  indices += COUNTOF(front);

  for (uint32_t i = 0; i < 4; i++) {
    for (uint32_t j = 0; j < segments; j++) {
      memcpy(indices, (uint16_t[]) { c + i, n * i + j, n * i + j + 1 }, 3 * sizeof(uint16_t));
      indices += 3;
    }
  }

  if (thicc) {
    uint16_t back[] = {
      n * 4 + m, c + 4, n * 5, n * 5, c + 4, c + 5, // top
      c + 5, c + 6, n * 5 + m, n * 5 + m, c + 6, n * 6, // left
      n * 4, n * 7 + m, c + 4, c + 4, n * 7 + m, c + 7, // right
      c + 7, n * 7, c + 6, c + 6, n * 7, 6 * n + m, // bot
      c + 4, c + 7, c + 5, c + 5, c + 7, c + 6 // center
    };

    memcpy(indices, back, sizeof(back));
    indices += COUNTOF(back);

    for (uint32_t i = 4; i < 8; i++) {
      for (uint32_t j = 0; j < segments; j++) {
        memcpy(indices, (uint16_t[]) { n * i + j, c + i, n * i + j + 1 }, 3 * sizeof(uint16_t));
        indices += 3;
      }
    }

    // Stitch sides together
    for (uint32_t i = 0; i < 4 * n - 1; i++) {
      uint16_t a = 8 * n + i;
      uint16_t b = 12 * n + i;
      memcpy(indices, (uint16_t[]) { a, b, b + 1, a, b + 1, a + 1 }, 6 * sizeof(uint16_t));
      indices += 6;
    }

    // Handle discontinuity
    uint16_t a = 11 * n + m;
    uint16_t b = 15 * n + m;
    uint16_t c = 12 * n;
    uint16_t d = 8 * n;
    memcpy(indices, (uint16_t[]) { a, b, c, a, c, d }, 6 * sizeof(uint16_t));
    indices += 6;
  }
}
void lovrPassBox(Pass* pass, float* transform, DrawStyle style) {
  uint32_t key[] = { SHAPE_BOX, style };
  ShapeVertex* vertices;
  uint16_t* indices;

  if (style == STYLE_LINE) {
    static ShapeVertex vertexData[] = {
      { { -.5f, .5f, -.5f }, { 0.f, 0.f, 0.f }, { 0.f, 0.f } }, // Front
      { { .5f, .5f, -.5f }, { 0.f, 0.f, 0.f }, { 0.f, 0.f } },
      { { .5f, -.5f, -.5f }, { 0.f, 0.f, 0.f }, { 0.f, 0.f } },
      { { -.5f, -.5f, -.5f }, { 0.f, 0.f, 0.f }, { 0.f, 0.f } },
      { { -.5f, .5f, .5f }, { 0.f, 0.f, 0.f }, { 0.f, 0.f } }, // Back
      { { .5f, .5f, .5f }, { 0.f, 0.f, 0.f }, { 0.f, 0.f } },
      { { .5f, -.5f, .5f }, { 0.f, 0.f, 0.f }, { 0.f, 0.f } },
      { { -.5f, -.5f, .5f }, { 0.f, 0.f, 0.f }, { 0.f, 0.f } }
    };

    static uint16_t indexData[] = {
      0, 1, 1, 2, 2, 3, 3, 0, // Front
      4, 5, 5, 6, 6, 7, 7, 4, // Back
      0, 4, 1, 5, 2, 6, 3, 7 // Connections
    };

    lovrPassDraw(pass, &(DrawInfo) {
      .hash = hash64(key, sizeof(key)),
      .mode = DRAW_LINES,
      .transform = transform,
      .bounds = (float[6]) { 0.f, 0.f, 0.f, .5f, .5f, .5f },
      .vertex.pointer = (void**) &vertices,
      .vertex.count = COUNTOF(vertexData),
      .index.pointer = (void**) &indices,
      .index.count = COUNTOF(indexData)
    });

    if (vertices) {
      memcpy(vertices, vertexData, sizeof(vertexData));
      memcpy(indices, indexData, sizeof(indexData));
    }
  } else {
    static ShapeVertex vertexData[] = {
      { { -.5f, -.5f, -.5f }, { 0.f, 0.f, -1.f }, { 0.f, 0.f } }, // Front
      { { -.5f, .5f, -.5f }, { 0.f, 0.f, -1.f }, { 0.f, 1.f } },
      { { .5f, -.5f, -.5f }, { 0.f, 0.f, -1.f }, { 1.f, 0.f } },
      { { .5f, .5f, -.5f }, { 0.f, 0.f, -1.f }, { 1.f, 1.f } },
      { { .5f, .5f, -.5f }, { 1.f, 0.f, 0.f }, { 0.f, 1.f } }, // Right
      { { .5f, .5f, .5f }, { 1.f, 0.f, 0.f }, { 1.f, 1.f } },
      { { .5f, -.5f, -.5f }, { 1.f, 0.f, 0.f }, { 0.f, 0.f } },
      { { .5f, -.5f, .5f }, { 1.f, 0.f, 0.f }, { 1.f, 0.f } },
      { { .5f, -.5f, .5f }, { 0.f, 0.f, 1.f }, { 0.f, 0.f } }, // Back
      { { .5f, .5f, .5f }, { 0.f, 0.f, 1.f }, { 0.f, 1.f } },
      { { -.5f, -.5f, .5f }, { 0.f, 0.f, 1.f }, { 1.f, 0.f } },
      { { -.5f, .5f, .5f }, { 0.f, 0.f, 1.f }, { 1.f, 1.f } },
      { { -.5f, .5f, .5f }, { -1.f, 0.f, 0.f }, { 0.f, 1.f } }, // Left
      { { -.5f, .5f, -.5f }, { -1.f, 0.f, 0.f }, { 1.f, 1.f } },
      { { -.5f, -.5f, .5f }, { -1.f, 0.f, 0.f }, { 0.f, 0.f } },
      { { -.5f, -.5f, -.5f }, { -1.f, 0.f, 0.f }, { 1.f, 0.f } },
      { { -.5f, -.5f, -.5f }, { 0.f, -1.f, 0.f }, { 0.f, 0.f } }, // Bottom
      { { .5f, -.5f, -.5f }, { 0.f, -1.f, 0.f }, { 1.f, 0.f } },
      { { -.5f, -.5f, .5f }, { 0.f, -1.f, 0.f }, { 0.f, 1.f } },
      { { .5f, -.5f, .5f }, { 0.f, -1.f, 0.f }, { 1.f, 1.f } },
      { { -.5f, .5f, -.5f }, { 0.f, 1.f, 0.f }, { 0.f, 1.f } }, // Top
      { { -.5f, .5f, .5f }, { 0.f, 1.f, 0.f }, { 0.f, 0.f } },
      { { .5f, .5f, -.5f }, { 0.f, 1.f, 0.f }, { 1.f, 1.f } },
      { { .5f, .5f, .5f }, { 0.f, 1.f, 0.f }, { 1.f, 0.f } }
    };

    static uint16_t indexData[] = {
      0, 1, 2, 2, 1, 3,
      4, 5, 6, 6, 5, 7,
      8, 9, 10, 10, 9, 11,
      12, 13, 14, 14, 13, 15,
      16, 17, 18, 18, 17, 19,
      20, 21, 22, 22, 21, 23
    };

    lovrPassDraw(pass, &(DrawInfo) {
      .hash = hash64(key, sizeof(key)),
      .mode = DRAW_TRIANGLES,
      .transform = transform,
      .bounds = (float[6]) { 0.f, 0.f, 0.f, .5f, .5f, .5f },
      .vertex.pointer = (void**) &vertices,
      .vertex.count = COUNTOF(vertexData),
      .index.pointer = (void**) &indices,
      .index.count = COUNTOF(indexData)
    });

    if (vertices) {
      memcpy(vertices, vertexData, sizeof(vertexData));
      memcpy(indices, indexData, sizeof(indexData));
    }
  }
}
void lovrPassCircle(Pass* pass, float* transform, DrawStyle style, float angle1, float angle2, uint32_t segments) {
  if (fabsf(angle1 - angle2) >= 2.f * (float) M_PI) {
    angle1 = 0.f;
    angle2 = 2.f * (float) M_PI;
  }

  uint32_t key[] = { SHAPE_CIRCLE, style, FLOAT_BITS(angle1), FLOAT_BITS(angle2), segments };
  ShapeVertex* vertices;
  uint16_t* indices;

  if (style == STYLE_LINE) {
    uint32_t vertexCount = segments + 1;
    uint32_t indexCount = segments * 2;

    lovrPassDraw(pass, &(DrawInfo) {
      .hash = hash64(key, sizeof(key)),
      .mode = DRAW_LINES,
      .transform = transform,
      .bounds = (float[6]) { 0.f, 0.f, 0.f, 1.f, 1.f, 0.f },
      .vertex.pointer = (void**) &vertices,
      .vertex.count = vertexCount,
      .index.pointer = (void**) &indices,
      .index.count = indexCount
    });

    if (!vertices) {
      return;
    }
  } else {
    uint32_t vertexCount = segments + 2;
    uint32_t indexCount = segments * 3;

    lovrPassDraw(pass, &(DrawInfo) {
      .hash = hash64(key, sizeof(key)),
      .mode = DRAW_TRIANGLES,
      .transform = transform,
      .bounds = (float[6]) { 0.f, 0.f, 0.f, 1.f, 1.f, 0.f },
      .vertex.pointer = (void**) &vertices,
      .vertex.count = vertexCount,
      .index.pointer = (void**) &indices,
      .index.count = indexCount
    });

    if (!vertices) {
      return;
    }

    // Center
    *vertices++ = (ShapeVertex) { { 0.f, 0.f, 0.f }, { 0.f, 0.f, 1.f }, { .5f, .5f } };
  }

  float angleShift = (angle2 - angle1) / segments;
  for (uint32_t i = 0; i <= segments; i++) {
    float theta = angle1 + i * angleShift;
    float x = cosf(theta);
    float y = sinf(theta);
    *vertices++ = (ShapeVertex) { { x, y, 0.f }, { 0.f, 0.f, 1.f }, { x + .5f, .5f - y } };
  }

  if (style == STYLE_LINE) {
    for (uint32_t i = 0; i < segments; i++) {
      uint16_t segment[] = { i, i + 1 };
      memcpy(indices, segment, sizeof(segment));
      indices += COUNTOF(segment);
    }
  } else {
    for (uint32_t i = 0; i < segments; i++) {
      uint16_t wedge[] = { 0, i + 1, i + 2 };
      memcpy(indices, wedge, sizeof(wedge));
      indices += COUNTOF(wedge);
    }
  }
}
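// Sphere tessellation bookkeeping for lovrPassSphere below: there are two pole vertices plus
// (segmentsV - 1) latitude rings of (segmentsH + 1) vertices each (the extra column duplicates
// the seam so UVs can wrap).  Indices are two polar triangle fans of 3 * segmentsH indices each,
// plus 6 indices per quad for the (segmentsV - 2) interior ring bands.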
void lovrPassSphere(Pass* pass, float* transform, uint32_t segmentsH, uint32_t segmentsV) {
  uint32_t vertexCount = 2 + (segmentsH + 1) * (segmentsV - 1);
  uint32_t indexCount = 2 * 3 * segmentsH + segmentsH * (segmentsV - 2) * 6;
  ShapeVertex* vertices;
  uint16_t* indices;

  uint32_t key[] = { SHAPE_SPHERE, segmentsH, segmentsV };

  lovrPassDraw(pass, &(DrawInfo) {
    .hash = hash64(key, sizeof(key)),
    .mode = DRAW_TRIANGLES,
    .transform = transform,
    .bounds = (float[6]) { 0.f, 0.f, 0.f, 1.f, 1.f, 1.f },
    .vertex.pointer = (void**) &vertices,
    .vertex.count = vertexCount,
    .index.pointer = (void**) &indices,
    .index.count = indexCount,
  });

  if (!vertices) {
    return;
  }

  // Top
  *vertices++ = (ShapeVertex) { { 0.f, 1.f, 0.f }, { 0.f, 1.f, 0.f }, { .5f, 0.f } };

  // Rings
  for (uint32_t i = 1; i < segmentsV; i++) {
    float v = i / (float) segmentsV;
    float phi = v * (float) M_PI;
    float sinphi = sinf(phi);
    float cosphi = cosf(phi);
    for (uint32_t j = 0; j <= segmentsH; j++) {
      float u = j / (float) segmentsH;
      float theta = u * 2.f * (float) M_PI;
      float sintheta = sinf(theta);
      float costheta = cosf(theta);
      float x = sintheta * sinphi;
      float y = cosphi;
      float z = -costheta * sinphi;
      *vertices++ = (ShapeVertex) { { x, y, z }, { x, y, z }, { u, v } };
    }
  }

  // Bottom
  *vertices++ = (ShapeVertex) { { 0.f, -1.f, 0.f }, { 0.f, -1.f, 0.f }, { .5f, 1.f } };

  // Top
  for (uint32_t i = 0; i < segmentsH; i++) {
    uint16_t wedge[] = { 0, i + 2, i + 1 };
    memcpy(indices, wedge, sizeof(wedge));
    indices += COUNTOF(wedge);
  }

  // Rings
  for (uint32_t i = 0; i < segmentsV - 2; i++) {
    for (uint32_t j = 0; j < segmentsH; j++) {
      uint16_t a = 1 + i * (segmentsH + 1) + 0 + j;
      uint16_t b = 1 + i * (segmentsH + 1) + 1 + j;
      uint16_t c = 1 + i * (segmentsH + 1) + 0 + segmentsH + 1 + j;
      uint16_t d = 1 + i * (segmentsH + 1) + 1 + segmentsH + 1 + j;
      uint16_t quad[] = { a, b, c, c, b, d };
      memcpy(indices, quad, sizeof(quad));
      indices += COUNTOF(quad);
    }
  }

  // Bottom
  for (uint32_t i = 0; i < segmentsH; i++) {
    uint16_t wedge[] = { vertexCount - 1, vertexCount - 1 - (i + 2), vertexCount - 1 - (i + 1) };
    memcpy(indices, wedge, sizeof(wedge));
    indices += COUNTOF(wedge);
  }
}

void lovrPassCylinder(Pass* pass, float* transform, bool capped, float angle1, float angle2, uint32_t segments) {
  if (fabsf(angle1 - angle2) >= 2.f * (float) M_PI) {
    angle1 = 0.f;
    angle2 = 2.f * (float) M_PI;
  }

  uint32_t key[] = { SHAPE_CYLINDER, capped, FLOAT_BITS(angle1), FLOAT_BITS(angle2), segments };
  uint32_t vertexCount = 2 * (segments + 1);
  uint32_t indexCount = 6 * segments;
  ShapeVertex* vertices;
  uint16_t* indices;

  if (capped) {
    vertexCount *= 2;
    vertexCount += 2;
    indexCount += 3 * segments * 2;
  }

  lovrPassDraw(pass, &(DrawInfo) {
    .hash = hash64(key, sizeof(key)),
    .mode = DRAW_TRIANGLES,
    .transform = transform,
    .bounds = (float[6]) { 0.f, 0.f, 0.f, 1.f, 1.f, .5f },
    .vertex.pointer = (void**) &vertices,
    .vertex.count = vertexCount,
    .index.pointer = (void**) &indices,
    .index.count = indexCount
  });

  if (!vertices) {
    return;
  }

  float angleShift = (angle2 - angle1) / segments;

  // Tube
  for (uint32_t i = 0; i <= segments; i++) {
    float theta = angle1 + i * angleShift;
    float x = cosf(theta);
    float y = sinf(theta);
    *vertices++ = (ShapeVertex) { { x, y, -.5f }, { x, y, 0.f }, { x + .5f, .5f - y } };
    *vertices++ = (ShapeVertex) { { x, y, .5f }, { x, y, 0.f }, { x + .5f, .5f - y } };
  }

  // Tube quads
  for (uint32_t i = 0; i < segments; i++) {
    uint16_t a = i * 2 + 0;
    uint16_t b = i * 2 + 1;
    uint16_t c = i * 2 + 2;
    uint16_t d = i * 2 + 3;
    uint16_t quad[] = { a, c, b, b, c, d };
    memcpy(indices, quad, sizeof(quad));
    indices += COUNTOF(quad);
  }

  if (capped) {
    // Cap centers
    *vertices++ = (ShapeVertex) { { 0.f, 0.f, -.5f }, { 0.f, 0.f, -1.f }, { .5f, .5f } };
    *vertices++ = (ShapeVertex) { { 0.f, 0.f, .5f }, { 0.f, 0.f, 1.f }, { .5f, .5f } };

    // Caps
    for (uint32_t i = 0; i <= segments; i++) {
      float theta = angle1 + i * angleShift;
      float x = cosf(theta);
      float y = sinf(theta);
      *vertices++ = (ShapeVertex) { { x, y, -.5f }, { 0.f, 0.f, -1.f }, { x + .5f, y - .5f } };
      *vertices++ = (ShapeVertex) { { x, y, .5f }, { 0.f, 0.f, 1.f }, { x + .5f, y - .5f } };
    }

    // Cap wedges
    uint16_t base = 2 * (segments + 1);
    for (uint32_t i = 0; i < segments; i++) {
      uint16_t a = base + 0;
      uint16_t b = base + (i + 1) * 2;
      uint16_t c = base + (i + 2) * 2;
      uint16_t wedge1[] = { a + 0, c + 0, b + 0 };
      uint16_t wedge2[] = { a + 1, b + 1, c + 1 };
      memcpy(indices + 0, wedge1, sizeof(wedge1));
      memcpy(indices + 3, wedge2, sizeof(wedge2));
      indices += 6;
    }
  }
}
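// In lovrPassCone below, rsqrt3 = .57735f is 1/sqrt(3); the slanted side normals are
// (cos(theta), sin(theta), -1) scaled by that factor.  The apex vertex (at z = -1) gets a zero
// normal, since every side triangle meets there.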
void lovrPassCone(Pass* pass, float* transform, uint32_t segments) {
  uint32_t key[] = { SHAPE_CONE, segments };
  uint32_t vertexCount = 2 * segments + 1;
  uint32_t indexCount = 3 * (segments - 2) + 3 * segments;
  ShapeVertex* vertices;
  uint16_t* indices;

  lovrPassDraw(pass, &(DrawInfo) {
    .hash = hash64(key, sizeof(key)),
    .mode = DRAW_TRIANGLES,
    .transform = transform,
    .bounds = (float[6]) { 0.f, 0.f, -.5f, 1.f, 1.f, .5f },
    .vertex.pointer = (void**) &vertices,
    .vertex.count = vertexCount,
    .index.pointer = (void**) &indices,
    .index.count = indexCount
  });

  if (!vertices) {
    return;
  }

  for (uint32_t i = 0; i < segments; i++) {
    float theta = i * 2.f * (float) M_PI / segments;
    float x = cosf(theta);
    float y = sinf(theta);
    float rsqrt3 = .57735f;
    float nx = cosf(theta) * rsqrt3;
    float ny = sinf(theta) * rsqrt3;
    float nz = -rsqrt3;
    float u = x + .5f;
    float v = .5f - y;
    vertices[segments * 0] = (ShapeVertex) { { x, y, 0.f }, { 0.f, 0.f, 1.f }, { u, v } };
    vertices[segments * 1] = (ShapeVertex) { { x, y, 0.f }, { nx, ny, nz }, { u, v } };
    vertices++;
  }

  vertices[segments] = (ShapeVertex) { { 0.f, 0.f, -1.f }, { 0.f, 0.f, 0.f }, { .5f, .5f } };

  // Base
  for (uint32_t i = 0; i < segments - 2; i++) {
    uint16_t tri[] = { 0, i + 1, i + 2 };
    memcpy(indices, tri, sizeof(tri));
    indices += COUNTOF(tri);
  }

  // Sides
  for (uint32_t i = 0; i < segments; i++) {
    uint16_t tri[] = { segments + (i + 1) % segments, segments + i, vertexCount - 1 };
    memcpy(indices, tri, sizeof(tri));
    indices += COUNTOF(tri);
  }
}

void lovrPassCapsule(Pass* pass, float* transform, uint32_t segments) {
  float sx = vec3_length(transform + 0);
  float sy = vec3_length(transform + 4);
  float sz = vec3_length(transform + 8);
  float length = sz * .5f;
  float radius = sx;

  if (length == 0.f) {
    float rotation[4];
    vec3_cross(vec3_init(transform + 8, transform + 0), transform + 4);
    vec3_scale(transform + 8, 1.f / radius);
    mat4_rotateQuat(transform, quat_fromAngleAxis(rotation, (float) M_PI / 2.f, 1.f, 0.f, 0.f));
    lovrPassSphere(pass, transform, segments, segments);
    return;
  }

  vec3_scale(transform + 0, 1.f / sx);
  vec3_scale(transform + 4, 1.f / sy);
  vec3_scale(transform + 8, 1.f / sz);

  uint32_t key[] = { SHAPE_CAPSULE, FLOAT_BITS(radius), FLOAT_BITS(length), segments };
  uint32_t rings = segments / 2;
  uint32_t vertexCount = 2 * (1 + rings * (segments + 1));
  uint32_t indexCount = 2 * (3 * segments + 6 * segments * (rings - 1)) + 6 * segments;
  ShapeVertex* vertices;
  uint16_t* indices;

  lovrPassDraw(pass, &(DrawInfo) {
    .hash = hash64(key, sizeof(key)),
    .mode = DRAW_TRIANGLES,
    .transform = transform,
    .bounds = (float[6]) { 0.f, 0.f, 0.f, radius, radius, length + radius },
    .vertex.pointer = (void**) &vertices,
    .vertex.count = vertexCount,
    .index.pointer = (void**) &indices,
    .index.count = indexCount
  });

  if (!vertices) {
    return;
  }

  float tip = length + radius;
  uint32_t h = vertexCount / 2;
  vertices[0] = (ShapeVertex) { { 0.f, 0.f, -tip }, { 0.f, 0.f, -1.f }, { .5f, 0.f } };
  vertices[h] = (ShapeVertex) { { 0.f, 0.f, tip }, { 0.f, 0.f, 1.f }, { .5f, 1.f } };
  vertices++;

  for (uint32_t i = 1; i <= rings; i++) {
    float v = i / (float) rings;
    float phi = v * (float) M_PI / 2.f;
    float sinphi = sinf(phi);
    float cosphi = cosf(phi);
    for (uint32_t j = 0; j <= segments; j++) {
      float u = j / (float) segments;
      float theta = u * (float) M_PI * 2.f;
      float sintheta = sinf(theta);
      float costheta = cosf(theta);
      float x = costheta * sinphi;
      float y = sintheta * sinphi;
      float z = cosphi;
      vertices[0] = (ShapeVertex) { { x * radius, y * radius, -(length + z * radius) }, { x, y, -z }, { u, v } };
      vertices[h] = (ShapeVertex) { { x * radius, y * radius, (length + z * radius) }, { x, y, z }, { u, 1.f - v } };
      vertices++;
    }
  }

  uint16_t* i1 = indices;
  uint16_t* i2 = indices + (indexCount - 6 * segments) / 2;
  for (uint32_t i = 0; i < segments; i++) {
    uint16_t wedge1[] = { 0, 0 + i + 2, 0 + i + 1 };
    uint16_t wedge2[] = { h, h + i + 1, h + i + 2 };
    memcpy(i1, wedge1, sizeof(wedge1));
    memcpy(i2, wedge2, sizeof(wedge2));
    i1 += COUNTOF(wedge1);
    i2 += COUNTOF(wedge2);
  }

  for (uint32_t i = 0; i < rings - 1; i++) {
    for (uint32_t j = 0; j < segments; j++) {
      uint16_t a = 1 + i * (segments + 1) + 0 + j;
      uint16_t b = 1 + i * (segments + 1) + 1 + j;
      uint16_t c = 1 + i * (segments + 1) + 0 + segments + 1 + j;
      uint16_t d = 1 + i * (segments + 1) + 1 + segments + 1 + j;
      uint16_t quad1[] = { a, b, c, c, b, d };
      uint16_t quad2[] = { h + a, h + c, h + b, h + b, h + c, h + d };
      memcpy(i1, quad1, sizeof(quad1));
      memcpy(i2, quad2, sizeof(quad2));
      i1 += COUNTOF(quad1);
      i2 += COUNTOF(quad2);
    }
  }

  for (uint32_t i = 0; i < segments; i++) {
    uint16_t a = h - segments - 1 + i;
    uint16_t b = h - segments - 1 + i + 1;
    uint16_t c = vertexCount - segments - 1 + i;
    uint16_t d = vertexCount - segments - 1 + i + 1;
    uint16_t quad[] = { a, b, c, c, b, d };
    memcpy(i2, quad, sizeof(quad));
    i2 += COUNTOF(quad);
  }
}
void lovrPassTorus(Pass* pass, float* transform, uint32_t segmentsT, uint32_t segmentsP) {
  float sx = vec3_length(transform + 0);
  float sy = vec3_length(transform + 4);
  float sz = vec3_length(transform + 8);
  vec3_scale(transform + 0, 1.f / sx);
  vec3_scale(transform + 4, 1.f / sy);
  vec3_scale(transform + 8, 1.f / sz);
  float radius = sx * .5f;
  float thickness = sz * .5f;

  uint32_t key[] = { SHAPE_TORUS, FLOAT_BITS(radius), FLOAT_BITS(thickness), segmentsT, segmentsP };
  uint32_t vertexCount = segmentsT * segmentsP;
  uint32_t indexCount = segmentsT * segmentsP * 6;
  ShapeVertex* vertices;
  uint16_t* indices;

  lovrPassDraw(pass, &(DrawInfo) {
    .hash = hash64(key, sizeof(key)),
    .mode = DRAW_TRIANGLES,
    .transform = transform,
    .bounds = (float[6]) { 0.f, 0.f, 0.f, radius + thickness, radius + thickness, thickness },
    .vertex.pointer = (void**) &vertices,
    .vertex.count = vertexCount,
    .index.pointer = (void**) &indices,
    .index.count = indexCount
  });

  if (!vertices) {
    return;
  }

  // T and P stand for toroidal and poloidal, or theta and phi
  float dt = (2.f * (float) M_PI) / segmentsT;
  float dp = (2.f * (float) M_PI) / segmentsP;

  for (uint32_t t = 0; t < segmentsT; t++) {
    float theta = t * dt;
    float tx = cosf(theta);
    float ty = sinf(theta);
    for (uint32_t p = 0; p < segmentsP; p++) {
      float phi = p * dp;
      float nx = cosf(phi) * tx;
      float ny = cosf(phi) * ty;
      float nz = sinf(phi);

      *vertices++ = (ShapeVertex) {
        .position = { tx * radius + nx * thickness, ty * radius + ny * thickness, nz * thickness },
        .normal = { nx, ny, nz }
      };

      uint16_t a = (t + 0) * segmentsP + p;
      uint16_t b = (t + 1) % segmentsT * segmentsP + p;
      uint16_t c = (t + 0) * segmentsP + (p + 1) % segmentsP;
      uint16_t d = (t + 1) % segmentsT * segmentsP + (p + 1) % segmentsP;
      uint16_t quad[] = { a, b, c, c, b, d };
      memcpy(indices, quad, sizeof(quad));
      indices += COUNTOF(quad);
    }
  }
}
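// Text is shaped on the CPU: glyph quads are generated into scratch memory from the frame
// allocator (tempPush/tempPop below bracket the scratch lifetime), then copied into the Pass's
// vertex stream as 4 vertices and 6 indices per glyph.  The scale factor converts the
// rasterizer's pixel metrics into world units via the Font's pixel density.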
void lovrPassText(Pass* pass, ColoredString* strings, uint32_t count, float* transform, float wrap, HorizontalAlign halign, VerticalAlign valign) {
  Font* font = pass->pipeline->font ? pass->pipeline->font : lovrGraphicsGetDefaultFont();

  size_t totalLength = 0;
  for (uint32_t i = 0; i < count; i++) {
    totalLength += strings[i].length;
  }

  size_t stack = tempPush(&state.allocator);
  GlyphVertex* vertices = tempAlloc(&state.allocator, totalLength * 4 * sizeof(GlyphVertex));

  uint32_t glyphCount;
  uint32_t lineCount;

  float leading = lovrRasterizerGetLeading(font->info.rasterizer) * font->lineSpacing;
  float ascent = lovrRasterizerGetAscent(font->info.rasterizer);
  float scale = 1.f / font->pixelDensity;
  wrap /= scale;

  Material* material;
  bool flip = pass->cameras[(pass->cameraCount - 1) * pass->canvas.views].projection[5] > 0.f;
  lovrFontGetVertices(font, strings, count, wrap, halign, valign, vertices, &glyphCount, &lineCount, &material, flip);

  mat4_scale(transform, scale, scale, scale);
  float offset = -ascent + valign / 2.f * (leading * lineCount);
  mat4_translate(transform, 0.f, flip ? -offset : offset, 0.f);

  GlyphVertex* vertexPointer;
  uint16_t* indices;

  lovrPassDraw(pass, &(DrawInfo) {
    .mode = DRAW_TRIANGLES,
    .shader = SHADER_FONT,
    .material = font->material,
    .transform = transform,
    .vertex.format = VERTEX_GLYPH,
    .vertex.pointer = (void**) &vertexPointer,
    .vertex.count = glyphCount * 4,
    .index.pointer = (void**) &indices,
    .index.count = glyphCount * 6
  });

  memcpy(vertexPointer, vertices, glyphCount * 4 * sizeof(GlyphVertex));

  for (uint32_t i = 0; i < glyphCount * 4; i += 4) {
    uint16_t quad[] = { i + 0, i + 2, i + 1, i + 1, i + 2, i + 3 };
    memcpy(indices, quad, sizeof(quad));
    indices += COUNTOF(quad);
  }

  tempPop(&state.allocator, stack);
}

void lovrPassSkybox(Pass* pass, Texture* texture) {
  lovrPassDraw(pass, &(DrawInfo) {
    .mode = DRAW_TRIANGLES,
    .shader = !texture || texture->info.type == TEXTURE_2D ? SHADER_EQUIRECT : SHADER_CUBEMAP,
    .material = texture ? lovrTextureToMaterial(texture) : NULL,
    .vertex.format = VERTEX_EMPTY,
    .count = 6
  });
}

void lovrPassFill(Pass* pass, Texture* texture) {
  lovrPassDraw(pass, &(DrawInfo) {
    .mode = DRAW_TRIANGLES,
    .shader = texture && texture->info.type == TEXTURE_ARRAY ? SHADER_FILL_ARRAY : SHADER_FILL_2D,
    .material = texture ? lovrTextureToMaterial(texture) : NULL,
    .vertex.format = VERTEX_EMPTY,
    .count = 3
  });
}
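// The Suzanne model ships as quantized bytes: positions are unsigned 8-bit values remapped into
// the model's bounding box (monkey_bounds half-extents, plus monkey_offset), and normals are
// unsigned bytes remapped from [0, 255] to [-1, 1].  lovrPassMonkey below dequantizes into
// ShapeVertex on the fly rather than adding a dedicated vertex format.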
void lovrPassMonkey(Pass* pass, float* transform) {
  uint32_t key[] = { SHAPE_MONKEY };
  uint32_t vertexCount = COUNTOF(monkey_vertices) / 6;
  ShapeVertex* vertices;
  uint16_t* indices;

  lovrPassDraw(pass, &(DrawInfo) {
    .hash = hash64(key, sizeof(key)),
    .mode = DRAW_TRIANGLES,
    .vertex.pointer = (void**) &vertices,
    .vertex.count = vertexCount,
    .index.pointer = (void**) &indices,
    .index.count = COUNTOF(monkey_indices),
    .transform = transform,
    .bounds = monkey_bounds
  });

  if (!vertices) {
    return;
  }

  // Manual vertex format conversion to avoid another format (and sn8x3 isn't always supported)
  for (uint32_t i = 0; i < vertexCount; i++) {
    vertices[i] = (ShapeVertex) {
      .position.x = monkey_vertices[6 * i + 0] / 255.f * monkey_bounds[3] * 2.f + monkey_offset[0],
      .position.y = monkey_vertices[6 * i + 1] / 255.f * monkey_bounds[4] * 2.f + monkey_offset[1],
      .position.z = monkey_vertices[6 * i + 2] / 255.f * monkey_bounds[5] * 2.f + monkey_offset[2],
      .normal.x = monkey_vertices[6 * i + 3] / 255.f * 2.f - 1.f,
      .normal.y = monkey_vertices[6 * i + 4] / 255.f * 2.f - 1.f,
      .normal.z = monkey_vertices[6 * i + 5] / 255.f * 2.f - 1.f,
    };
  }

  memcpy(indices, monkey_indices, sizeof(monkey_indices));
}

void lovrPassDrawMesh(Pass* pass, Mesh* mesh, float* transform, uint32_t instances) {
  uint32_t extent = mesh->indexCount > 0 ? mesh->indexCount : mesh->vertexBuffer->info.format->length;
  uint32_t start = MIN(mesh->drawStart, extent - 1);
  uint32_t count = mesh->drawCount > 0 ? MIN(mesh->drawCount, extent - start) : extent - start;

  lovrMeshFlush(mesh);

  lovrPassDraw(pass, &(DrawInfo) {
    .mode = mesh->mode,
    .transform = transform,
    .bounds = mesh->hasBounds ? mesh->bounds : NULL,
    .material = mesh->material,
    .vertex.buffer = mesh->vertexBuffer,
    .index.buffer = mesh->indexBuffer,
    .start = start,
    .count = count,
    .instances = instances
  });
}

static void drawNode(Pass* pass, Model* model, uint32_t index, uint32_t instances) {
  ModelNode* node = &model->info.data->nodes[index];
  mat4 globalTransform = model->globalTransforms + 16 * index;

  for (uint32_t i = 0; i < node->primitiveCount; i++) {
    DrawInfo draw = model->draws[node->primitiveIndex + i];
    if (node->skin == ~0u) draw.transform = globalTransform;
    draw.instances = instances;
    lovrPassDraw(pass, &draw);
  }

  for (uint32_t i = 0; i < node->childCount; i++) {
    drawNode(pass, model, node->children[i], instances);
  }
}

void lovrPassDrawModel(Pass* pass, Model* model, float* transform, uint32_t instances) {
  lovrModelAnimateVertices(model);

  if (model->transformsDirty) {
    updateModelTransforms(model, model->info.data->rootNode, (float[]) MAT4_IDENTITY);
    model->transformsDirty = false;
  }

  lovrPassPush(pass, STACK_TRANSFORM);
  lovrPassTransform(pass, transform);
  drawNode(pass, model, model->info.data->rootNode, instances);
  lovrPassPop(pass, STACK_TRANSFORM);
}

void lovrPassDrawTexture(Pass* pass, Texture* texture, float* transform) {
  uint32_t key[] = { SHAPE_PLANE, STYLE_FILL, 1, 1 };
  ShapeVertex* vertices;
  uint16_t* indices;

  float aspect = (float) texture->info.height / texture->info.width;
  transform[4] *= aspect;
  transform[5] *= aspect;
  transform[6] *= aspect;
  transform[7] *= aspect;

  uint32_t vertexCount = 4;
  uint32_t indexCount = 6;

  lovrPassDraw(pass, &(DrawInfo) {
    .hash = hash64(key, sizeof(key)),
    .mode = DRAW_TRIANGLES,
    .transform = transform,
    .bounds = (float[6]) { 0.f, 0.f, 0.f, .5f, .5f, 0.f },
    .material = lovrTextureToMaterial(texture),
    .vertex.pointer = (void**) &vertices,
    .vertex.count = vertexCount,
    .index.pointer = (void**) &indices,
    .index.count = indexCount
  });

  ShapeVertex vertexData[] = {
    { { -.5f, .5f, 0.f }, { 0.f, 0.f, 1.f }, { 0.f, 0.f } },
    { { .5f, .5f, 0.f }, { 0.f, 0.f, 1.f }, { 1.f, 0.f } },
    { { -.5f, -.5f, 0.f }, { 0.f, 0.f, 1.f }, { 0.f, 1.f } },
    { { .5f, -.5f, 0.f }, { 0.f, 0.f, 1.f }, { 1.f, 1.f } }
  };

  uint16_t indexData[] = { 0, 2, 1, 1, 2, 3 };

  if (vertices) {
    memcpy(vertices, vertexData, sizeof(vertexData));
    memcpy(indices, indexData, sizeof(indexData));
  }
}
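// lovrPassMesh treats count == ~0u as "draw everything from 'start' to the end of the index (or
// vertex) buffer"; explicit counts are validated against the buffer lengths instead.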
uint32_t lovrPassBeginTally(Pass* pass) {
  lovrCheck(pass->tally.count < MAX_TALLIES, "Pass has too many tallies!");
  lovrCheck(!pass->tally.active, "Trying to start a tally, but the previous tally wasn't finished");
  pass->tally.active = true;
  return pass->tally.count;
}

uint32_t lovrPassFinishTally(Pass* pass) {
  lovrCheck(pass->tally.active, "Trying to finish a tally, but no tally was started");
  pass->tally.active = false;
  return pass->tally.count++;
}

Buffer* lovrPassGetTallyBuffer(Pass* pass, uint32_t* offset) {
  *offset = pass->tally.bufferOffset;
  return pass->tally.buffer;
}

void lovrPassSetTallyBuffer(Pass* pass, Buffer* buffer, uint32_t offset) {
  lovrCheck(offset % 4 == 0, "Tally buffer offset must be a multiple of 4");
  lovrRelease(pass->tally.buffer, lovrBufferDestroy);
  pass->tally.buffer = buffer;
  pass->tally.bufferOffset = offset;
  lovrRetain(buffer);
}

void lovrPassCompute(Pass* pass, uint32_t x, uint32_t y, uint32_t z, Buffer* indirect, uint32_t offset) {
  // Grow the compute array whenever the count is zero or a power of two
  if ((pass->computeCount & (pass->computeCount - 1)) == 0) {
    Compute* computes = lovrPassAllocate(pass, MAX(pass->computeCount << 1, 1) * sizeof(Compute));
    memcpy(computes, pass->computes, pass->computeCount * sizeof(Compute));
    pass->computes = computes;
  }

  Compute* previous = pass->computeCount > 0 ? &pass->computes[pass->computeCount - 1] : NULL;
  Compute* compute = &pass->computes[pass->computeCount++];
  Shader* shader = pass->pipeline->shader;

  lovrCheck(shader && shader->info.type == SHADER_COMPUTE, "To run a compute shader, a compute shader must be active");
  lovrCheck(x <= state.limits.workgroupCount[0], "Compute %s count exceeds workgroupCount limit", "x");
  lovrCheck(y <= state.limits.workgroupCount[1], "Compute %s count exceeds workgroupCount limit", "y");
  lovrCheck(z <= state.limits.workgroupCount[2], "Compute %s count exceeds workgroupCount limit", "z");

  compute->flags = 0;
  compute->shader = shader;
  lovrRetain(shader);

  compute->bundleInfo = lovrPassResolveBindings(pass, shader, previous ? previous->bundleInfo : NULL);

  if (shader->uniformCount > 0 && pass->flags & DIRTY_UNIFORMS) {
    lovrPassResolveUniforms(pass, shader, &compute->uniformBuffer, &compute->uniformOffset);
    pass->flags &= ~DIRTY_UNIFORMS;
  } else {
    compute->uniformBuffer = previous ? previous->uniformBuffer : NULL;
    compute->uniformOffset = previous ? previous->uniformOffset : 0;
  }

  if (indirect) {
    compute->flags |= COMPUTE_INDIRECT;
    compute->indirect.buffer = indirect->gpu;
    compute->indirect.offset = indirect->base + offset;
    trackBuffer(pass, indirect, GPU_PHASE_INDIRECT, GPU_CACHE_INDIRECT);
  } else {
    compute->x = x;
    compute->y = y;
    compute->z = z;
  }
}
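// The growth check in lovrPassCompute works because n & (n - 1) clears the
// lowest set bit, so the expression is zero exactly when n is 0 or a power of
// two; capacity therefore doubles at counts 0, 1, 2, 4, 8, ... (MAX(n << 1, 1)
// handles the empty case). A tiny illustrative helper, not used elsewhere:
static inline bool exampleIsZeroOrPowerOfTwo(uint32_t n) {
  return (n & (n - 1)) == 0; // e.g. 12 (0b1100) & 11 (0b1011) == 8, so 12 is neither
}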
void lovrPassBarrier(Pass* pass) {
  if (pass->computeCount > 0) {
    pass->computes[pass->computeCount - 1].flags |= COMPUTE_BARRIER;
  }
}

// Helpers

static void* tempAlloc(Allocator* allocator, size_t size) {
  if (size == 0) {
    return NULL;
  }

  while (allocator->cursor + size > allocator->length) {
    lovrAssert(allocator->length << 1 <= allocator->limit, "Out of memory");
    os_vm_commit(allocator->memory + allocator->length, allocator->length);
    allocator->length <<= 1;
  }

  size_t cursor = ALIGN(allocator->cursor, 8);
  allocator->cursor = cursor + size;
  return allocator->memory + cursor;
}

static size_t tempPush(Allocator* allocator) {
  return allocator->cursor;
}

static void tempPop(Allocator* allocator, size_t stack) {
  allocator->cursor = stack;
}
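// tempPush/tempPop give the temp allocator stack semantics: save the cursor,
// make any number of tempAlloc calls, then restore the cursor to release them
// all at once (the same pattern lovrPassText uses above with its `stack`
// variable). A minimal sketch, assuming some scratch work over `count` floats:
static void exampleScratchUsage(Allocator* allocator, uint32_t count) {
  size_t stack = tempPush(allocator);   // remember where we started
  float* scratch = tempAlloc(allocator, count * sizeof(float));
  for (uint32_t i = 0; i < count; i++) {
    scratch[i] = (float) i;             // ...temporary work...
  }
  tempPop(allocator, stack);            // everything allocated since tempPush is released
}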
static gpu_pipeline* getPipeline(uint32_t index) {
  return (gpu_pipeline*) ((char*) state.pipelines + index * gpu_sizeof_pipeline());
}

static BufferBlock* getBlock(gpu_buffer_type type, uint32_t size) {
  BufferBlock* block = state.bufferAllocators[type].freelist;

  if (block && block->size >= size && gpu_is_complete(block->tick)) {
    state.bufferAllocators[type].freelist = block->next;
    block->next = NULL;
    return block;
  }

  block = lovrMalloc(sizeof(BufferBlock) + gpu_sizeof_buffer());
  block->handle = (gpu_buffer*) (block + 1);
  block->size = MAX(size, 1 << 22);
  block->next = NULL;
  block->ref = 0;

  gpu_buffer_init(block->handle, &(gpu_buffer_info) {
    .type = type,
    .size = block->size,
    .pointer = &block->pointer,
    .label = "Buffer Block"
  });

  return block;
}

static void freeBlock(BufferAllocator* allocator, BufferBlock* block) {
  BufferBlock** list = &allocator->freelist;
  while (*list) list = (BufferBlock**) &(*list)->next;
  block->next = NULL;
  *list = block;
}

static BufferView allocateBuffer(BufferAllocator* allocator, gpu_buffer_type type, uint32_t size, size_t align) {
  uint32_t cursor = (uint32_t) ((allocator->cursor + (align - 1)) / align * align);
  BufferBlock* block = allocator->current;

  if (!block || cursor + size > block->size) {
    if (block && type != GPU_BUFFER_STATIC) {
      block->tick = state.tick;
      freeBlock(allocator, block);
    }

    block = getBlock(type, size);
    allocator->current = block;
    cursor = 0;
  }

  allocator->cursor = cursor + size;

  return (BufferView) {
    .block = block,
    .buffer = block->handle,
    .offset = cursor,
    .extent = size,
    .pointer = block->pointer ? (char*) block->pointer + cursor : NULL
  };
}

static BufferView getBuffer(gpu_buffer_type type, uint32_t size, size_t align) {
  return allocateBuffer(&state.bufferAllocators[type], type, size, align);
}
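// allocateBuffer rounds its cursor up to `align` with pure integer math:
// (cursor + align - 1) / align * align. For example, with cursor = 13 and
// align = 8: (13 + 7) / 8 * 8 = 20 / 8 * 8 = 2 * 8 = 16, the next multiple of
// 8 at or after 13 (a cursor already on a boundary is unchanged). The same
// idiom as a standalone helper, for illustration only (the code above inlines it):
static inline size_t exampleAlignUp(size_t cursor, size_t align) {
  return (cursor + align - 1) / align * align; // assumes align > 0; align need not be a power of two
}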
static int u64cmp(const void* a, const void* b) {
  uint64_t x = *(uint64_t*) a, y = *(uint64_t*) b;
  return (x > y) - (x < y);
}

static uint32_t gcd(uint32_t a, uint32_t b) {
  return b ? gcd(b, a % b) : a;
}

static uint32_t lcm(uint32_t a, uint32_t b) {
  return (a / gcd(a, b)) * b;
}

static void beginFrame(void) {
  if (state.active) {
    return;
  }

  state.active = true;
  state.tick = gpu_begin();
  state.stream = gpu_stream_begin("Internal");
  memset(&state.barrier, 0, sizeof(gpu_barrier));
  memset(&state.transferBarrier, 0, sizeof(gpu_barrier));
  state.allocator.cursor = 0;
  processReadbacks();
}

// When a Texture is garbage collected, if it has any transfer operations recorded to state.stream,
// those transfers need to be submitted before it gets destroyed.  The allocator offset is saved and
// restored, which is pretty gross, but we don't want to invalidate temp memory (currently this is
// only a problem for Font: when the font's atlas gets destroyed, it could invalidate the temp
// memory used by Font:getLines and Pass:text).
static void flushTransfers(void) {
  if (state.active) {
    size_t cursor = state.allocator.cursor;
    lovrGraphicsSubmit(NULL, 0);
    beginFrame();
    state.allocator.cursor = cursor;
  }
}

static void processReadbacks(void) {
  while (state.oldestReadback && gpu_is_complete(state.oldestReadback->tick)) {
    Readback* readback = state.oldestReadback;

    switch (readback->type) {
      case READBACK_BUFFER:
        memcpy(readback->blob->data, readback->view.pointer, readback->view.extent);
        break;
      case READBACK_TEXTURE:;
        size_t size = lovrImageGetLayerSize(readback->image, 0);
        void* data = lovrImageGetLayerData(readback->image, 0, 0);
        memcpy(data, readback->view.pointer, size);
        break;
      case READBACK_TIMESTAMP:;
        uint32_t* timestamps = readback->view.pointer;
        for (uint32_t i = 0; i < readback->count; i++) {
          Pass* pass = readback->times[i].pass;
          pass->stats.submitTime = readback->times[i].cpuTime;
          pass->stats.gpuTime = (timestamps[2 * i + 1] - timestamps[2 * i + 0]) * state.limits.timestampPeriod / 1e9;
        }
        break;
      default: break;
    }

    Readback* next = readback->next;
    lovrRelease(readback, lovrReadbackDestroy);
    state.oldestReadback = next;
  }

  if (!state.oldestReadback) {
    state.newestReadback = NULL;
  }
}

static gpu_pass* getPass(Canvas* canvas) {
  gpu_pass_info info = { 0 };

  for (uint32_t i = 0; i < canvas->count; i++) {
    info.color[i].format = (gpu_texture_format) canvas->color[i].texture->info.format;
    info.color[i].srgb = canvas->color[i].texture->info.srgb;
    info.color[i].load = canvas->resolve ? GPU_LOAD_OP_CLEAR : (gpu_load_op) canvas->color[i].load;
  }

  DepthAttachment* depth = &canvas->depth;

  if (depth->texture || depth->format) {
    info.depth.format = (gpu_texture_format) (depth->texture ? depth->texture->info.format : depth->format);
    info.depth.load = canvas->resolve ? GPU_LOAD_OP_CLEAR : (gpu_load_op) depth->load;
    info.depth.save = depth->texture ? GPU_SAVE_OP_KEEP : GPU_SAVE_OP_DISCARD;
    info.depth.stencilLoad = info.depth.load;
    info.depth.stencilSave = info.depth.save;
  }

  info.colorCount = canvas->count;
  info.samples = canvas->samples;
  info.views = canvas->views;
  info.resolveColor = canvas->resolve;
  info.resolveDepth = canvas->resolve && !!depth->texture;
  info.surface = canvas->count > 0 && canvas->color[0].texture == state.window;

  uint64_t hash = hash64(&info, sizeof(info));
  uint64_t value = map_get(&state.passLookup, hash);

  if (value == MAP_NIL) {
    gpu_pass* pass = lovrMalloc(gpu_sizeof_pass());
    gpu_pass_init(pass, &info);
    map_set(&state.passLookup, hash, (uint64_t) (uintptr_t) pass);
    return pass;
  }

  return (gpu_pass*) (uintptr_t) value;
}
static size_t getLayout(gpu_slot* slots, uint32_t count) {
  uint64_t hash = hash64(slots, count * sizeof(gpu_slot));

  for (size_t index = 0; index < state.layouts.length; index++) {
    if (state.layouts.data[index].hash == hash) {
      return index;
    }
  }

  gpu_layout_info info = {
    .slots = slots,
    .count = count
  };

  gpu_layout* handle = lovrMalloc(gpu_sizeof_layout());
  gpu_layout_init(handle, &info);

  Layout layout = {
    .hash = hash,
    .gpu = handle
  };

  size_t index = state.layouts.length;
  arr_push(&state.layouts, layout);
  return index;
}

static gpu_bundle* getBundle(size_t layoutIndex, gpu_binding* bindings, uint32_t count) {
  Layout* layout = &state.layouts.data[layoutIndex];
  BundlePool* pool = layout->head;
  const uint32_t POOL_SIZE = 512;
  gpu_bundle* bundle = NULL;

  if (pool) {
    if (pool->cursor < POOL_SIZE) {
      bundle = (gpu_bundle*) ((char*) pool->bundles + gpu_sizeof_bundle() * pool->cursor++);
      goto write;
    }

    // If the pool's closed, move it to the end of the list and try to use the next pool
    layout->tail->next = pool;
    layout->tail = pool;
    layout->head = pool->next;
    pool->next = NULL;
    pool->tick = state.tick;
    pool = layout->head;

    if (pool && gpu_is_complete(pool->tick)) {
      bundle = pool->bundles;
      pool->cursor = 1;
      goto write;
    }
  }

  // If no pool was available, make a new one
  pool = lovrMalloc(sizeof(BundlePool));
  gpu_bundle_pool* gpu = lovrMalloc(gpu_sizeof_bundle_pool());
  gpu_bundle* bundles = lovrMalloc(POOL_SIZE * gpu_sizeof_bundle());
  pool->gpu = gpu;
  pool->bundles = bundles;
  pool->cursor = 1;
  pool->next = layout->head;

  gpu_bundle_pool_info info = {
    .bundles = pool->bundles,
    .layout = layout->gpu,
    .count = POOL_SIZE
  };

  gpu_bundle_pool_init(pool->gpu, &info);

  layout->head = pool;
  if (!layout->tail) layout->tail = pool;
  bundle = pool->bundles;

write:
  gpu_bundle_write(&bundle, &(gpu_bundle_info) { layout->gpu, bindings, count }, 1);
  return bundle;
}

static gpu_texture* getScratchTexture(gpu_stream* stream, Canvas* canvas, TextureFormat format, bool srgb) {
  uint16_t key[] = { canvas->width, canvas->height, canvas->views, format, srgb, canvas->samples };
  uint32_t hash = (uint32_t) hash64(key, sizeof(key));

  // Find a matching scratch texture that hasn't been used this frame
  for (uint32_t i = 0; i < state.scratchTextures.length; i++) {
    if (state.scratchTextures.data[i].hash == hash && state.scratchTextures.data[i].tick != state.tick) {
      state.scratchTextures.data[i].tick = state.tick; // Mark it used this frame
      return state.scratchTextures.data[i].texture;
    }
  }

  // Find something to evict
  ScratchTexture* scratch = NULL;
  for (uint32_t i = 0; i < state.scratchTextures.length; i++) {
    if (state.tick - state.scratchTextures.data[i].tick > 16) {
      scratch = &state.scratchTextures.data[i];
      break;
    }
  }

  if (scratch) {
    gpu_texture_destroy(scratch->texture);
  } else {
    arr_expand(&state.scratchTextures, 1);
    scratch = &state.scratchTextures.data[state.scratchTextures.length++];
    scratch->texture = lovrCalloc(gpu_sizeof_texture());
  }

  gpu_texture_info info = {
    .type = GPU_TEXTURE_ARRAY,
    .format = (gpu_texture_format) format,
    .srgb = srgb,
    .size = { canvas->width, canvas->height, canvas->views },
    .mipmaps = 1,
    .samples = canvas->samples,
    .usage = GPU_TEXTURE_RENDER,
    .upload.stream = stream
  };

  gpu_texture_init(scratch->texture, &info);
  scratch->hash = hash;
  scratch->tick = state.tick;
  return scratch->texture;
}
static bool isDepthFormat(TextureFormat format) {
  return format == FORMAT_D16 || format == FORMAT_D32F || format == FORMAT_D24S8 || format == FORMAT_D32FS8;
}

static bool supportsSRGB(TextureFormat format) {
  switch (format) {
    case FORMAT_R8:
    case FORMAT_RG8:
    case FORMAT_RGBA8:
    case FORMAT_BC1:
    case FORMAT_BC2:
    case FORMAT_BC3:
    case FORMAT_BC7:
    case FORMAT_ASTC_4x4:
    case FORMAT_ASTC_5x4:
    case FORMAT_ASTC_5x5:
    case FORMAT_ASTC_6x5:
    case FORMAT_ASTC_6x6:
    case FORMAT_ASTC_8x5:
    case FORMAT_ASTC_8x6:
    case FORMAT_ASTC_8x8:
    case FORMAT_ASTC_10x5:
    case FORMAT_ASTC_10x6:
    case FORMAT_ASTC_10x8:
    case FORMAT_ASTC_10x10:
    case FORMAT_ASTC_12x10:
    case FORMAT_ASTC_12x12:
      return true;
    default:
      return false;
  }
}

// Returns number of bytes of a 3D texture region of a given format
static uint32_t measureTexture(TextureFormat format, uint32_t w, uint32_t h, uint32_t d) {
  switch (format) {
    case FORMAT_R8: return w * h * d;
    case FORMAT_RG8:
    case FORMAT_R16:
    case FORMAT_R16F:
    case FORMAT_RGB565:
    case FORMAT_RGB5A1:
    case FORMAT_D16: return w * h * d * 2;
    case FORMAT_RGBA8:
    case FORMAT_RG16:
    case FORMAT_RG16F:
    case FORMAT_R32F:
    case FORMAT_RG11B10F:
    case FORMAT_RGB10A2:
    case FORMAT_D24S8:
    case FORMAT_D32F: return w * h * d * 4;
    case FORMAT_D32FS8: return w * h * d * 5;
    case FORMAT_RGBA16:
    case FORMAT_RGBA16F:
    case FORMAT_RG32F: return w * h * d * 8;
    case FORMAT_RGBA32F: return w * h * d * 16;
    case FORMAT_BC1: return ((w + 3) / 4) * ((h + 3) / 4) * d * 8;
    case FORMAT_BC2: return ((w + 3) / 4) * ((h + 3) / 4) * d * 16;
    case FORMAT_BC3: return ((w + 3) / 4) * ((h + 3) / 4) * d * 16;
    case FORMAT_BC4U: return ((w + 3) / 4) * ((h + 3) / 4) * d * 8;
    case FORMAT_BC4S: return ((w + 3) / 4) * ((h + 3) / 4) * d * 8;
    case FORMAT_BC5U: return ((w + 3) / 4) * ((h + 3) / 4) * d * 16;
    case FORMAT_BC5S: return ((w + 3) / 4) * ((h + 3) / 4) * d * 16;
    case FORMAT_BC6UF: return ((w + 3) / 4) * ((h + 3) / 4) * d * 16;
    case FORMAT_BC6SF: return ((w + 3) / 4) * ((h + 3) / 4) * d * 16;
    case FORMAT_BC7: return ((w + 3) / 4) * ((h + 3) / 4) * d * 16;
    case FORMAT_ASTC_4x4: return ((w + 3) / 4) * ((h + 3) / 4) * d * 16;
    case FORMAT_ASTC_5x4: return ((w + 4) / 5) * ((h + 3) / 4) * d * 16;
    case FORMAT_ASTC_5x5: return ((w + 4) / 5) * ((h + 4) / 5) * d * 16;
    case FORMAT_ASTC_6x5: return ((w + 5) / 6) * ((h + 4) / 5) * d * 16;
    case FORMAT_ASTC_6x6: return ((w + 5) / 6) * ((h + 5) / 6) * d * 16;
    case FORMAT_ASTC_8x5: return ((w + 7) / 8) * ((h + 4) / 5) * d * 16;
    case FORMAT_ASTC_8x6: return ((w + 7) / 8) * ((h + 5) / 6) * d * 16;
    case FORMAT_ASTC_8x8: return ((w + 7) / 8) * ((h + 7) / 8) * d * 16;
    case FORMAT_ASTC_10x5: return ((w + 9) / 10) * ((h + 4) / 5) * d * 16;
    case FORMAT_ASTC_10x6: return ((w + 9) / 10) * ((h + 5) / 6) * d * 16;
    case FORMAT_ASTC_10x8: return ((w + 9) / 10) * ((h + 7) / 8) * d * 16;
    case FORMAT_ASTC_10x10: return ((w + 9) / 10) * ((h + 9) / 10) * d * 16;
    case FORMAT_ASTC_12x10: return ((w + 11) / 12) * ((h + 9) / 10) * d * 16;
    case FORMAT_ASTC_12x12: return ((w + 11) / 12) * ((h + 11) / 12) * d * 16;
    default: lovrUnreachable();
  }
}
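// Block-compressed formats in measureTexture round each dimension up to whole
// blocks before multiplying by the block size in bytes. Two worked examples,
// following directly from the cases above:
//
//   measureTexture(FORMAT_RGBA8, 256, 256, 1)
//     = 256 * 256 * 1 * 4                        = 262144 bytes (4 bytes per texel)
//
//   measureTexture(FORMAT_ASTC_5x5, 100, 60, 1)
//     = ((100 + 4) / 5) * ((60 + 4) / 5) * 1 * 16
//     = 20 * 12 * 16                             = 3840 bytes (16 bytes per 5x5 block)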
// Errors if a 3D texture region exceeds the texture's bounds
static void checkTextureBounds(const TextureInfo* info, uint32_t offset[4], uint32_t extent[3]) {
  uint32_t maxWidth = MAX(info->width >> offset[3], 1);
  uint32_t maxHeight = MAX(info->height >> offset[3], 1);
  uint32_t maxLayers = info->type == TEXTURE_3D ? MAX(info->layers >> offset[3], 1) : info->layers;
  lovrCheck(offset[0] + extent[0] <= maxWidth, "Texture x range [%d,%d] exceeds width (%d)", offset[0], offset[0] + extent[0], maxWidth);
  lovrCheck(offset[1] + extent[1] <= maxHeight, "Texture y range [%d,%d] exceeds height (%d)", offset[1], offset[1] + extent[1], maxHeight);
  lovrCheck(offset[2] + extent[2] <= maxLayers, "Texture layer range [%d,%d] exceeds layer count (%d)", offset[2], offset[2] + extent[2], maxLayers);
  lovrCheck(offset[3] < info->mipmaps, "Texture mipmap %d exceeds its mipmap count (%d)", offset[3] + 1, info->mipmaps);
}

static void mipmapTexture(gpu_stream* stream, Texture* texture, uint32_t base, uint32_t count) {
  if (count == ~0u) count = texture->info.mipmaps - (base + 1);
  bool volumetric = texture->info.type == TEXTURE_3D;

  for (uint32_t i = 0; i < count; i++) {
    uint32_t level = base + i + 1;
    uint32_t srcOffset[4] = { 0, 0, 0, level - 1 };
    uint32_t dstOffset[4] = { 0, 0, 0, level };
    uint32_t srcExtent[3] = {
      MAX(texture->info.width >> (level - 1), 1),
      MAX(texture->info.height >> (level - 1), 1),
      volumetric ? MAX(texture->info.layers >> (level - 1), 1) : 1
    };
    uint32_t dstExtent[3] = {
      MAX(texture->info.width >> level, 1),
      MAX(texture->info.height >> level, 1),
      volumetric ? MAX(texture->info.layers >> level, 1) : 1
    };

    gpu_blit(stream, texture->root->gpu, texture->root->gpu, srcOffset, dstOffset, srcExtent, dstExtent, GPU_FILTER_LINEAR);

    // Each level reads the previous level's blit, so a transfer barrier is
    // needed between iterations (but not after the last one)
    if (i != count - 1) {
      gpu_sync(stream, &(gpu_barrier) {
        .prev = GPU_PHASE_BLIT,
        .next = GPU_PHASE_BLIT,
        .flush = GPU_CACHE_TRANSFER_WRITE,
        .clear = GPU_CACHE_TRANSFER_READ
      }, 1);
    }
  }
}
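// For a 100x60 2D texture, the mip chain produced above halves each axis and
// clamps at 1: level 0 is 100x60, and successive gpu_blit calls generate
// 50x30, 25x15, 12x7, 6x3, 3x1, and 1x1. The per-axis extent math as a
// standalone sketch (illustrative helper, not used elsewhere in this file):
static inline uint32_t exampleMipExtent(uint32_t base, uint32_t level) {
  return MAX(base >> level, 1); // same expression used for width/height/layers above
}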
static ShaderResource* findShaderResource(Shader* shader, const char* name, size_t length) {
  uint32_t hash = (uint32_t) hash64(name, length);
  for (uint32_t i = 0; i < shader->resourceCount; i++) {
    if (shader->resources[i].hash == hash) {
      return &shader->resources[i];
    }
  }
  lovrThrow("Shader has no variable named '%s'", name);
}

static Access* getNextAccess(Pass* pass, int type, bool texture) {
  AccessBlock* block = pass->access[type];

  if (!block || block->count >= COUNTOF(block->list)) {
    AccessBlock* new = lovrPassAllocate(pass, sizeof(AccessBlock));
    pass->access[type] = new;
    new->next = block;
    new->count = 0;
    new->textureMask = 0;
    block = new;
  }

  block->textureMask |= (uint64_t) texture << block->count;
  return &block->list[block->count++];
}

static void trackBuffer(Pass* pass, Buffer* buffer, gpu_phase phase, gpu_cache cache) {
  if (!buffer) return;
  Access* access = getNextAccess(pass, phase == GPU_PHASE_SHADER_COMPUTE ? ACCESS_COMPUTE : ACCESS_RENDER, false);
  access->sync = &buffer->sync;
  access->object = buffer;
  access->phase = phase;
  access->cache = cache;
  lovrRetain(buffer);
}

static void trackTexture(Pass* pass, Texture* texture, gpu_phase phase, gpu_cache cache) {
  if (!texture) return;

  // Sample-only textures can skip sync, but still need to be refcounted
  if (texture->root->info.usage == TEXTURE_SAMPLE) {
    phase = 0;
    cache = 0;
  }

  Access* access = getNextAccess(pass, phase == GPU_PHASE_SHADER_COMPUTE ? ACCESS_COMPUTE : ACCESS_RENDER, true);
  access->sync = &texture->root->sync;
  access->object = texture;
  access->phase = phase;
  access->cache = cache;
  lovrRetain(texture);
}

static void trackMaterial(Pass* pass, Material* material) {
  lovrRetain(material);

  if (!material->hasWritableTexture) {
    return;
  }

  gpu_phase phase = GPU_PHASE_SHADER_VERTEX | GPU_PHASE_SHADER_FRAGMENT;
  gpu_cache cache = GPU_CACHE_TEXTURE;

  trackTexture(pass, material->info.texture, phase, cache);
  trackTexture(pass, material->info.glowTexture, phase, cache);
  trackTexture(pass, material->info.metalnessTexture, phase, cache);
  trackTexture(pass, material->info.roughnessTexture, phase, cache);
  trackTexture(pass, material->info.clearcoatTexture, phase, cache);
  trackTexture(pass, material->info.occlusionTexture, phase, cache);
  trackTexture(pass, material->info.normalTexture, phase, cache);
}

static bool syncResource(Access* access, gpu_barrier* barrier) {
  // There are 4 types of access patterns:
  // - read after read:
  //   - no hazard, no barrier necessary
  // - read after write:
  //   - needs execution dependency to ensure the read happens after the write
  //   - needs to flush the writes from the cache
  //   - needs to clear the cache for the read so it gets the new data
  //   - only needs to happen once for each type of read after a write (tracked by pendingReads)
  //     - if a second read happens, the first read would have already synchronized (transitive)
  // - write after write:
  //   - needs execution dependency to ensure writes don't overlap
  //   - needs to flush and clear the cache
  //   - clears pendingReads
  // - write after read:
  //   - needs execution dependency to ensure write starts after read is finished
  //   - does not need to flush any caches
  //   - does clear the write cache
  //   - clears pendingReads

  Sync* sync = access->sync;
  uint32_t read = access->cache & GPU_CACHE_READ_MASK;
  uint32_t write = access->cache & GPU_CACHE_WRITE_MASK;
  uint32_t newReads = read & ~sync->pendingReads;
  bool hasNewReads = newReads || (access->phase & ~sync->readPhase);
  bool readAfterWrite = read && sync->pendingWrite && hasNewReads;
  bool writeAfterWrite = write && sync->pendingWrite && !sync->pendingReads;
  bool writeAfterRead = write && sync->pendingReads;

  if (readAfterWrite) {
    barrier->prev |= sync->writePhase;
    barrier->next |= access->phase;
    barrier->flush |= sync->pendingWrite;
    barrier->clear |= newReads;
    sync->readPhase |= access->phase;
    sync->pendingReads |= read;
  }

  if (writeAfterWrite) {
    barrier->prev |= sync->writePhase;
    barrier->next |= access->phase;
    barrier->flush |= sync->pendingWrite;
    barrier->clear |= write;
  }

  if (writeAfterRead) {
    barrier->prev |= sync->readPhase;
    barrier->next |= access->phase;
    sync->readPhase = 0;
    sync->pendingReads = 0;
  }

  if (write) {
    sync->writePhase = access->phase;
    sync->pendingWrite = write;
  }

  return write;
}
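// A worked read-after-write example for the logic above: suppose a Buffer was
// just written by a blit (sync->writePhase == GPU_PHASE_BLIT and
// sync->pendingWrite == GPU_CACHE_TRANSFER_WRITE, with no reads recorded), and
// it is now consumed as an indirect draw source (access->phase ==
// GPU_PHASE_INDIRECT, access->cache == GPU_CACHE_INDIRECT, as trackBuffer is
// called from lovrPassMeshIndirect). Then read is nonzero, pendingWrite is
// nonzero, and the read is new, so only the readAfterWrite branch runs and the
// barrier accumulates:
//
//   barrier->prev  |= GPU_PHASE_BLIT;           // wait for the blit...
//   barrier->next  |= GPU_PHASE_INDIRECT;       // ...before the indirect fetch
//   barrier->flush |= GPU_CACHE_TRANSFER_WRITE; // make the write visible
//   barrier->clear |= GPU_CACHE_INDIRECT;       // invalidate stale read caches
//
// A second indirect read of the same buffer finds GPU_CACHE_INDIRECT already
// in pendingReads and adds nothing: the first barrier synchronizes both reads
// (the "transitive" case in the comment above).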
static gpu_barrier syncTransfer(Sync* sync, gpu_phase phase, gpu_cache cache) {
  gpu_barrier localBarrier = { 0 };
  gpu_barrier* barrier = NULL;

  // If there was already a transfer write to the resource this frame, a "just in time" barrier is required
  // If this is a transfer write, a "just in time" barrier is only needed if there's been a transfer read this frame
  // Otherwise, the barrier can go at the beginning of the frame and get batched with other barriers
  if (sync->lastTransferWrite == state.tick || (sync->lastTransferRead == state.tick && (cache & GPU_CACHE_WRITE_MASK))) {
    barrier = &localBarrier;
  } else {
    barrier = &state.transferBarrier;
  }

  syncResource(&(Access) { sync, NULL, phase, cache }, barrier);

  if (cache & GPU_CACHE_READ_MASK) sync->lastTransferRead = state.tick;
  if (cache & GPU_CACHE_WRITE_MASK) sync->lastTransferWrite = state.tick;

  return localBarrier;
}

static void updateModelTransforms(Model* model, uint32_t nodeIndex, float* parent) {
  mat4 global = model->globalTransforms + 16 * nodeIndex;
  NodeTransform* local = &model->localTransforms[nodeIndex];
  mat4_init(global, parent);
  mat4_translate(global, local->position[0], local->position[1], local->position[2]);
  mat4_rotateQuat(global, local->rotation);
  mat4_scale(global, local->scale[0], local->scale[1], local->scale[2]);

  ModelNode* node = &model->info.data->nodes[nodeIndex];
  for (uint32_t i = 0; i < node->childCount; i++) {
    updateModelTransforms(model, node->children[i], global);
  }
}
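// Each node's global transform above composes as global = parent * T * R * S,
// so a child inherits its parent's full transform before applying its own
// local translation, rotation, and scale; for example, a node at local
// position (1, 0, 0) under a parent rotated 90 degrees around +Y lands at
// model-space position (0, 0, -1). A small sketch of reading a node's
// model-space origin back out of the result (illustrative only, and assuming
// maf's column-major mat4 layout with translation in elements 12-14):
static void exampleNodeWorldPosition(Model* model, uint32_t nodeIndex, float* out) {
  mat4 global = model->globalTransforms + 16 * nodeIndex;
  out[0] = global[12];
  out[1] = global[13];
  out[2] = global[14];
}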
// Only an explicit set of SPIR-V capabilities are allowed
// Some capabilities require a GPU feature to be supported
// Some common unsupported capabilities are checked directly, to provide better error messages
static void checkShaderFeatures(uint32_t* features, uint32_t count) {
  for (uint32_t i = 0; i < count; i++) {
    switch (features[i]) {
      case 0: break; // Matrix
      case 1: break; // Shader
      case 2: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "geometry shading");
      case 3: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "tessellation shading");
      case 5: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "linkage");
      case 9: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "half floats");
      case 10: lovrCheck(state.features.float64, "GPU does not support shader feature #%d: %s", features[i], "64 bit floats"); break;
      case 11: lovrCheck(state.features.int64, "GPU does not support shader feature #%d: %s", features[i], "64 bit integers"); break;
      case 12: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "64 bit atomics");
      case 22: lovrCheck(state.features.int16, "GPU does not support shader feature #%d: %s", features[i], "16 bit integers"); break;
      case 23: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "tessellation shading");
      case 24: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "geometry shading");
      case 25: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "extended image gather");
      case 27: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "multisample storage textures");
      case 32: lovrCheck(state.limits.clipDistances > 0, "GPU does not support shader feature #%d: %s", features[i], "clip distance"); break;
      case 33: lovrCheck(state.limits.cullDistances > 0, "GPU does not support shader feature #%d: %s", features[i], "cull distance"); break;
      case 34: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "cubemap array textures");
      case 35: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "sample rate shading");
      case 36: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "rectangle textures");
      case 37: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "rectangle textures");
      case 39: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "8 bit integers");
      case 40: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "input attachments");
      case 41: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "sparse residency");
      case 42: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "min LOD");
      case 43: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "1D textures");
      case 44: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "1D textures");
      case 45: break; // Cubemap arrays
      case 46: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "texel buffers");
      case 47: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "texel buffers");
      case 48: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "multisampled storage textures");
      case 49: break; // StorageImageExtendedFormats (?)
      case 50: break; // ImageQuery
      case 51: break; // DerivativeControl
      case 52: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "sample rate shading");
      case 53: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "transform feedback");
      case 54: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "geometry shading");
      case 55: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "autoformat storage textures");
      case 56: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "autoformat storage textures");
      case 57: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "multiviewport");
      case 69: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "layered rendering");
      case 70: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "multiviewport");
      case 4427: break; // ShaderDrawParameters
      case 4437: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "multigpu");
      case 4439: lovrCheck(state.limits.renderSize[2] > 1, "GPU does not support shader feature #%d: %s", features[i], "multiview"); break;
      case 5301: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "non-uniform indexing");
      case 5306: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "non-uniform indexing");
      case 5307: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "non-uniform indexing");
      case 5308: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "non-uniform indexing");
      case 5309: lovrThrow("Shader uses unsupported feature #%d: %s", features[i], "non-uniform indexing");
      default: lovrThrow("Shader uses unknown feature #%d", features[i]);
    }
  }
}

static void onResize(uint32_t width, uint32_t height) {
  float density = os_window_get_pixel_density();
  width *= density;
  height *= density;

  state.window->info.width = width;
  state.window->info.height = height;

  gpu_surface_resize(width, height);

  lovrEventPush((Event) {
    .type = EVENT_RESIZE,
    .data.resize.width = width,
    .data.resize.height = height
  });
}

static void onMessage(void* context, const char* message, bool severe) {
  if (severe) {
#ifdef _WIN32
    if (!state.defaultTexture) { // Hacky way to determine if initialization has completed
      const char* format = "This program requires a graphics card with support for Vulkan 1.1, but no device was found or it failed to initialize properly. The error message was:\n\n%s";
      size_t size = snprintf(NULL, 0, format, message) + 1;
      char* string = lovrMalloc(size);
      snprintf(string, size, format, message);
      os_window_message_box(string);
      lovrFree(string);
      exit(1);
    }
#endif
    lovrThrow("GPU error: %s", message);
  } else {
    lovrLog(LOG_DEBUG, "GPU", message);
  }
}