From ed55ab17dbd4aadb5eb3491fad8786c356397af4 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 5 Dec 2024 13:25:27 +0100 Subject: [PATCH] panvk: Factor-out the sysvals initialization logic We're about to make the sysval logic a bit more complicated when introducing push constant packing. Let's first factor-out the sysvals handling so the JM/CSF backends don't have to duplicate the thing. Signed-off-by: Boris Brezillon Reviewed-by: Chia-I Wu Reviewed-by: Mary Guillemard Reviewed-by: Lars-Ivar Hesselberg Simonsen Part-of: --- .../vulkan/csf/panvk_vX_cmd_dispatch.c | 55 ++--- src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c | 134 +--------- .../vulkan/jm/panvk_vX_cmd_dispatch.c | 54 ++-- src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c | 232 ++++++------------ src/panfrost/vulkan/meson.build | 1 + src/panfrost/vulkan/panvk_cmd_dispatch.h | 18 ++ src/panfrost/vulkan/panvk_cmd_draw.h | 34 +++ src/panfrost/vulkan/panvk_vX_cmd_dispatch.c | 54 ++++ src/panfrost/vulkan/panvk_vX_cmd_draw.c | 145 +++++++++++ 9 files changed, 356 insertions(+), 371 deletions(-) create mode 100644 src/panfrost/vulkan/panvk_vX_cmd_dispatch.c diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c index 483ce6b33a9..b86db4e4e20 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c @@ -73,20 +73,6 @@ prepare_push_uniforms(struct panvk_cmd_buffer *cmdbuf) : VK_ERROR_OUT_OF_DEVICE_MEMORY; } -struct panvk_dispatch_info { - uint32_t baseGroupX; - uint32_t baseGroupY; - uint32_t baseGroupZ; - struct { - uint32_t groupCountX; - uint32_t groupCountY; - uint32_t groupCountZ; - } direct; - struct { - uint64_t buffer_dev_addr; - } indirect; -}; - static void calculate_task_axis_and_increment(const struct panvk_shader *shader, struct panvk_physical_device *phys_dev, @@ -204,9 +190,9 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) * to calculate the maximum 
number of workgroups we can execute * concurrently. */ struct pan_compute_dim dim = { - info->direct.groupCountX, - info->direct.groupCountY, - info->direct.groupCountZ, + info->direct.wg_count.x, + info->direct.wg_count.y, + info->direct.wg_count.z, }; tlsinfo.wls.instances = pan_wls_instances(&dim); @@ -246,20 +232,7 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) return; } - struct panvk_compute_sysvals *sysvals = &cmdbuf->state.compute.sysvals; - sysvals->base.x = info->baseGroupX; - sysvals->base.y = info->baseGroupY; - sysvals->base.z = info->baseGroupZ; - /* If indirect, sysvals->num_work_groups will be written by the CS */ - if (!indirect) { - sysvals->num_work_groups.x = info->direct.groupCountX; - sysvals->num_work_groups.y = info->direct.groupCountY; - sysvals->num_work_groups.z = info->direct.groupCountZ; - } - sysvals->local_group_size.x = shader->local_size.x; - sysvals->local_group_size.y = shader->local_size.y; - sysvals->local_group_size.z = shader->local_size.z; - compute_state_set_dirty(cmdbuf, PUSH_UNIFORMS); + panvk_per_arch(cmd_prepare_dispatch_sysvals)(cmdbuf, info); result = prepare_driver_set(cmdbuf); if (result != VK_SUCCESS) @@ -321,11 +294,11 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) } cs_move32_to(b, cs_sr_reg32(b, 33), wg_size.opaque[0]); cs_move32_to(b, cs_sr_reg32(b, 34), - info->baseGroupX * shader->local_size.x); + info->direct.wg_base.x * shader->local_size.x); cs_move32_to(b, cs_sr_reg32(b, 35), - info->baseGroupY * shader->local_size.y); + info->direct.wg_base.y * shader->local_size.y); cs_move32_to(b, cs_sr_reg32(b, 36), - info->baseGroupZ * shader->local_size.z); + info->direct.wg_base.z * shader->local_size.z); if (indirect) { /* Load parameters from indirect buffer and update workgroup count * registers and sysvals */ @@ -342,9 +315,9 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info) offsetof(struct 
panvk_compute_sysvals, num_work_groups)); cs_wait_slot(b, SB_ID(LS), false); } else { - cs_move32_to(b, cs_sr_reg32(b, 37), info->direct.groupCountX); - cs_move32_to(b, cs_sr_reg32(b, 38), info->direct.groupCountY); - cs_move32_to(b, cs_sr_reg32(b, 39), info->direct.groupCountZ); + cs_move32_to(b, cs_sr_reg32(b, 37), info->direct.wg_count.x); + cs_move32_to(b, cs_sr_reg32(b, 38), info->direct.wg_count.y); + cs_move32_to(b, cs_sr_reg32(b, 39), info->direct.wg_count.z); } } @@ -412,10 +385,10 @@ panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer, { VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); struct panvk_dispatch_info info = { - baseGroupX, - baseGroupY, - baseGroupZ, - .direct = {groupCountX, groupCountY, groupCountZ}, + .direct = { + .wg_base = {baseGroupX, baseGroupY, baseGroupZ}, + .wg_count = {groupCountX, groupCountY, groupCountZ}, + } }; cmd_dispatch(cmdbuf, &info); } diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c index f7267e538be..58279203d0c 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c @@ -42,30 +42,6 @@ #include "vk_pipeline_layout.h" #include "vk_render_pass.h" -struct panvk_draw_info { - struct { - uint32_t size; - uint32_t offset; - } index; - - struct { - int32_t base; - uint32_t count; - } vertex; - - struct { - int32_t base; - uint32_t count; - } instance; - - struct { - struct panvk_buffer *buffer; - uint64_t offset; - uint32_t draw_count; - uint32_t stride; - } indirect; -}; - static void emit_vs_attrib(const struct vk_vertex_attribute_state *attrib_info, const struct vk_vertex_binding_state *buf_info, @@ -217,104 +193,6 @@ prepare_fs_driver_set(struct panvk_cmd_buffer *cmdbuf) return VK_SUCCESS; } -/* This value has been selected to get - * dEQP-VK.draw.renderpass.inverted_depth_ranges.nodepthclamp_deltazero passing. 
- */ -#define MIN_DEPTH_CLIP_RANGE 37.7E-06f - -static void -prepare_sysvals(struct panvk_cmd_buffer *cmdbuf, - const struct panvk_draw_info *draw) -{ - struct panvk_graphics_sysvals *sysvals = &cmdbuf->state.gfx.sysvals; - struct vk_color_blend_state *cb = &cmdbuf->vk.dynamic_graphics_state.cb; - const struct vk_rasterization_state *rs = - &cmdbuf->vk.dynamic_graphics_state.rs; - const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader; - - uint32_t noperspective_varyings = fs ? fs->info.varyings.noperspective : 0; - if (sysvals->vs.noperspective_varyings != noperspective_varyings) { - sysvals->vs.noperspective_varyings = noperspective_varyings; - cmdbuf->state.gfx.push_uniforms = 0; - } - - if (dyn_gfx_state_dirty(cmdbuf, CB_BLEND_CONSTANTS)) { - for (unsigned i = 0; i < ARRAY_SIZE(cb->blend_constants); i++) - sysvals->blend.constants[i] = - CLAMP(cb->blend_constants[i], 0.0f, 1.0f); - gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS); - } - - if (dyn_gfx_state_dirty(cmdbuf, VP_VIEWPORTS) || - dyn_gfx_state_dirty(cmdbuf, RS_CULL_MODE) || - dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLAMP_ENABLE)) { - VkViewport *viewport = &cmdbuf->vk.dynamic_graphics_state.vp.viewports[0]; - - /* Upload the viewport scale. Defined as (px/2, py/2, pz) at the start of - * section 24.5 ("Controlling the Viewport") of the Vulkan spec. At the - * end of the section, the spec defines: - * - * px = width - * py = height - * pz = maxDepth - minDepth - */ - sysvals->viewport.scale.x = 0.5f * viewport->width; - sysvals->viewport.scale.y = 0.5f * viewport->height; - sysvals->viewport.scale.z = (viewport->maxDepth - viewport->minDepth); - - /* Upload the viewport offset. Defined as (ox, oy, oz) at the start of - * section 24.5 ("Controlling the Viewport") of the Vulkan spec. 
At the - * end of the section, the spec defines: - * - * ox = x + width/2 - * oy = y + height/2 - * oz = minDepth - */ - sysvals->viewport.offset.x = (0.5f * viewport->width) + viewport->x; - sysvals->viewport.offset.y = (0.5f * viewport->height) + viewport->y; - sysvals->viewport.offset.z = viewport->minDepth; - - /* Doing the viewport transform in the vertex shader and then depth - * clipping with the viewport depth range gets a similar result to - * clipping in clip-space, but loses precision when the viewport depth - * range is very small. When minDepth == maxDepth, this completely - * flattens the clip-space depth and results in never clipping. - * - * To work around this, set a lower limit on depth range when clipping is - * enabled. This results in slightly incorrect fragment depth values, and - * doesn't help with the precision loss, but at least clipping isn't - * completely broken. - */ - if (vk_rasterization_state_depth_clip_enable(rs) && - fabsf(sysvals->viewport.scale.z) < MIN_DEPTH_CLIP_RANGE) { - float z_min = viewport->minDepth; - float z_max = viewport->maxDepth; - float z_sign = z_min <= z_max ? 
1.0f : -1.0f; - - sysvals->viewport.scale.z = z_sign * MIN_DEPTH_CLIP_RANGE; - - /* Middle of the user range is - * z_range_center = z_min + (z_max - z_min) * 0.5f, - * and we want to set the offset to - * z_offset = z_range_center - viewport.scale.z * 0.5f - * which, when expanding, gives us - * z_offset = (z_max + z_min - viewport.scale.z) * 0.5f - */ - float z_offset = (z_max + z_min - sysvals->viewport.scale.z) * 0.5f; - /* Bump offset off-center if necessary, to not go out of range */ - sysvals->viewport.offset.z = CLAMP(z_offset, 0.0f, 1.0f); - } - gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS); - } - - if (draw->vertex.base != sysvals->vs.first_vertex || - draw->instance.base != sysvals->vs.base_instance) { - sysvals->vs.first_vertex = draw->vertex.base; - sysvals->vs.base_instance = draw->instance.base; - gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS); - } -} - static bool has_depth_att(struct panvk_cmd_buffer *cmdbuf) { @@ -1664,7 +1542,7 @@ prepare_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) return result; } - prepare_sysvals(cmdbuf, draw); + panvk_per_arch(cmd_prepare_draw_sysvals)(cmdbuf, draw); result = prepare_push_uniforms(cmdbuf); if (result != VK_SUCCESS) @@ -1925,9 +1803,7 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf, return; struct cs_index draw_params_addr = cs_scratch_reg64(b, 0); - cs_move64_to( - b, draw_params_addr, - panvk_buffer_gpu_ptr(draw->indirect.buffer, draw->indirect.offset)); + cs_move64_to(b, draw_params_addr, draw->indirect.buffer_dev_addr); cs_update_vt_ctx(b) { cs_move32_to(b, cs_sr_reg32(b, 32), 0); @@ -1981,8 +1857,7 @@ panvk_per_arch(CmdDrawIndirect)(VkCommandBuffer commandBuffer, VkBuffer _buffer, return; struct panvk_draw_info draw = { - .indirect.buffer = buffer, - .indirect.offset = offset, + .indirect.buffer_dev_addr = panvk_buffer_gpu_ptr(buffer, offset), .indirect.draw_count = drawCount, .indirect.stride = stride, }; @@ -2003,8 +1878,7 @@ 
panvk_per_arch(CmdDrawIndexedIndirect)(VkCommandBuffer commandBuffer, struct panvk_draw_info draw = { .index.size = cmdbuf->state.gfx.ib.index_size, - .indirect.buffer = buffer, - .indirect.offset = offset, + .indirect.buffer_dev_addr = panvk_buffer_gpu_ptr(buffer, offset), .indirect.draw_count = drawCount, .indirect.stride = stride, }; diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c index eb0cd547dec..2e5128e8180 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c @@ -26,12 +26,6 @@ #include -struct panvk_dispatch_info { - struct pan_compute_dim wg_count; - mali_ptr tsd; - mali_ptr push_uniforms; -}; - VKAPI_ATTR void VKAPI_CALL panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer, uint32_t baseGroupX, uint32_t baseGroupY, @@ -49,12 +43,16 @@ panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer, if (!panvk_priv_mem_dev_addr(shader->rsd)) return; + struct panvk_dispatch_info info = { + .direct = { + .wg_base = {baseGroupX, baseGroupY, baseGroupZ}, + .wg_count = {groupCountX, groupCountY, groupCountZ}, + }, + }; struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); struct panvk_physical_device *phys_dev = to_panvk_physical_device(dev->vk.physical); - struct panvk_dispatch_info dispatch = { - .wg_count = {groupCountX, groupCountY, groupCountZ}, - }; + struct pan_compute_dim wg_count = {groupCountX, groupCountY, groupCountZ}; panvk_per_arch(cmd_close_batch)(cmdbuf); struct panvk_batch *batch = panvk_per_arch(cmd_open_batch)(cmdbuf); @@ -65,42 +63,22 @@ panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer, &cmdbuf->state.compute.cs.desc; panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false); - dispatch.tsd = batch->tls.gpu; + mali_ptr tsd = batch->tls.gpu; result = panvk_per_arch(cmd_prepare_push_descs)( cmdbuf, desc_state, shader->desc_info.used_set_mask); if (result != VK_SUCCESS) return; - struct 
panvk_compute_sysvals *sysvals = &cmdbuf->state.compute.sysvals; - sysvals->base.x = baseGroupX; - sysvals->base.y = baseGroupY; - sysvals->base.z = baseGroupZ; - sysvals->num_work_groups.x = groupCountX; - sysvals->num_work_groups.y = groupCountY; - sysvals->num_work_groups.z = groupCountZ; - sysvals->local_group_size.x = shader->local_size.x; - sysvals->local_group_size.y = shader->local_size.y; - sysvals->local_group_size.z = shader->local_size.z; - if (compute_state_dirty(cmdbuf, CS) || compute_state_dirty(cmdbuf, DESC_STATE)) { result = panvk_per_arch(cmd_prepare_dyn_ssbos)(cmdbuf, desc_state, shader, cs_desc_state); if (result != VK_SUCCESS) return; - - sysvals->desc.sets[PANVK_DESC_TABLE_CS_DYN_SSBOS] = - cs_desc_state->dyn_ssbos; } - for (uint32_t i = 0; i < MAX_SETS; i++) { - if (shader->desc_info.used_set_mask & BITFIELD_BIT(i)) - sysvals->desc.sets[i] = desc_state->sets[i]->descs.dev; - } - - /* We unconditionally update the sysvals, so push_uniforms is always dirty. */ - compute_state_set_dirty(cmdbuf, PUSH_UNIFORMS); + panvk_per_arch(cmd_prepare_dispatch_sysvals)(cmdbuf, &info); if (compute_state_dirty(cmdbuf, PUSH_UNIFORMS)) { cmdbuf->state.compute.push_uniforms = panvk_per_arch( @@ -109,7 +87,7 @@ panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer, return; } - dispatch.push_uniforms = cmdbuf->state.compute.push_uniforms; + mali_ptr push_uniforms = cmdbuf->state.compute.push_uniforms; struct panfrost_ptr copy_desc_job = {0}; @@ -135,9 +113,9 @@ panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer, util_dynarray_append(&batch->jobs, void *, job.cpu); panfrost_pack_work_groups_compute( - pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION), dispatch.wg_count.x, - dispatch.wg_count.y, dispatch.wg_count.z, shader->local_size.x, - shader->local_size.y, shader->local_size.z, false, false); + pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION), wg_count.x, wg_count.y, + wg_count.z, shader->local_size.x, shader->local_size.y, + 
shader->local_size.z, false, false); pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) { cfg.job_task_split = util_logbase2_ceil(shader->local_size.x + 1) + @@ -150,9 +128,9 @@ panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer, cfg.attributes = cs_desc_state->img_attrib_table; cfg.attribute_buffers = cs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_IMG]; - cfg.thread_storage = dispatch.tsd; + cfg.thread_storage = tsd; cfg.uniform_buffers = cs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_UBO]; - cfg.push_uniforms = dispatch.push_uniforms; + cfg.push_uniforms = push_uniforms; cfg.textures = cs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_TEXTURE]; cfg.samplers = cs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_SAMPLER]; } @@ -172,7 +150,7 @@ panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer, unsigned core_id_range; panfrost_query_core_count(&phys_dev->kmod.props, &core_id_range); - batch->tlsinfo.wls.instances = pan_wls_instances(&dispatch.wg_count); + batch->tlsinfo.wls.instances = pan_wls_instances(&wg_count); batch->wls_total_size = pan_wls_adjust_size(batch->tlsinfo.wls.size) * batch->tlsinfo.wls.instances * core_id_range; } diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c index eb5d2a9fd65..c09bf6e274b 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c @@ -36,19 +36,10 @@ #include "vk_meta.h" #include "vk_pipeline_layout.h" -struct panvk_draw_info { - unsigned first_index; - unsigned index_count; - unsigned index_size; - unsigned first_vertex; - unsigned vertex_count; +struct panvk_draw_data { + struct panvk_draw_info info; unsigned vertex_range; unsigned padded_vertex_count; - unsigned first_instance; - unsigned instance_count; - int vertex_offset; - int offset_start; - uint32_t layer_id; struct mali_invocation_packed invocation; struct { mali_ptr varyings; @@ -84,106 +75,6 @@ struct panvk_draw_info { } jobs; }; -static VkResult 
-panvk_cmd_prepare_draw_sysvals(struct panvk_cmd_buffer *cmdbuf, - struct panvk_draw_info *draw) -{ - const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; - const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader; - - struct panvk_descriptor_state *desc_state = &cmdbuf->state.gfx.desc_state; - struct panvk_shader_desc_state *vs_desc_state = &cmdbuf->state.gfx.vs.desc; - struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc; - struct panvk_graphics_sysvals *sysvals = &cmdbuf->state.gfx.sysvals; - struct vk_color_blend_state *cb = &cmdbuf->vk.dynamic_graphics_state.cb; - - int32_t first_vertex = - draw->index_size ? draw->vertex_offset : draw->first_vertex; - uint32_t noperspective_varyings = fs ? fs->info.varyings.noperspective : 0; - if (sysvals->vs.raw_vertex_offset != draw->offset_start || - sysvals->vs.first_vertex != first_vertex || - sysvals->vs.base_instance != draw->first_instance || - sysvals->layer_id != draw->layer_id || - sysvals->vs.noperspective_varyings != noperspective_varyings) { - sysvals->vs.raw_vertex_offset = draw->offset_start; - sysvals->vs.first_vertex = first_vertex; - sysvals->vs.base_instance = draw->first_instance; - sysvals->vs.noperspective_varyings = noperspective_varyings; - sysvals->layer_id = draw->layer_id; - - gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS); - } - - if (dyn_gfx_state_dirty(cmdbuf, CB_BLEND_CONSTANTS)) { - for (unsigned i = 0; i < ARRAY_SIZE(cb->blend_constants); i++) - sysvals->blend.constants[i] = - CLAMP(cb->blend_constants[i], 0.0f, 1.0f); - gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS); - } - - if (dyn_gfx_state_dirty(cmdbuf, VP_VIEWPORTS)) { - VkViewport *viewport = &cmdbuf->vk.dynamic_graphics_state.vp.viewports[0]; - - /* Upload the viewport scale. Defined as (px/2, py/2, pz) at the start of - * section 24.5 ("Controlling the Viewport") of the Vulkan spec. 
At the - * end of the section, the spec defines: - * - * px = width - * py = height - * pz = maxDepth - minDepth - */ - sysvals->viewport.scale.x = 0.5f * viewport->width; - sysvals->viewport.scale.y = 0.5f * viewport->height; - sysvals->viewport.scale.z = (viewport->maxDepth - viewport->minDepth); - - /* Upload the viewport offset. Defined as (ox, oy, oz) at the start of - * section 24.5 ("Controlling the Viewport") of the Vulkan spec. At the - * end of the section, the spec defines: - * - * ox = x + width/2 - * oy = y + height/2 - * oz = minDepth - */ - sysvals->viewport.offset.x = (0.5f * viewport->width) + viewport->x; - sysvals->viewport.offset.y = (0.5f * viewport->height) + viewport->y; - sysvals->viewport.offset.z = viewport->minDepth; - gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS); - } - - if (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, VS)) { - VkResult result = panvk_per_arch(cmd_prepare_dyn_ssbos)( - cmdbuf, desc_state, vs, vs_desc_state); - if (result != VK_SUCCESS) - return result; - - sysvals->desc.sets[PANVK_DESC_TABLE_VS_DYN_SSBOS] = - vs_desc_state->dyn_ssbos; - gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS); - } - - if (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, FS)) { - VkResult result = panvk_per_arch(cmd_prepare_dyn_ssbos)( - cmdbuf, desc_state, fs, fs_desc_state); - if (result != VK_SUCCESS) - return result; - - sysvals->desc.sets[PANVK_DESC_TABLE_FS_DYN_SSBOS] = - fs_desc_state->dyn_ssbos; - gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS); - } - - for (uint32_t i = 0; i < MAX_SETS; i++) { - uint32_t used_set_mask = - vs->desc_info.used_set_mask | (fs ? 
fs->desc_info.used_set_mask : 0); - - if (used_set_mask & BITFIELD_BIT(i)) - sysvals->desc.sets[i] = desc_state->sets[i]->descs.dev; - gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS); - } - - return VK_SUCCESS; -} - static bool has_depth_att(struct panvk_cmd_buffer *cmdbuf) { @@ -288,7 +179,7 @@ translate_stencil_op(VkStencilOp in) static VkResult panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf, - struct panvk_draw_info *draw) + struct panvk_draw_data *draw) { bool dirty = dyn_gfx_state_dirty(cmdbuf, RS_RASTERIZER_DISCARD_ENABLE) || dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLAMP_ENABLE) || @@ -469,11 +360,11 @@ panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf, static VkResult panvk_draw_prepare_tiler_context(struct panvk_cmd_buffer *cmdbuf, - struct panvk_draw_info *draw) + struct panvk_draw_data *draw) { struct panvk_batch *batch = cmdbuf->cur_batch; VkResult result = - panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf, draw->layer_id); + panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf, draw->info.layer_id); if (result != VK_SUCCESS) return result; @@ -513,7 +404,7 @@ panvk_varying_hw_format(gl_shader_stage stage, gl_varying_slot loc, static VkResult panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf, - struct panvk_draw_info *draw) + struct panvk_draw_data *draw) { const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; const struct panvk_shader_link *link = &cmdbuf->state.gfx.link; @@ -528,7 +419,8 @@ panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf, bool writes_point_size = vs->info.vs.writes_point_size && ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST; - unsigned vertex_count = draw->padded_vertex_count * draw->instance_count; + unsigned vertex_count = + draw->padded_vertex_count * draw->info.instance.count; mali_ptr psiz_buf = 0; for (unsigned i = 0; i < PANVK_VARY_BUF_MAX; i++) { @@ -571,7 +463,7 @@ panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf, } static void -panvk_draw_emit_attrib_buf(const 
struct panvk_draw_info *draw, +panvk_draw_emit_attrib_buf(const struct panvk_draw_data *draw, const struct vk_vertex_binding_state *buf_info, const struct panvk_attrib_buf *buf, void *desc) { @@ -582,7 +474,7 @@ panvk_draw_emit_attrib_buf(const struct panvk_draw_info *draw, void *buf_ext = desc + pan_size(ATTRIBUTE_BUFFER); /* TODO: support instanced arrays */ - if (draw->instance_count <= 1) { + if (draw->info.instance.count <= 1) { pan_pack(desc, ATTRIBUTE_BUFFER, cfg) { cfg.type = MALI_ATTRIBUTE_TYPE_1D; cfg.stride = per_instance ? 0 : buf_info->stride; @@ -642,7 +534,7 @@ panvk_draw_emit_attrib_buf(const struct panvk_draw_info *draw, } static void -panvk_draw_emit_attrib(const struct panvk_draw_info *draw, +panvk_draw_emit_attrib(const struct panvk_draw_data *draw, const struct vk_vertex_attribute_state *attrib_info, const struct vk_vertex_binding_state *buf_info, const struct panvk_attrib_buf *buf, void *desc) @@ -657,7 +549,7 @@ panvk_draw_emit_attrib(const struct panvk_draw_info *draw, cfg.offset_enable = true; if (per_instance) - cfg.offset += draw->first_instance * buf_info->stride; + cfg.offset += draw->info.instance.base * buf_info->stride; cfg.format = GENX(panfrost_format_from_pipe_format)(f)->hw; } @@ -665,7 +557,7 @@ panvk_draw_emit_attrib(const struct panvk_draw_info *draw, static VkResult panvk_draw_prepare_vs_attribs(struct panvk_cmd_buffer *cmdbuf, - struct panvk_draw_info *draw) + struct panvk_draw_data *draw) { const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; const struct vk_vertex_input_state *vi = @@ -736,7 +628,7 @@ panvk_draw_prepare_vs_attribs(struct panvk_cmd_buffer *cmdbuf, static void panvk_draw_prepare_attributes(struct panvk_cmd_buffer *cmdbuf, - struct panvk_draw_info *draw) + struct panvk_draw_data *draw) { panvk_draw_prepare_vs_attribs(cmdbuf, draw); draw->vs.attributes = cmdbuf->state.gfx.vs.attribs; @@ -791,7 +683,7 @@ panvk_emit_viewport(const struct vk_viewport_state *vp, void *vpd) static VkResult 
panvk_draw_prepare_viewport(struct panvk_cmd_buffer *cmdbuf, - struct panvk_draw_info *draw) + struct panvk_draw_data *draw) { /* When rasterizerDiscardEnable is active, it is allowed to have viewport and * scissor disabled. @@ -817,7 +709,7 @@ panvk_draw_prepare_viewport(struct panvk_cmd_buffer *cmdbuf, static void panvk_emit_vertex_dcd(struct panvk_cmd_buffer *cmdbuf, - const struct panvk_draw_info *draw, void *dcd) + const struct panvk_draw_data *draw, void *dcd) { const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; const struct panvk_shader_desc_state *vs_desc_state = @@ -830,9 +722,9 @@ panvk_emit_vertex_dcd(struct panvk_cmd_buffer *cmdbuf, cfg.varyings = draw->vs.varyings; cfg.varying_buffers = draw->varying_bufs; cfg.thread_storage = draw->tls; - cfg.offset_start = draw->offset_start; + cfg.offset_start = draw->info.vertex.raw_offset; cfg.instance_size = - draw->instance_count > 1 ? draw->padded_vertex_count : 1; + draw->info.instance.count > 1 ? draw->padded_vertex_count : 1; cfg.uniform_buffers = vs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_UBO]; cfg.push_uniforms = draw->push_uniforms; cfg.textures = vs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_TEXTURE]; @@ -842,7 +734,7 @@ panvk_emit_vertex_dcd(struct panvk_cmd_buffer *cmdbuf, static VkResult panvk_draw_prepare_vertex_job(struct panvk_cmd_buffer *cmdbuf, - struct panvk_draw_info *draw) + struct panvk_draw_data *draw) { struct panvk_batch *batch = cmdbuf->cur_batch; struct panfrost_ptr ptr = panvk_cmd_alloc_desc(cmdbuf, COMPUTE_JOB); @@ -898,7 +790,7 @@ translate_prim_topology(VkPrimitiveTopology in) static void panvk_emit_tiler_primitive(struct panvk_cmd_buffer *cmdbuf, - const struct panvk_draw_info *draw, void *prim) + const struct panvk_draw_data *draw, void *prim) { const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; const struct panvk_shader *fs = get_fs(cmdbuf); @@ -924,12 +816,13 @@ panvk_emit_tiler_primitive(struct panvk_cmd_buffer *cmdbuf, cfg.primitive_restart = 
MALI_PRIMITIVE_RESTART_IMPLICIT; cfg.job_task_split = 6; - if (draw->index_size) { - cfg.index_count = draw->index_count; + if (draw->info.index.size) { + cfg.index_count = draw->info.vertex.count; cfg.indices = draw->indices; - cfg.base_vertex_offset = draw->vertex_offset - draw->offset_start; + cfg.base_vertex_offset = + draw->info.vertex.base - draw->info.vertex.raw_offset; - switch (draw->index_size) { + switch (draw->info.index.size) { case 4: cfg.index_type = MALI_INDEX_TYPE_UINT32; break; @@ -943,7 +836,7 @@ panvk_emit_tiler_primitive(struct panvk_cmd_buffer *cmdbuf, unreachable("Invalid index size"); } } else { - cfg.index_count = draw->vertex_count; + cfg.index_count = draw->info.vertex.count; cfg.index_type = MALI_INDEX_TYPE_NONE; } @@ -956,7 +849,7 @@ panvk_emit_tiler_primitive(struct panvk_cmd_buffer *cmdbuf, static void panvk_emit_tiler_primitive_size(struct panvk_cmd_buffer *cmdbuf, - const struct panvk_draw_info *draw, + const struct panvk_draw_data *draw, void *primsz) { const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; @@ -977,7 +870,7 @@ panvk_emit_tiler_primitive_size(struct panvk_cmd_buffer *cmdbuf, static void panvk_emit_tiler_dcd(struct panvk_cmd_buffer *cmdbuf, - const struct panvk_draw_info *draw, void *dcd) + const struct panvk_draw_data *draw, void *dcd) { struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc; const struct vk_rasterization_state *rs = @@ -1007,9 +900,9 @@ panvk_emit_tiler_dcd(struct panvk_cmd_buffer *cmdbuf, ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_LINE_STRIP) cfg.flat_shading_vertex = true; - cfg.offset_start = draw->offset_start; + cfg.offset_start = draw->info.vertex.raw_offset; cfg.instance_size = - draw->instance_count > 1 ? draw->padded_vertex_count : 1; + draw->info.instance.count > 1 ? 
draw->padded_vertex_count : 1; cfg.uniform_buffers = fs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_UBO]; cfg.push_uniforms = draw->push_uniforms; cfg.textures = fs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_TEXTURE]; @@ -1039,7 +932,7 @@ set_provoking_vertex_mode(struct panvk_cmd_buffer *cmdbuf) static VkResult panvk_draw_prepare_tiler_job(struct panvk_cmd_buffer *cmdbuf, - struct panvk_draw_info *draw) + struct panvk_draw_data *draw) { struct panvk_batch *batch = cmdbuf->cur_batch; const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader; @@ -1085,7 +978,7 @@ panvk_draw_prepare_tiler_job(struct panvk_cmd_buffer *cmdbuf, static VkResult panvk_draw_prepare_idvs_job(struct panvk_cmd_buffer *cmdbuf, - struct panvk_draw_info *draw) + struct panvk_draw_data *draw) { struct panvk_batch *batch = cmdbuf->cur_batch; struct panfrost_ptr ptr = panvk_cmd_alloc_desc(cmdbuf, INDEXED_VERTEX_JOB); @@ -1124,7 +1017,7 @@ panvk_draw_prepare_idvs_job(struct panvk_cmd_buffer *cmdbuf, static VkResult panvk_draw_prepare_vs_copy_desc_job(struct panvk_cmd_buffer *cmdbuf, - struct panvk_draw_info *draw) + struct panvk_draw_data *draw) { struct panvk_batch *batch = cmdbuf->cur_batch; const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; @@ -1149,7 +1042,7 @@ panvk_draw_prepare_vs_copy_desc_job(struct panvk_cmd_buffer *cmdbuf, static VkResult panvk_draw_prepare_fs_copy_desc_job(struct panvk_cmd_buffer *cmdbuf, - struct panvk_draw_info *draw) + struct panvk_draw_data *draw) { const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader; struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc; @@ -1211,7 +1104,7 @@ panvk_cmd_prepare_draw_link_shaders(struct panvk_cmd_buffer *cmd) } static void -panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) +panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_data *draw) { struct panvk_batch *batch = cmdbuf->cur_batch; const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader; @@ 
-1336,7 +1229,7 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) draw->fb = batch->fb.desc.gpu; panfrost_pack_work_groups_compute(&draw->invocation, 1, draw->vertex_range, - draw->instance_count, 1, 1, 1, true, + draw->info.instance.count, 1, 1, 1, true, false); result = panvk_draw_prepare_fs_rsd(cmdbuf, draw); @@ -1350,15 +1243,27 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) batch->tlsinfo.tls.size = MAX3(vs->info.tls_size, fs ? fs->info.tls_size : 0, batch->tlsinfo.tls.size); + if (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, VS)) { + VkResult result = panvk_per_arch(cmd_prepare_dyn_ssbos)( + cmdbuf, desc_state, vs, vs_desc_state); + if (result != VK_SUCCESS) + return; + } + + if (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, FS)) { + VkResult result = panvk_per_arch(cmd_prepare_dyn_ssbos)( + cmdbuf, desc_state, fs, fs_desc_state); + if (result != VK_SUCCESS) + return; + } + for (uint32_t i = 0; i < layer_count; i++) { - draw->layer_id = i; + draw->info.layer_id = i; result = panvk_draw_prepare_varyings(cmdbuf, draw); if (result != VK_SUCCESS) return; - result = panvk_cmd_prepare_draw_sysvals(cmdbuf, draw); - if (result != VK_SUCCESS) - return; + panvk_per_arch(cmd_prepare_draw_sysvals)(cmdbuf, &draw->info); cmdbuf->state.gfx.push_uniforms = panvk_per_arch( cmd_prepare_push_uniforms)(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS); @@ -1435,15 +1340,17 @@ panvk_per_arch(CmdDraw)(VkCommandBuffer commandBuffer, uint32_t vertexCount, * firstInstnace. 
*/ assert(firstInstance < INT32_MAX); - struct panvk_draw_info draw = { - .first_vertex = firstVertex, - .vertex_count = vertexCount, + struct panvk_draw_data draw = { + .info = { + .vertex.base = firstVertex, + .vertex.raw_offset = firstVertex, + .vertex.count = vertexCount, + .instance.base = firstInstance, + .instance.count = instanceCount, + }, .vertex_range = vertexCount, - .first_instance = firstInstance, - .instance_count = instanceCount, .padded_vertex_count = padded_vertex_count(cmdbuf, vertexCount, instanceCount), - .offset_start = firstVertex, }; panvk_cmd_draw(cmdbuf, &draw); @@ -1522,18 +1429,19 @@ panvk_per_arch(CmdDrawIndexed)(VkCommandBuffer commandBuffer, &min_vertex, &max_vertex); unsigned vertex_range = max_vertex - min_vertex + 1; - struct panvk_draw_info draw = { - .index_size = cmdbuf->state.gfx.ib.index_size, - .first_index = firstIndex, - .index_count = indexCount, - .vertex_offset = vertexOffset, - .first_instance = firstInstance, - .instance_count = instanceCount, + struct panvk_draw_data draw = { + .info = { + .index.size = cmdbuf->state.gfx.ib.index_size, + .index.offset = firstIndex, + .vertex.base = vertexOffset, + .vertex.raw_offset = min_vertex + vertexOffset, + .vertex.count = indexCount, + .instance.base = firstInstance, + .instance.count = instanceCount, + }, .vertex_range = vertex_range, - .vertex_count = indexCount + abs(vertexOffset), .padded_vertex_count = padded_vertex_count(cmdbuf, vertex_range, instanceCount), - .offset_start = min_vertex + vertexOffset, .indices = panvk_buffer_gpu_ptr(cmdbuf->state.gfx.ib.buffer, cmdbuf->state.gfx.ib.offset) + (firstIndex * cmdbuf->state.gfx.ib.index_size), diff --git a/src/panfrost/vulkan/meson.build b/src/panfrost/vulkan/meson.build index 98797b706e1..93cc3653bd1 100644 --- a/src/panfrost/vulkan/meson.build +++ b/src/panfrost/vulkan/meson.build @@ -96,6 +96,7 @@ common_per_arch_files = [ 'panvk_vX_buffer_view.c', 'panvk_vX_cmd_fb_preload.c', 'panvk_vX_cmd_desc_state.c', + 
'panvk_vX_cmd_dispatch.c', 'panvk_vX_cmd_draw.c', 'panvk_vX_cmd_meta.c', 'panvk_vX_cmd_push_constant.c', diff --git a/src/panfrost/vulkan/panvk_cmd_dispatch.h b/src/panfrost/vulkan/panvk_cmd_dispatch.h index 450b89ad96f..b21f37a8ccd 100644 --- a/src/panfrost/vulkan/panvk_cmd_dispatch.h +++ b/src/panfrost/vulkan/panvk_cmd_dispatch.h @@ -43,4 +43,22 @@ struct panvk_cmd_compute_state { compute_state_clear_all_dirty(__cmdbuf); \ } while (0) +struct panvk_dispatch_info { + struct { + struct { + uint32_t x, y, z; + } wg_base; + struct { + uint32_t x, y, z; + } wg_count; + } direct; + + struct { + mali_ptr buffer_dev_addr; + } indirect; +}; + +void panvk_per_arch(cmd_prepare_dispatch_sysvals)( + struct panvk_cmd_buffer *cmdbuf, const struct panvk_dispatch_info *info); + #endif diff --git a/src/panfrost/vulkan/panvk_cmd_draw.h b/src/panfrost/vulkan/panvk_cmd_draw.h index a4b46e2dfa2..34bba81361b 100644 --- a/src/panfrost/vulkan/panvk_cmd_draw.h +++ b/src/panfrost/vulkan/panvk_cmd_draw.h @@ -299,4 +299,38 @@ panvk_per_arch(cmd_preload_render_area_border)(struct panvk_cmd_buffer *cmdbuf, void panvk_per_arch(cmd_resolve_attachments)(struct panvk_cmd_buffer *cmdbuf); +struct panvk_draw_info { + struct { + uint32_t size; + uint32_t offset; + } index; + + struct { +#if PAN_ARCH <= 7 + int32_t raw_offset; +#endif + int32_t base; + uint32_t count; + } vertex; + + struct { + int32_t base; + uint32_t count; + } instance; + + struct { + mali_ptr buffer_dev_addr; + uint32_t draw_count; + uint32_t stride; + } indirect; + +#if PAN_ARCH <= 7 + uint32_t layer_id; +#endif +}; + +void +panvk_per_arch(cmd_prepare_draw_sysvals)(struct panvk_cmd_buffer *cmdbuf, + const struct panvk_draw_info *info); + #endif diff --git a/src/panfrost/vulkan/panvk_vX_cmd_dispatch.c b/src/panfrost/vulkan/panvk_vX_cmd_dispatch.c new file mode 100644 index 00000000000..6d0ed14e4a0 --- /dev/null +++ b/src/panfrost/vulkan/panvk_vX_cmd_dispatch.c @@ -0,0 +1,54 @@ +/* + * Copyright © 2024 Collabora Ltd. 
+ * Copyright © 2024 Arm Ltd. + * + * SPDX-License-Identifier: MIT + */ + +#include "panvk_cmd_buffer.h" +#include "panvk_cmd_dispatch.h" + +void +panvk_per_arch(cmd_prepare_dispatch_sysvals)( + struct panvk_cmd_buffer *cmdbuf, const struct panvk_dispatch_info *info) +{ + struct panvk_compute_sysvals *sysvals = &cmdbuf->state.compute.sysvals; + const struct panvk_shader *shader = cmdbuf->state.compute.shader; + + /* In indirect case, some sysvals are read from the indirect dispatch + * buffer. + */ + if (info->indirect.buffer_dev_addr == 0) { + sysvals->base.x = info->direct.wg_base.x; + sysvals->base.y = info->direct.wg_base.y; + sysvals->base.z = info->direct.wg_base.z; + sysvals->num_work_groups.x = info->direct.wg_count.x; + sysvals->num_work_groups.y = info->direct.wg_count.y; + sysvals->num_work_groups.z = info->direct.wg_count.z; + } + + sysvals->local_group_size.x = shader->local_size.x; + sysvals->local_group_size.y = shader->local_size.y; + sysvals->local_group_size.z = shader->local_size.z; + +#if PAN_ARCH <= 7 + struct panvk_descriptor_state *desc_state = + &cmdbuf->state.compute.desc_state; + struct panvk_shader_desc_state *cs_desc_state = + &cmdbuf->state.compute.cs.desc; + + if (compute_state_dirty(cmdbuf, CS) || + compute_state_dirty(cmdbuf, DESC_STATE)) { + sysvals->desc.sets[PANVK_DESC_TABLE_CS_DYN_SSBOS] = + cs_desc_state->dyn_ssbos; + } + + for (uint32_t i = 0; i < MAX_SETS; i++) { + if (shader->desc_info.used_set_mask & BITFIELD_BIT(i)) + sysvals->desc.sets[i] = desc_state->sets[i]->descs.dev; + } +#endif + + /* We unconditionally update the sysvals, so push_uniforms is always dirty. 
+    */
+   compute_state_set_dirty(cmdbuf, PUSH_UNIFORMS);
+}
diff --git a/src/panfrost/vulkan/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/panvk_vX_cmd_draw.c
index 4bec2183d51..885cf3b83c3 100644
--- a/src/panfrost/vulkan/panvk_vX_cmd_draw.c
+++ b/src/panfrost/vulkan/panvk_vX_cmd_draw.c
@@ -539,6 +539,162 @@ panvk_per_arch(cmd_preload_render_area_border)(
    panvk_per_arch(cmd_force_fb_preload)(cmdbuf, render_info);
 }
 
+/* This value has been selected to get
+ * dEQP-VK.draw.renderpass.inverted_depth_ranges.nodepthclamp_deltazero passing.
+ */
+#define MIN_DEPTH_CLIP_RANGE 37.7E-06f
+
+/* Update the graphics sysvals cached in cmdbuf->state.gfx.sysvals for a draw.
+ *
+ * Scalar sysvals (draw parameters from info, noperspective varyings mask) are
+ * only rewritten when they differ from the cached copy. Blend constants, the
+ * viewport transform and the dynamic SSBO tables are only rewritten when the
+ * matching state is flagged dirty. PUSH_UNIFORMS is marked dirty each time a
+ * sysval is written.
+ */
+void
+panvk_per_arch(cmd_prepare_draw_sysvals)(struct panvk_cmd_buffer *cmdbuf,
+                                         const struct panvk_draw_info *info)
+{
+   struct panvk_graphics_sysvals *sysvals = &cmdbuf->state.gfx.sysvals;
+   struct vk_color_blend_state *cb = &cmdbuf->vk.dynamic_graphics_state.cb;
+   const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader;
+   uint32_t noperspective_varyings = fs ? fs->info.varyings.noperspective : 0;
+
+   if (sysvals->vs.noperspective_varyings != noperspective_varyings) {
+      sysvals->vs.noperspective_varyings = noperspective_varyings;
+      gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS);
+   }
+
+   if (sysvals->vs.first_vertex != info->vertex.base) {
+      sysvals->vs.first_vertex = info->vertex.base;
+      gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS);
+   }
+
+   if (sysvals->vs.base_instance != info->instance.base) {
+      sysvals->vs.base_instance = info->instance.base;
+      gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS);
+   }
+
+#if PAN_ARCH <= 7
+   if (sysvals->vs.raw_vertex_offset != info->vertex.raw_offset) {
+      sysvals->vs.raw_vertex_offset = info->vertex.raw_offset;
+      gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS);
+   }
+
+   if (sysvals->layer_id != info->layer_id) {
+      sysvals->layer_id = info->layer_id;
+      gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS);
+   }
+#endif
+
+   if (dyn_gfx_state_dirty(cmdbuf, CB_BLEND_CONSTANTS)) {
+      for (unsigned i = 0; i < ARRAY_SIZE(cb->blend_constants); i++)
+         sysvals->blend.constants[i] =
+            CLAMP(cb->blend_constants[i], 0.0f, 1.0f);
+      gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS);
+   }
+
+   if (dyn_gfx_state_dirty(cmdbuf, VP_VIEWPORTS) ||
+       dyn_gfx_state_dirty(cmdbuf, RS_CULL_MODE) ||
+       dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLAMP_ENABLE)) {
+      VkViewport *viewport = &cmdbuf->vk.dynamic_graphics_state.vp.viewports[0];
+
+      /* Upload the viewport scale. Defined as (px/2, py/2, pz) at the start of
+       * section 24.5 ("Controlling the Viewport") of the Vulkan spec. At the
+       * end of the section, the spec defines:
+       *
+       * px = width
+       * py = height
+       * pz = maxDepth - minDepth
+       */
+      sysvals->viewport.scale.x = 0.5f * viewport->width;
+      sysvals->viewport.scale.y = 0.5f * viewport->height;
+      sysvals->viewport.scale.z = (viewport->maxDepth - viewport->minDepth);
+
+      /* Upload the viewport offset. Defined as (ox, oy, oz) at the start of
+       * section 24.5 ("Controlling the Viewport") of the Vulkan spec. At the
+       * end of the section, the spec defines:
+       *
+       * ox = x + width/2
+       * oy = y + height/2
+       * oz = minDepth
+       */
+      sysvals->viewport.offset.x = (0.5f * viewport->width) + viewport->x;
+      sysvals->viewport.offset.y = (0.5f * viewport->height) + viewport->y;
+      sysvals->viewport.offset.z = viewport->minDepth;
+
+#if PAN_ARCH >= 9
+      /* Doing the viewport transform in the vertex shader and then depth
+       * clipping with the viewport depth range gets a similar result to
+       * clipping in clip-space, but loses precision when the viewport depth
+       * range is very small. When minDepth == maxDepth, this completely
+       * flattens the clip-space depth and results in never clipping.
+       *
+       * To work around this, set a lower limit on depth range when clipping is
+       * enabled. This results in slightly incorrect fragment depth values, and
+       * doesn't help with the precision loss, but at least clipping isn't
+       * completely broken.
+       */
+      const struct vk_rasterization_state *rs =
+         &cmdbuf->vk.dynamic_graphics_state.rs;
+
+      if (vk_rasterization_state_depth_clip_enable(rs) &&
+          fabsf(sysvals->viewport.scale.z) < MIN_DEPTH_CLIP_RANGE) {
+         float z_min = viewport->minDepth;
+         float z_max = viewport->maxDepth;
+         float z_sign = z_min <= z_max ? 1.0f : -1.0f;
+
+         sysvals->viewport.scale.z = z_sign * MIN_DEPTH_CLIP_RANGE;
+
+         /* Middle of the user range is
+          *    z_range_center = z_min + (z_max - z_min) * 0.5f,
+          * and we want to set the offset to
+          *    z_offset = z_range_center - viewport.scale.z * 0.5f
+          * which, when expanding, gives us
+          *    z_offset = (z_max + z_min - viewport.scale.z) * 0.5f
+          */
+         float z_offset = (z_max + z_min - sysvals->viewport.scale.z) * 0.5f;
+         /* Bump offset off-center if necessary, to not go out of range */
+         sysvals->viewport.offset.z = CLAMP(z_offset, 0.0f, 1.0f);
+      }
+#endif
+
+      gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS);
+   }
+
+#if PAN_ARCH <= 7
+   const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
+   struct panvk_descriptor_state *desc_state = &cmdbuf->state.gfx.desc_state;
+   struct panvk_shader_desc_state *vs_desc_state = &cmdbuf->state.gfx.vs.desc;
+   struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc;
+
+   if (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, VS)) {
+      sysvals->desc.sets[PANVK_DESC_TABLE_VS_DYN_SSBOS] =
+         vs_desc_state->dyn_ssbos;
+      gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS);
+   }
+
+   if (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, FS)) {
+      sysvals->desc.sets[PANVK_DESC_TABLE_FS_DYN_SSBOS] =
+         fs_desc_state->dyn_ssbos;
+      gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS);
+   }
+
+   /* The set mask does not depend on the loop index, so compute it once. */
+   const uint32_t used_set_mask =
+      vs->desc_info.used_set_mask | (fs ? fs->desc_info.used_set_mask : 0);
+
+   for (uint32_t i = 0; i < MAX_SETS; i++) {
+      /* Only flag the push uniforms dirty when a set address is written. */
+      if (used_set_mask & BITFIELD_BIT(i)) {
+         sysvals->desc.sets[i] = desc_state->sets[i]->descs.dev;
+         gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS);
+      }
+   }
+#endif
+}
+
 VKAPI_ATTR void VKAPI_CALL
 panvk_per_arch(CmdBindVertexBuffers)(VkCommandBuffer commandBuffer,
                                      uint32_t firstBinding,