From 2988f43420d1826756da786e7ffe6c7d697a9b9d Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Thu, 27 Jun 2024 06:49:03 -0400 Subject: [PATCH] tu: Support VK_EXT_fragment_density_map on a750 Part-of: --- src/freedreno/vulkan/tu_cmd_buffer.cc | 130 ++++++++++++++++++++------ src/freedreno/vulkan/tu_device.cc | 2 +- src/freedreno/vulkan/tu_shader.cc | 7 +- 3 files changed, 109 insertions(+), 30 deletions(-) diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc index b8d406acca9..17585538e88 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.cc +++ b/src/freedreno/vulkan/tu_cmd_buffer.cc @@ -1090,9 +1090,14 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd, } /* Make the CP wait until the CP_MEM_WRITE's to the command buffers - * land. + * land. When loading FS params via UBOs, we also need to invalidate + * UCHE because the FS param patchpoint is read through UCHE. */ tu_cs_emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0); + if (cmd->device->compiler->load_shader_consts_via_preamble) { + tu_emit_event_write(cmd, cs, FD_CACHE_INVALIDATE); + tu_cs_emit_wfi(cs); + } tu_cs_emit_pkt7(cs, CP_WAIT_FOR_ME, 0); } } @@ -5117,6 +5122,38 @@ fdm_apply_fs_params(struct tu_cmd_buffer *cmd, } } +static void +tu_emit_fdm_params(struct tu_cmd_buffer *cmd, + struct tu_cs *cs, struct tu_shader *fs, + unsigned num_units) +{ + STATIC_ASSERT(IR3_DP_FS_FRAG_INVOCATION_COUNT == IR3_DP_FS_DYNAMIC); + tu_cs_emit(cs, fs->fs.per_samp ? + cmd->vk.dynamic_graphics_state.ms.rasterization_samples : 1); + tu_cs_emit(cs, 0); + tu_cs_emit(cs, 0); + tu_cs_emit(cs, 0); + + STATIC_ASSERT(IR3_DP_FS_FRAG_SIZE == IR3_DP_FS_DYNAMIC + 4); + STATIC_ASSERT(IR3_DP_FS_FRAG_OFFSET == IR3_DP_FS_DYNAMIC + 6); + if (num_units > 1) { + if (fs->fs.has_fdm) { + struct apply_fs_params_state state = { + .num_consts = num_units - 1, + }; + tu_create_fdm_bin_patchpoint(cmd, cs, 4 * (num_units - 1), + fdm_apply_fs_params, state); + } else { + for (unsigned i = 1; i < num_units; i++) { + tu_cs_emit(cs, 1); + tu_cs_emit(cs, 1); + tu_cs_emit(cs, fui(0.0f)); + tu_cs_emit(cs, fui(0.0f)); + } + } + } +} + static void tu6_emit_fs_params(struct tu_cmd_buffer *cmd) { @@ -5151,31 +5188,7 @@ tu6_emit_fs_params(struct tu_cmd_buffer *cmd) tu_cs_emit(&cs, 0); tu_cs_emit(&cs, 0); - STATIC_ASSERT(IR3_DP_FS_FRAG_INVOCATION_COUNT == IR3_DP_FS_DYNAMIC); - tu_cs_emit(&cs, fs->fs.per_samp ? - cmd->vk.dynamic_graphics_state.ms.rasterization_samples : 1); - tu_cs_emit(&cs, 0); - tu_cs_emit(&cs, 0); - tu_cs_emit(&cs, 0); - - STATIC_ASSERT(IR3_DP_FS_FRAG_SIZE == IR3_DP_FS_DYNAMIC + 4); - STATIC_ASSERT(IR3_DP_FS_FRAG_OFFSET == IR3_DP_FS_DYNAMIC + 6); - if (num_units > 1) { - if (fs->fs.has_fdm) { - struct apply_fs_params_state state = { - .num_consts = num_units - 1, - }; - tu_create_fdm_bin_patchpoint(cmd, &cs, 4 * (num_units - 1), - fdm_apply_fs_params, state); - } else { - for (unsigned i = 1; i < num_units; i++) { - tu_cs_emit(&cs, 1); - tu_cs_emit(&cs, 1); - tu_cs_emit(&cs, fui(0.0f)); - tu_cs_emit(&cs, fui(0.0f)); - } - } - } + tu_emit_fdm_params(cmd, &cs, fs, num_units); cmd->state.fs_params = tu_cs_end_draw_state(&cmd->sub_cs, &cs); @@ -5183,6 +5196,69 @@ tu6_emit_fs_params(struct tu_cmd_buffer *cmd) tu_cs_set_writeable(&cmd->sub_cs, false); } +static void +tu7_emit_fs_params(struct tu_cmd_buffer *cmd) +{ + struct tu_shader *fs = cmd->state.shaders[MESA_SHADER_FRAGMENT]; + + int ubo_offset = fs->const_state.fdm_ubo.idx; + if (ubo_offset < 0) { + cmd->state.fs_params = (struct tu_draw_state) {}; + return; + } + + unsigned num_units = DIV_ROUND_UP(fs->const_state.fdm_ubo.size, 4); + + if (fs->fs.has_fdm) + tu_cs_set_writeable(&cmd->sub_cs, true); + + struct tu_cs cs; + VkResult result = + tu_cs_begin_sub_stream_aligned(&cmd->sub_cs, num_units, 4, &cs); + if (result != VK_SUCCESS) { + tu_cs_set_writeable(&cmd->sub_cs, false); + vk_command_buffer_set_error(&cmd->vk, result); + return; + } + + tu_emit_fdm_params(cmd, &cs, fs, num_units); + + struct tu_draw_state fdm_ubo = tu_cs_end_draw_state(&cmd->sub_cs, &cs); + + if (fs->fs.has_fdm) + tu_cs_set_writeable(&cmd->sub_cs, false); + + result = tu_cs_begin_sub_stream(&cmd->sub_cs, 6, &cs); + if (result != VK_SUCCESS) { + vk_command_buffer_set_error(&cmd->vk, result); + return; + } + + tu_cs_emit_pkt7(&cs, CP_LOAD_STATE6_FRAG, 5); + tu_cs_emit(&cs, + CP_LOAD_STATE6_0_DST_OFF(ubo_offset) | + CP_LOAD_STATE6_0_STATE_TYPE(ST6_UBO)| + CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | + CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_SHADER) | + CP_LOAD_STATE6_0_NUM_UNIT(1)); + tu_cs_emit(&cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); + tu_cs_emit(&cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); + tu_cs_emit_qw(&cs, + fdm_ubo.iova | + (uint64_t)A6XX_UBO_1_SIZE(num_units) << 32); + + cmd->state.fs_params = tu_cs_end_draw_state(&cmd->sub_cs, &cs); +} + +static void +tu_emit_fs_params(struct tu_cmd_buffer *cmd) +{ + if (cmd->device->compiler->load_shader_consts_via_preamble) + tu7_emit_fs_params(cmd); + else + tu6_emit_fs_params(cmd); +} + template static VkResult tu6_draw_common(struct tu_cmd_buffer *cmd, @@ -5352,7 +5428,7 @@ tu6_draw_common(struct tu_cmd_buffer *cmd, if (BITSET_TEST(cmd->vk.dynamic_graphics_state.dirty, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES) || (cmd->state.dirty & (TU_CMD_DIRTY_PROGRAM | TU_CMD_DIRTY_FDM))) { - tu6_emit_fs_params(cmd); + tu_emit_fs_params(cmd); dirty_fs_params = true; } diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc index 94b9705914f..f3736f0b856 100644 --- a/src/freedreno/vulkan/tu_device.cc +++ b/src/freedreno/vulkan/tu_device.cc @@ -242,7 +242,7 @@ get_device_extensions(const struct tu_physical_device *device, .EXT_extended_dynamic_state3 = true, .EXT_external_memory_dma_buf = true, .EXT_filter_cubic = device->info->a6xx.has_tex_filter_cubic, - .EXT_fragment_density_map = !device->info->a7xx.load_shader_consts_via_preamble, + .EXT_fragment_density_map = true, .EXT_global_priority = true, .EXT_global_priority_query = true, .EXT_graphics_pipeline_library = true, diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc index e22cffde6b4..1f7f07751c8 100644 --- a/src/freedreno/vulkan/tu_shader.cc +++ b/src/freedreno/vulkan/tu_shader.cc @@ -477,10 +477,12 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr, instr->intrinsic == nir_intrinsic_load_frag_size_ir3 ? IR3_DP_FS_FRAG_SIZE : IR3_DP_FS_FRAG_OFFSET; + unsigned offset = param - IR3_DP_FS_DYNAMIC; + nir_def *view = instr->src[0].ssa; nir_def *result = ir3_load_driver_ubo_indirect(b, 2, &shader->const_state.fdm_ubo, - param, view, nir_intrinsic_range(instr)); + offset, view, nir_intrinsic_range(instr)); nir_def_replace(&instr->def, result); return true; @@ -491,7 +493,8 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr, nir_def *result = ir3_load_driver_ubo(b, 1, &shader->const_state.fdm_ubo, - IR3_DP_FS_FRAG_INVOCATION_COUNT); + IR3_DP_FS_FRAG_INVOCATION_COUNT - + IR3_DP_FS_DYNAMIC); nir_def_replace(&instr->def, result); return true;