From 02cecffe2bbacb923f3b8de5a0e1ffa08fe835a6 Mon Sep 17 00:00:00 2001
From: Lionel Landwerlin
Date: Thu, 22 Dec 2022 20:44:07 +0200
Subject: [PATCH] anv: add a pass to partially lower resource_intel

Signed-off-by: Lionel Landwerlin
Reviewed-by: Kenneth Graunke
Part-of:
---
 src/intel/vulkan/anv_nir.h                    |   6 +
 .../vulkan/anv_nir_compute_push_layout.c      |   2 +-
 .../vulkan/anv_nir_lower_resource_intel.c     | 203 ++++++++++++++++++
 .../vulkan/anv_nir_push_descriptor_analysis.c |   9 +-
 src/intel/vulkan/anv_pipeline.c               |   9 +
 src/intel/vulkan/meson.build                  |   1 +
 6 files changed, 225 insertions(+), 5 deletions(-)
 create mode 100644 src/intel/vulkan/anv_nir_lower_resource_intel.c

diff --git a/src/intel/vulkan/anv_nir.h b/src/intel/vulkan/anv_nir.h
index fe4cf90cd99..947d2ca4c95 100644
--- a/src/intel/vulkan/anv_nir.h
+++ b/src/intel/vulkan/anv_nir.h
@@ -84,6 +84,12 @@ void anv_nir_compute_push_layout(nir_shader *nir,
 void anv_nir_validate_push_layout(struct brw_stage_prog_data *prog_data,
                                   struct anv_pipeline_bind_map *map);
 
+bool anv_nir_update_resource_intel_block(nir_shader *shader);
+
+bool anv_nir_lower_resource_intel(nir_shader *shader,
+                                  const struct anv_physical_device *device,
+                                  enum anv_descriptor_set_layout_type desc_type);
+
 bool anv_nir_add_base_work_group_id(nir_shader *shader);
 
 uint32_t anv_nir_compute_used_push_descriptors(nir_shader *shader,
diff --git a/src/intel/vulkan/anv_nir_compute_push_layout.c b/src/intel/vulkan/anv_nir_compute_push_layout.c
index a7c9e720f86..b83eafec75e 100644
--- a/src/intel/vulkan/anv_nir_compute_push_layout.c
+++ b/src/intel/vulkan/anv_nir_compute_push_layout.c
@@ -55,7 +55,7 @@ anv_nir_compute_push_layout(nir_shader *nir,
          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
          switch (intrin->intrinsic) {
          case nir_intrinsic_load_ubo:
-            if (nir_src_is_const(intrin->src[0]) &&
+            if (brw_nir_ubo_surface_index_is_pushable(intrin->src[0]) &&
                 nir_src_is_const(intrin->src[1]))
                has_const_ubo = true;
             break;
diff --git a/src/intel/vulkan/anv_nir_lower_resource_intel.c b/src/intel/vulkan/anv_nir_lower_resource_intel.c
new file mode 100644
index 00000000000..7ff02035474
--- /dev/null
+++ b/src/intel/vulkan/anv_nir_lower_resource_intel.c
@@ -0,0 +1,203 @@
+/*
+ * Copyright © 2022 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "anv_nir.h"
+#include "nir_builder.h"
+
+/* This pass updates the block index in the resource_intel intrinsics if the
+ * array index is constant.
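+ *
+ * A small worked example (editorial annotation with made-up values): a
+ * binding whose block index is 3, accessed with the constant array index 2,
+ * ends up with block index 5; a non-constant array index, or a resource not
+ * marked pushable, instead gets block 0xffffffff and loses the pushable
+ * flag.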
+ *
+ * This pass must be run before anv_nir_compute_push_layout().
+ */
+static bool
+update_resource_intel_block(nir_builder *b, nir_instr *instr,
+                            UNUSED void *data)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+   if (intrin->intrinsic != nir_intrinsic_resource_intel)
+      return false;
+
+   /* If the array index in the descriptor binding is not constant, we won't
+    * be able to turn this load_ubo into a push constant.
+    *
+    * In that case, or if the resource is not pushable, set the block to
+    * 0xffffffff and clear the pushable flag.
+    *
+    * Otherwise update the block index by adding the array index, so that
+    * when anv_nir_compute_push_layout() uses the block value it picks the
+    * right surface in the array of the binding.
+    */
+   if (!nir_src_is_const(intrin->src[2]) ||
+       !(nir_intrinsic_resource_access_intel(intrin) &
+         nir_resource_intel_pushable)) {
+      nir_intrinsic_set_resource_block_intel(intrin, 0xffffffff);
+      nir_intrinsic_set_resource_access_intel(
+         intrin,
+         nir_intrinsic_resource_access_intel(intrin) &
+         ~nir_resource_intel_pushable);
+   } else {
+      nir_intrinsic_set_resource_block_intel(
+         intrin,
+         nir_intrinsic_resource_block_intel(intrin) +
+         nir_src_as_uint(intrin->src[2]));
+   }
+
+   return true;
+}
+
+bool
+anv_nir_update_resource_intel_block(nir_shader *shader)
+{
+   return nir_shader_instructions_pass(shader, update_resource_intel_block,
+                                       nir_metadata_all,
+                                       NULL);
+}
+
+static bool
+intrinsic_dont_need_rewrite(nir_intrinsic_instr *instr)
+{
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_ubo:
+   case nir_intrinsic_load_ssbo:
+   case nir_intrinsic_store_ssbo:
+      return true;
+
+   case nir_intrinsic_image_load:
+   case nir_intrinsic_image_store:
+   case nir_intrinsic_image_atomic:
+   case nir_intrinsic_image_atomic_swap:
+   case nir_intrinsic_image_size:
+   case nir_intrinsic_image_load_raw_intel:
+   case nir_intrinsic_image_store_raw_intel:
+   case nir_intrinsic_image_samples:
+   case nir_intrinsic_bindless_image_load:
+   case nir_intrinsic_bindless_image_store:
+   case nir_intrinsic_bindless_image_atomic:
+   case nir_intrinsic_bindless_image_atomic_swap:
+   case nir_intrinsic_bindless_image_size:
+      return true;
+
+   default:
+      return false;
+   }
+}
+
+struct lower_resource_state {
+   enum anv_descriptor_set_layout_type desc_type;
+   const struct anv_physical_device *device;
+};
+
+/* This pass lowers the resource_intel surface_index source, combining the
+ * descriptor set offset with the surface offset in the descriptor set.
+ *
+ * This pass must be run after anv_nir_compute_push_layout() because we want
+ * the push constant selection to tell whether the surface offset is
+ * constant; once the offsets are combined, that constant detection no
+ * longer works.
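+ *
+ * As an illustration (editorial annotation, schematic operand names): with
+ * the direct descriptor set layout, a bindless
+ *
+ *    resource_intel(set_offset, binding_offset, array_index, ...)
+ *
+ * is rewritten below into
+ *
+ *    resource_intel(0xdeaddeed, set_offset + binding_offset, 0xdeaddeed, ...)
+ *
+ * with the now-dead set-offset and array-index sources replaced by a marker
+ * constant.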
+ */
+static bool
+lower_resource_intel(nir_builder *b, nir_instr *instr, void *data)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+   if (intrin->intrinsic != nir_intrinsic_resource_intel)
+      return false;
+
+   const bool is_bindless =
+      (nir_intrinsic_resource_access_intel(intrin) &
+       nir_resource_intel_bindless) != 0;
+   const bool is_sampler =
+      (nir_intrinsic_resource_access_intel(intrin) &
+       nir_resource_intel_sampler) != 0;
+   const struct lower_resource_state *state = data;
+
+   if (!is_bindless)
+      return true;
+
+   b->cursor = nir_before_instr(instr);
+
+   nir_ssa_def *set_offset = intrin->src[0].ssa;
+   nir_ssa_def *binding_offset = intrin->src[1].ssa;
+
+   /* When using indirect descriptors, the surface handles are loaded from
+    * the descriptor buffer and do not need any offset.
+    */
+   if (state->desc_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT) {
+      if (!state->device->uses_ex_bso) {
+         /* We're trying to reduce the number of instructions in the shaders
+          * to compute surface handles. The assumption is that we're using
+          * more surface handles than sampler handles (UBO, SSBO, images,
+          * etc...) so it's worth optimizing that case.
+          *
+          * Surface handles in the extended descriptor message have to be
+          * shifted left by 6 prior to ex_bso (bits 31:12 in the extended
+          * descriptor match bits 25:6 of the surface handle). We have to
+          * combine 2 parts in the shader to build the final surface handle:
+          * the base offset of the descriptor set (in the push constant,
+          * located in resource_intel::src[0]) and the relative descriptor
+          * offset (resource_intel::src[1]).
+          *
+          * For convenience, up to this point, resource_intel::src[1] is in
+          * bytes. We now have to shift it left by 6 to match the left shift
+          * by 6 already applied to the push constant value provided in
+          * resource_intel::src[0]. That way the shader can build the final
+          * surface handle with a single ADD.
+          *
+          * Samplers have a 4GB heap and in the message they're in bits 31:6
+          * of component 3 of the sampler message header. But since we push
+          * only a single offset for the base offset of the descriptor set,
+          * resource_intel::src[0] has to be shifted right by 6 (bringing it
+          * back to bytes).
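+          *
+          * A worked example with made-up numbers (editorial annotation): a
+          * set base offset of 0x1000 bytes is pushed pre-shifted as
+          * 0x1000 << 6 = 0x40000; a surface at offset 0x40 within the set
+          * gives binding_offset << 6 = 0x1000; the single ADD then yields
+          * 0x41000 == (0x1000 + 0x40) << 6, the shifted surface handle.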
+          */
+         if (is_sampler)
+            set_offset = nir_ushr_imm(b, set_offset, 6);
+         else
+            binding_offset = nir_ishl_imm(b, binding_offset, 6);
+      }
+
+      nir_instr_rewrite_src_ssa(instr, &intrin->src[1],
+                                nir_iadd(b, set_offset, binding_offset));
+   }
+
+   /* Now unused values: set offset, array index */
+   nir_instr_rewrite_src_ssa(instr, &intrin->src[0],
+                             nir_imm_int(b, 0xdeaddeed));
+   nir_instr_rewrite_src_ssa(instr, &intrin->src[2],
+                             nir_imm_int(b, 0xdeaddeed));
+
+   return true;
+}
+
+bool
+anv_nir_lower_resource_intel(nir_shader *shader,
+                             const struct anv_physical_device *device,
+                             enum anv_descriptor_set_layout_type desc_type)
+{
+   struct lower_resource_state state = {
+      .desc_type = desc_type,
+      .device = device,
+   };
+   return nir_shader_instructions_pass(shader, lower_resource_intel,
+                                       nir_metadata_block_index |
+                                       nir_metadata_dominance,
+                                       &state);
+}
diff --git a/src/intel/vulkan/anv_nir_push_descriptor_analysis.c b/src/intel/vulkan/anv_nir_push_descriptor_analysis.c
index a5d26358201..7367c55cc82 100644
--- a/src/intel/vulkan/anv_nir_push_descriptor_analysis.c
+++ b/src/intel/vulkan/anv_nir_push_descriptor_analysis.c
@@ -23,6 +23,8 @@
 
 #include "anv_nir.h"
 
+#include "compiler/brw_nir.h"
+
 const struct anv_descriptor_set_layout *
 anv_pipeline_layout_get_push_set(const struct anv_pipeline_sets_layout *layout,
                                  uint8_t *set_idx)
@@ -191,12 +193,11 @@ anv_nir_push_desc_ubo_fully_promoted(nir_shader *nir,
          if (intrin->intrinsic != nir_intrinsic_load_ubo)
             continue;
 
-         const nir_const_value *const_bt_idx =
-            nir_src_as_const_value(intrin->src[0]);
-         if (const_bt_idx == NULL)
+         if (!brw_nir_ubo_surface_index_is_pushable(intrin->src[0]))
             continue;
 
-         const unsigned bt_idx = const_bt_idx[0].u32;
+         const unsigned bt_idx =
+            brw_nir_ubo_surface_index_get_bti(intrin->src[0]);
 
          /* Skip if this isn't a load from push descriptor buffer. */
          const struct anv_pipeline_binding *binding =
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 14887aae58b..de4562e10c6 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -1025,8 +1025,14 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
             .types = lower_non_uniform_access_types,
             .callback = NULL,
          });
+
+      NIR_PASS(_, nir, brw_nir_lower_non_uniform_resource_intel);
+      NIR_PASS(_, nir, brw_nir_cleanup_resource_intel);
+      NIR_PASS(_, nir, nir_opt_dce);
    }
 
+   NIR_PASS_V(nir, anv_nir_update_resource_intel_block);
+
    stage->dynamic_push_values = anv_nir_compute_dynamic_push_bits(nir);
 
    NIR_PASS_V(nir, anv_nir_compute_push_layout,
@@ -1034,6 +1040,9 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
               anv_graphics_pipeline_stage_fragment_dynamic(stage),
               prog_data, &stage->bind_map, mem_ctx);
 
+   NIR_PASS_V(nir, anv_nir_lower_resource_intel, pdevice,
+              pipeline->layout.type);
+
    if (gl_shader_stage_uses_workgroup(nir->info.stage)) {
       if (!nir->info.shared_memory_explicit_layout) {
          NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
diff --git a/src/intel/vulkan/meson.build b/src/intel/vulkan/meson.build
index 7b975dccb0a..0c34e486b3f 100644
--- a/src/intel/vulkan/meson.build
+++ b/src/intel/vulkan/meson.build
@@ -170,6 +170,7 @@ libanv_files = files(
   'anv_nir_lower_multiview.c',
   'anv_nir_lower_load_patch_vertices_in.c',
   'anv_nir_lower_ubo_loads.c',
+  'anv_nir_lower_resource_intel.c',
   'anv_nir_push_descriptor_analysis.c',
   'anv_perf.c',
   'anv_pipeline.c',