diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index e8100d19c04..3409b479d7e 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -1561,6 +1561,8 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
 
    UNUSED bool progress; /* Written by OPT */
 
+   OPT(brw_nir_lower_sparse_intrinsics);
+
    OPT(nir_lower_bit_size, lower_bit_size_callback, (void *)compiler);
 
    OPT(nir_opt_combine_barriers, combine_all_memory_barriers, NULL);
diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h
index 52b55380b7c..2a109d8090e 100644
--- a/src/intel/compiler/brw_nir.h
+++ b/src/intel/compiler/brw_nir.h
@@ -191,6 +191,8 @@ bool brw_nir_lower_conversions(nir_shader *nir);
 
 bool brw_nir_lower_shading_rate_output(nir_shader *nir);
 
+bool brw_nir_lower_sparse_intrinsics(nir_shader *nir);
+
 struct brw_nir_lower_storage_image_opts {
    const struct intel_device_info *devinfo;
 
diff --git a/src/intel/compiler/brw_nir_lower_sparse.c b/src/intel/compiler/brw_nir_lower_sparse.c
new file mode 100644
index 00000000000..8976762e383
--- /dev/null
+++ b/src/intel/compiler/brw_nir_lower_sparse.c
@@ -0,0 +1,247 @@
+/*
+ * Copyright (c) 2023 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_nir.h"
+#include "compiler/nir/nir_builder.h"
+
+/*
+ * This pass lowers a few of the sparse instructions to something the HW can
+ * handle.
+ *
+ * The image_*_sparse_load intrinsics are lowered into 2 instructions: a
+ * regular image_*_load intrinsic and a sparse txf texture operation. The
+ * sparse vector of the original intrinsic is then reconstructed from the 2
+ * new values. We need to do this because our backend implements image
+ * load/store using the dataport and the dataport unit doesn't provide
+ * residency information. We need to use the sampler for residency.
+ *
+ * The is_sparse_texels_resident intrinsic is lowered to a bit-checking
+ * operation, as the data reported by the sampler is a single bit per lane
+ * in the first component.
+ *
+ * The tex_* instructions with a compare value need to be lowered into 2
+ * instructions due to a HW limitation:
+ *
+ *    SKL PRMs, Volume 7: 3D-Media-GPGPU, Messages, SIMD Payloads:
+ *
+ *       "The Pixel Null Mask field, when enabled via the Pixel Null Mask
+ *        Enable, will be incorrect for sample_c when applied to a surface
+ *        with 64-bit per texel format such as R16G16BA16_UNORM. Pixel Null
+ *        Mask Enable may incorrectly report pixels as referencing a Null
+ *        surface."
+ */
+
+static void
+lower_is_sparse_texels_resident(nir_builder *b, nir_intrinsic_instr *intrin)
+{
+   b->cursor = nir_instr_remove(&intrin->instr);
+
+   nir_ssa_def_rewrite_uses(
+      &intrin->dest.ssa,
+      nir_i2b(b, nir_iand(b, intrin->src[0].ssa,
+                          nir_ishl(b, nir_imm_int(b, 1),
+                                   nir_load_subgroup_invocation(b)))));
+}
+
+static void
+lower_sparse_residency_code_and(nir_builder *b, nir_intrinsic_instr *intrin)
+{
+   b->cursor = nir_instr_remove(&intrin->instr);
+
+   nir_ssa_def_rewrite_uses(
+      &intrin->dest.ssa,
+      nir_iand(b, intrin->src[0].ssa, intrin->src[1].ssa));
+}
+
+static void
+lower_sparse_image_load(nir_builder *b, nir_intrinsic_instr *intrin)
+{
+   b->cursor = nir_instr_remove(&intrin->instr);
+
+   nir_ssa_def *img_load;
+   nir_intrinsic_instr *new_intrin;
+   if (intrin->intrinsic == nir_intrinsic_image_sparse_load) {
+      img_load = nir_image_load(b,
+                                intrin->num_components - 1,
+                                nir_dest_bit_size(intrin->dest),
+                                intrin->src[0].ssa,
+                                intrin->src[1].ssa,
+                                intrin->src[2].ssa,
+                                intrin->src[3].ssa);
+      new_intrin = nir_instr_as_intrinsic(img_load->parent_instr);
+      nir_intrinsic_set_range_base(new_intrin, nir_intrinsic_range_base(intrin));
+   } else {
+      img_load = nir_bindless_image_load(b,
+                                         intrin->num_components - 1,
+                                         nir_dest_bit_size(intrin->dest),
+                                         intrin->src[0].ssa,
+                                         intrin->src[1].ssa,
+                                         intrin->src[2].ssa,
+                                         intrin->src[3].ssa);
+      new_intrin = nir_instr_as_intrinsic(img_load->parent_instr);
+   }
+
+   nir_intrinsic_set_image_array(new_intrin, nir_intrinsic_image_array(intrin));
+   nir_intrinsic_set_image_dim(new_intrin, nir_intrinsic_image_dim(intrin));
+   nir_intrinsic_set_format(new_intrin, nir_intrinsic_format(intrin));
+   nir_intrinsic_set_access(new_intrin, nir_intrinsic_access(intrin));
+   nir_intrinsic_set_dest_type(new_intrin, nir_intrinsic_dest_type(intrin));
+
+   nir_ssa_def *dests[NIR_MAX_VEC_COMPONENTS];
+   for (unsigned i = 0; i < intrin->num_components - 1; i++) {
+      dests[i] = nir_channel(b, img_load, i);
+   }
+
+   /* Use a texture instruction to compute residency */
+   nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3);
+
+   tex->op = nir_texop_txf;
+   /* We don't care about the dest type since we're not using any of that
+    * data.
+    */
+   tex->dest_type = nir_type_float32;
+   tex->is_array = nir_intrinsic_image_array(intrin);
+   tex->is_shadow = false;
+   tex->sampler_index = 0;
+   tex->is_sparse = true;
+
+   tex->src[0].src_type = intrin->intrinsic == nir_intrinsic_image_sparse_load ?
+                          nir_tex_src_texture_offset : nir_tex_src_texture_handle;
+   tex->src[0].src = nir_src_for_ssa(intrin->src[0].ssa);
+
+   tex->coord_components = nir_image_intrinsic_coord_components(intrin);
+   nir_ssa_def *coord;
+   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE &&
+       nir_intrinsic_image_array(intrin)) {
+      tex->coord_components++;
+
+      nir_ssa_def *img_layer = nir_channel(b, intrin->src[1].ssa, 2);
+      nir_ssa_def *tex_slice = nir_idiv(b, img_layer, nir_imm_int(b, 6));
+      nir_ssa_def *tex_face =
+         nir_iadd(b, img_layer, nir_ineg(b, nir_imul_imm(b, tex_slice, 6)));
+      nir_ssa_def *comps[4] = {
+         nir_channel(b, intrin->src[1].ssa, 0),
+         nir_channel(b, intrin->src[1].ssa, 1),
+         tex_face,
+         tex_slice
+      };
+      coord = nir_vec(b, comps, 4);
+   } else {
+      coord = nir_channels(b, intrin->src[1].ssa,
+                           nir_component_mask(tex->coord_components));
+   }
+   tex->src[1].src_type = nir_tex_src_coord;
+   tex->src[1].src = nir_src_for_ssa(coord);
+
+   tex->src[2].src_type = nir_tex_src_lod;
+   tex->src[2].src = nir_src_for_ssa(nir_imm_int(b, 0));
+
+   nir_ssa_dest_init(&tex->instr, &tex->dest, 5,
+                     nir_dest_bit_size(intrin->dest));
+
+   nir_builder_instr_insert(b, &tex->instr);
+
+   dests[intrin->num_components - 1] = nir_channel(b, &tex->dest.ssa, 4);
+
+   nir_ssa_def_rewrite_uses(
+      &intrin->dest.ssa,
+      nir_vec(b, dests, intrin->num_components));
+}
+
+static void
+lower_tex_compare(nir_builder *b, nir_tex_instr *tex, int compare_idx)
+{
+   b->cursor = nir_after_instr(&tex->instr);
+
+   /* Clone the original instruction */
+   nir_tex_instr *sparse_tex =
+      nir_instr_as_tex(nir_instr_clone(b->shader, &tex->instr));
+   nir_ssa_dest_init(&sparse_tex->instr, &sparse_tex->dest,
+                     tex->dest.ssa.num_components, tex->dest.ssa.bit_size);
+   nir_builder_instr_insert(b, &sparse_tex->instr);
+
+   /* Drop the compare source on the cloned instruction */
+   nir_tex_instr_remove_src(sparse_tex, compare_idx);
+
+   /* Drop the residency query on the original tex instruction */
+   tex->is_sparse = false;
+   tex->dest.ssa.num_components = tex->dest.ssa.num_components - 1;
+
+   nir_ssa_def *new_comps[NIR_MAX_VEC_COMPONENTS];
+   for (unsigned i = 0; i < tex->dest.ssa.num_components; i++)
+      new_comps[i] = nir_channel(b, &tex->dest.ssa, i);
+   new_comps[tex->dest.ssa.num_components] =
+      nir_channel(b, &sparse_tex->dest.ssa, tex->dest.ssa.num_components);
+
+   nir_ssa_def *new_vec =
+      nir_vec(b, new_comps, sparse_tex->dest.ssa.num_components);
+
+   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, new_vec, new_vec->parent_instr);
+}
+
+static bool
+lower_sparse_intrinsics(nir_builder *b, nir_instr *instr, void *cb_data)
+{
+   switch (instr->type) {
+   case nir_instr_type_intrinsic: {
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      switch (intrin->intrinsic) {
+      case nir_intrinsic_image_sparse_load:
+      case nir_intrinsic_bindless_image_sparse_load:
+         lower_sparse_image_load(b, intrin);
+         return true;
+
+      case nir_intrinsic_is_sparse_texels_resident:
+         lower_is_sparse_texels_resident(b, intrin);
+         return true;
+
+      case nir_intrinsic_sparse_residency_code_and:
+         lower_sparse_residency_code_and(b, intrin);
+         return true;
+
+      default:
+         return false;
+      }
+   }
+
+   case nir_instr_type_tex: {
+      nir_tex_instr *tex = nir_instr_as_tex(instr);
+      int comp_idx = nir_tex_instr_src_index(tex, nir_tex_src_comparator);
+      if (comp_idx != -1 && tex->is_sparse) {
+         lower_tex_compare(b, tex, comp_idx);
+         return true;
+      }
+      return false;
+   }
+
+   default:
+      return false;
+   }
+}
+
+bool
+brw_nir_lower_sparse_intrinsics(nir_shader *nir)
+{
+   return nir_shader_instructions_pass(nir, lower_sparse_intrinsics,
+                                       nir_metadata_block_index |
+                                       nir_metadata_dominance,
+                                       NULL);
+}
diff --git a/src/intel/compiler/meson.build b/src/intel/compiler/meson.build
index adcbeabb806..6a07a70ff8f 100644
--- a/src/intel/compiler/meson.build
+++ b/src/intel/compiler/meson.build
@@ -96,6 +96,7 @@ libintel_compiler_files = files(
   'brw_nir_lower_rt_intrinsics.c',
   'brw_nir_lower_shader_calls.c',
   'brw_nir_lower_shading_rate_output.c',
+  'brw_nir_lower_sparse.c',
   'brw_nir_lower_storage_image.c',
   'brw_nir_opt_peephole_ffma.c',
   'brw_nir_opt_peephole_imul32x16.c',
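
Note (not part of the patch): for readers unfamiliar with the sparse residency encoding, here is a minimal C sketch of the two scalar computations the pass emits, assuming the sampler's per-lane residency bit layout and the cube-array layer split described in the file comment above. The helper names are illustrative only.

#include <stdbool.h>
#include <stdint.h>

/* Illustrative sketch only. The sampler reports residency as one bit per
 * SIMD lane in the first component of the sparse payload, so
 * is_sparse_texels_resident is lowered to exactly this bit test, with
 * `lane` coming from load_subgroup_invocation.
 */
static bool
texels_resident(uint32_t residency_code, unsigned lane)
{
   return (residency_code & (1u << lane)) != 0;
}

/* Illustrative sketch only. A cube array image coordinate carries a
 * combined face+layer index in its third component, while the sampler
 * wants the face and the array slice as separate coordinates; the pass
 * splits the index like this before emitting the residency txf.
 */
static void
split_cube_array_layer(int img_layer, int *tex_face, int *tex_slice)
{
   *tex_slice = img_layer / 6;
   *tex_face = img_layer - *tex_slice * 6;
}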