From 3e473a9d4cd70f8efe596a952ec4906031705b64 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 4 Dec 2024 17:20:51 +0100 Subject: [PATCH] pan/bi: Get rid of bi_lower_load_push_const_with_dyn_offset() The vulkan driver is now lowering push constant dynamic indexing to global loads, so we can get rid of this pass. Signed-off-by: Boris Brezillon Reviewed-by: Chia-I Wu Reviewed-by: Mary Guillemard Reviewed-by: Lars-Ivar Hesselberg Simonsen Part-of: --- src/panfrost/compiler/bifrost_compile.c | 84 ------------------------- 1 file changed, 84 deletions(-) diff --git a/src/panfrost/compiler/bifrost_compile.c b/src/panfrost/compiler/bifrost_compile.c index b70979a224d..0daed066733 100644 --- a/src/panfrost/compiler/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost_compile.c @@ -5158,86 +5158,6 @@ bifrost_nir_lower_load_output(nir_shader *nir) nir_metadata_control_flow, NULL); } -static bool -bi_lower_load_push_const_with_dyn_offset(nir_builder *b, - nir_intrinsic_instr *intr, - UNUSED void *data) -{ - if (intr->intrinsic != nir_intrinsic_load_push_constant) - return false; - - /* Offset is constant, nothing to do. */ - if (nir_src_is_const(intr->src[0])) - return false; - - /* nir_lower_mem_access_bit_sizes() should have lowered load_push_constant - * to 32-bit and a maximum of 4 components. - */ - assert(intr->def.num_components <= 4); - assert(intr->def.bit_size == 32); - - uint32_t base = nir_intrinsic_base(intr); - uint32_t range = nir_intrinsic_range(intr); - uint32_t nwords = intr->def.num_components; - - b->cursor = nir_before_instr(&intr->instr); - - /* Dynamic indexing is only allowed for vulkan push constants, which is - * currently limited to 256 bytes. That gives us a maximum of 64 32-bit - * words to read from. - */ - nir_def *lut[64] = {0}; - - assert(range / 4 <= ARRAY_SIZE(lut)); - - /* Load all words in the range. */ - for (uint32_t w = 0; w < range / 4; w++) { - lut[w] = nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), - .base = base + (w * 4), .range = 4); - } - - nir_def *index = intr->src[0].ssa; - - /* Index is dynamic, we need to do iteratively CSEL the values based on - * the index. We start with the highest bit in the index, and for each - * iteration we divide the scope by two. - */ - for (uint32_t lut_sz = ARRAY_SIZE(lut); lut_sz > 0; lut_sz /= 2) { - uint32_t stride = lut_sz / 2; - nir_def *bit_test = NULL; - - /* Stop when the LUT is smaller than the number of words we're trying to - * extract. - */ - if (lut_sz <= nwords) - break; - - for (uint32_t i = 0; i < stride; i++) { - /* We only need a CSEL if we have two values, otherwise we pick the - * non-NULL value. - */ - if (lut[i] && lut[i + stride]) { - /* Create the test src on-demand. The stride is in 32-bit words, - * multiply by four to convert it into a byte stride we can use - * to test if the corresponding bit is set in the index src. - */ - if (!bit_test) - bit_test = nir_i2b(b, nir_iand_imm(b, index, stride * 4)); - - lut[i] = nir_bcsel(b, bit_test, lut[i + stride], lut[i]); - } else if (lut[i + stride]) { - lut[i] = lut[i + stride]; - } - } - } - - nir_def *res = nir_vec(b, &lut[0], nwords); - - nir_def_rewrite_uses(&intr->def, res); - nir_instr_remove(&intr->instr); - return true; -} - void bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id) { @@ -5318,10 +5238,6 @@ bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id) }; NIR_PASS(_, nir, nir_lower_mem_access_bit_sizes, &mem_size_options); - NIR_PASS(_, nir, nir_shader_intrinsics_pass, - bi_lower_load_push_const_with_dyn_offset, nir_metadata_control_flow, - NULL); - nir_lower_ssbo_options ssbo_opts = { .native_loads = pan_arch(gpu_id) >= 9, .native_offset = pan_arch(gpu_id) >= 9,