pan/bi: Get rid of bi_lower_load_push_const_with_dyn_offset()

The vulkan driver is now lowering push constant dynamic indexing to
global loads, so we can get rid of this pass.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Chia-I Wu <olvaffe@gmail.com>
Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com>
Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32415>
This commit is contained in:
Boris Brezillon
2024-12-04 17:20:51 +01:00
committed by Marge Bot
parent 0361c1ca08
commit 3e473a9d4c

View File

@@ -5158,86 +5158,6 @@ bifrost_nir_lower_load_output(nir_shader *nir)
nir_metadata_control_flow, NULL);
}
static bool
bi_lower_load_push_const_with_dyn_offset(nir_builder *b,
nir_intrinsic_instr *intr,
UNUSED void *data)
{
if (intr->intrinsic != nir_intrinsic_load_push_constant)
return false;
/* Offset is constant, nothing to do. */
if (nir_src_is_const(intr->src[0]))
return false;
/* nir_lower_mem_access_bit_sizes() should have lowered load_push_constant
* to 32-bit and a maximum of 4 components.
*/
assert(intr->def.num_components <= 4);
assert(intr->def.bit_size == 32);
uint32_t base = nir_intrinsic_base(intr);
uint32_t range = nir_intrinsic_range(intr);
uint32_t nwords = intr->def.num_components;
b->cursor = nir_before_instr(&intr->instr);
/* Dynamic indexing is only allowed for vulkan push constants, which is
* currently limited to 256 bytes. That gives us a maximum of 64 32-bit
* words to read from.
*/
nir_def *lut[64] = {0};
assert(range / 4 <= ARRAY_SIZE(lut));
/* Load all words in the range. */
for (uint32_t w = 0; w < range / 4; w++) {
lut[w] = nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0),
.base = base + (w * 4), .range = 4);
}
nir_def *index = intr->src[0].ssa;
/* Index is dynamic, we need to do iteratively CSEL the values based on
* the index. We start with the highest bit in the index, and for each
* iteration we divide the scope by two.
*/
for (uint32_t lut_sz = ARRAY_SIZE(lut); lut_sz > 0; lut_sz /= 2) {
uint32_t stride = lut_sz / 2;
nir_def *bit_test = NULL;
/* Stop when the LUT is smaller than the number of words we're trying to
* extract.
*/
if (lut_sz <= nwords)
break;
for (uint32_t i = 0; i < stride; i++) {
/* We only need a CSEL if we have two values, otherwise we pick the
* non-NULL value.
*/
if (lut[i] && lut[i + stride]) {
/* Create the test src on-demand. The stride is in 32-bit words,
* multiply by four to convert it into a byte stride we can use
* to test if the corresponding bit is set in the index src.
*/
if (!bit_test)
bit_test = nir_i2b(b, nir_iand_imm(b, index, stride * 4));
lut[i] = nir_bcsel(b, bit_test, lut[i + stride], lut[i]);
} else if (lut[i + stride]) {
lut[i] = lut[i + stride];
}
}
}
nir_def *res = nir_vec(b, &lut[0], nwords);
nir_def_rewrite_uses(&intr->def, res);
nir_instr_remove(&intr->instr);
return true;
}
void
bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id)
{
@@ -5318,10 +5238,6 @@ bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id)
};
NIR_PASS(_, nir, nir_lower_mem_access_bit_sizes, &mem_size_options);
NIR_PASS(_, nir, nir_shader_intrinsics_pass,
bi_lower_load_push_const_with_dyn_offset, nir_metadata_control_flow,
NULL);
nir_lower_ssbo_options ssbo_opts = {
.native_loads = pan_arch(gpu_id) >= 9,
.native_offset = pan_arch(gpu_id) >= 9,