pan/bi: Get rid of bi_lower_load_push_const_with_dyn_offset()
The vulkan driver is now lowering push constant dynamic indexing to global loads, so we can get rid of this pass. Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com> Reviewed-by: Chia-I Wu <olvaffe@gmail.com> Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com> Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32415>
This commit is contained in:

committed by
Marge Bot

parent
0361c1ca08
commit
3e473a9d4c
@@ -5158,86 +5158,6 @@ bifrost_nir_lower_load_output(nir_shader *nir)
|
||||
nir_metadata_control_flow, NULL);
|
||||
}
|
||||
|
||||
static bool
|
||||
bi_lower_load_push_const_with_dyn_offset(nir_builder *b,
|
||||
nir_intrinsic_instr *intr,
|
||||
UNUSED void *data)
|
||||
{
|
||||
if (intr->intrinsic != nir_intrinsic_load_push_constant)
|
||||
return false;
|
||||
|
||||
/* Offset is constant, nothing to do. */
|
||||
if (nir_src_is_const(intr->src[0]))
|
||||
return false;
|
||||
|
||||
/* nir_lower_mem_access_bit_sizes() should have lowered load_push_constant
|
||||
* to 32-bit and a maximum of 4 components.
|
||||
*/
|
||||
assert(intr->def.num_components <= 4);
|
||||
assert(intr->def.bit_size == 32);
|
||||
|
||||
uint32_t base = nir_intrinsic_base(intr);
|
||||
uint32_t range = nir_intrinsic_range(intr);
|
||||
uint32_t nwords = intr->def.num_components;
|
||||
|
||||
b->cursor = nir_before_instr(&intr->instr);
|
||||
|
||||
/* Dynamic indexing is only allowed for vulkan push constants, which is
|
||||
* currently limited to 256 bytes. That gives us a maximum of 64 32-bit
|
||||
* words to read from.
|
||||
*/
|
||||
nir_def *lut[64] = {0};
|
||||
|
||||
assert(range / 4 <= ARRAY_SIZE(lut));
|
||||
|
||||
/* Load all words in the range. */
|
||||
for (uint32_t w = 0; w < range / 4; w++) {
|
||||
lut[w] = nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0),
|
||||
.base = base + (w * 4), .range = 4);
|
||||
}
|
||||
|
||||
nir_def *index = intr->src[0].ssa;
|
||||
|
||||
/* Index is dynamic, we need to do iteratively CSEL the values based on
|
||||
* the index. We start with the highest bit in the index, and for each
|
||||
* iteration we divide the scope by two.
|
||||
*/
|
||||
for (uint32_t lut_sz = ARRAY_SIZE(lut); lut_sz > 0; lut_sz /= 2) {
|
||||
uint32_t stride = lut_sz / 2;
|
||||
nir_def *bit_test = NULL;
|
||||
|
||||
/* Stop when the LUT is smaller than the number of words we're trying to
|
||||
* extract.
|
||||
*/
|
||||
if (lut_sz <= nwords)
|
||||
break;
|
||||
|
||||
for (uint32_t i = 0; i < stride; i++) {
|
||||
/* We only need a CSEL if we have two values, otherwise we pick the
|
||||
* non-NULL value.
|
||||
*/
|
||||
if (lut[i] && lut[i + stride]) {
|
||||
/* Create the test src on-demand. The stride is in 32-bit words,
|
||||
* multiply by four to convert it into a byte stride we can use
|
||||
* to test if the corresponding bit is set in the index src.
|
||||
*/
|
||||
if (!bit_test)
|
||||
bit_test = nir_i2b(b, nir_iand_imm(b, index, stride * 4));
|
||||
|
||||
lut[i] = nir_bcsel(b, bit_test, lut[i + stride], lut[i]);
|
||||
} else if (lut[i + stride]) {
|
||||
lut[i] = lut[i + stride];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
nir_def *res = nir_vec(b, &lut[0], nwords);
|
||||
|
||||
nir_def_rewrite_uses(&intr->def, res);
|
||||
nir_instr_remove(&intr->instr);
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id)
|
||||
{
|
||||
@@ -5318,10 +5238,6 @@ bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id)
|
||||
};
|
||||
NIR_PASS(_, nir, nir_lower_mem_access_bit_sizes, &mem_size_options);
|
||||
|
||||
NIR_PASS(_, nir, nir_shader_intrinsics_pass,
|
||||
bi_lower_load_push_const_with_dyn_offset, nir_metadata_control_flow,
|
||||
NULL);
|
||||
|
||||
nir_lower_ssbo_options ssbo_opts = {
|
||||
.native_loads = pan_arch(gpu_id) >= 9,
|
||||
.native_offset = pan_arch(gpu_id) >= 9,
|
||||
|
Reference in New Issue
Block a user