pan/bi: Use write masks on Valhall texture instrs

I noticed a sequence like the following in a scheduled SuperTuxKart shader:

   TEX_SINGLE.slot0 @r0:r1, ..
   LD_VAR.wait0 @r2, ...
   FMA r1, ...

Why do we stall waiting for the TEX_SINGLE instruction when its result is not actually
read? Because its upper channels are *never* read, leading to a
write-after-write dependency when the register allocator puts some unrelated ALU
destination in there. By appropriately masking the texture instruction's write,
that false dependency disappears, avoiding the stall.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20426>
This commit is contained in:
Alyssa Rosenzweig
2022-11-30 21:29:43 -05:00
committed by Marge Bot
parent 7d9c771b9b
commit bd83e5ddaf

View File

@@ -3780,8 +3780,12 @@ bi_emit_tex_valhall(bi_builder *b, nir_tex_instr *instr)
image_src = bi_lshift_or_i32(b, sampler, image_src, bi_imm_u8(0));
image_src = bi_lshift_or_i32(b, texture, image_src, bi_imm_u8(16));
unsigned mask = BI_WRITE_MASK_RGBA;
unsigned res_size = nir_dest_bit_size(instr->dest) == 16 ? 2 : 4;
/* Only write the components that we actually read */
unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
unsigned comps_per_reg = nir_dest_bit_size(instr->dest) == 16 ? 2 : 1;
unsigned res_size = DIV_ROUND_UP(util_bitcount(mask), comps_per_reg);
enum bi_register_format regfmt = bi_reg_fmt_for_nir(instr->dest_type);
enum bi_dimension dim = valhall_tex_dimension(instr->sampler_dim);
bi_index dest = bi_temp(b->shader);
@@ -3810,10 +3814,32 @@ bi_emit_tex_valhall(bi_builder *b, nir_tex_instr *instr)
unreachable("Unhandled Valhall texture op");
}
bi_index w[4] = { bi_null(), bi_null(), bi_null(), bi_null() };
bi_emit_split_i32(b, w, dest, res_size);
bi_emit_collect_to(b, bi_dest_index(&instr->dest), w,
DIV_ROUND_UP(nir_dest_num_components(instr->dest) * res_size, 4));
/* The hardware will write only what we read, and it will write into
* contiguous registers without gaps (different from Bifrost). NIR
* expects the gaps, so fill in the holes (they'll be copypropped and
* DCE'd away later).
*/
bi_index unpacked[4] = { bi_null(), bi_null(), bi_null(), bi_null() };
bi_emit_cached_split_i32(b, dest, res_size);
/* Index into the packed component array */
unsigned j = 0;
unsigned comps[4] = { 0 };
unsigned nr_components = nir_dest_num_components(instr->dest);
for (unsigned i = 0; i < nr_components; ++i) {
if (mask & BITFIELD_BIT(i)) {
unpacked[i] = dest;
comps[i] = j++;
} else {
unpacked[i] = bi_zero();
}
}
bi_make_vec_to(b, bi_dest_index(&instr->dest), unpacked,
comps, nir_dest_num_components(instr->dest),
nir_dest_bit_size(instr->dest));
}
/* Simple textures ops correspond to NIR tex or txl with LOD = 0 on 2D/cube