brw: Skip unread leading/trailing components in convergent block loads

The NIR vectorizer may produce block loads with unread trailing
components.  Upcoming passes may produce unread leading components
as well.  With a bit of finesse, we can skip loading those, and only
bother with the ones we actually need.  This can sometimes save us on
loads and MOVs.

v2: Skip this for SLM reads on pre-LSC platforms (caught by Lionel).

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32888>
This commit is contained in:
Kenneth Graunke
2025-01-02 00:42:36 -08:00
committed by Marge Bot
parent 4f0c852a4e
commit cfbb5ebcdd

View File

@@ -6926,7 +6926,7 @@ fs_nir_emit_memory_access(nir_to_brw_state &ntb,
srcs[MEMORY_LOGICAL_BINDING] =
get_nir_buffer_intrinsic_index(ntb, bld, instr, &no_mask_handle);
srcs[MEMORY_LOGICAL_ADDRESS] =
get_nir_src(ntb, instr->src[is_store ? 2 : 1]);
get_nir_src_imm(ntb, instr->src[is_store ? 2 : 1]);
data_src = is_atomic ? 2 : 0;
break;
@@ -7113,8 +7113,26 @@ fs_nir_emit_memory_access(nir_to_brw_state &ntb,
const fs_builder ubld = bld.exec_all().group(1, 0);
unsigned total, done;
unsigned first_read_component = 0;
if (convergent_block_load) {
/* If the address is a constant and alignment permits, skip unread
* leading and trailing components. (It's probably not worth the
* extra address math for non-constant addresses.)
*
* Note that SLM block loads on HDC (pre-LSC) platforms need to be 16B
* aligned, so we leave the address untouched for those.
*/
if (srcs[MEMORY_LOGICAL_ADDRESS].file == IMM &&
align >= data_bit_size / 8 &&
(devinfo->has_lsc ||
srcs[MEMORY_LOGICAL_MODE].ud != MEMORY_MODE_SHARED_LOCAL)) {
first_read_component = nir_def_first_component_read(&instr->def);
unsigned last_component = nir_def_last_component_read(&instr->def);
srcs[MEMORY_LOGICAL_ADDRESS].u64 +=
first_read_component * (data_bit_size / 8);
components = last_component - first_read_component + 1;
}
total = ALIGN(components, REG_SIZE * reg_unit(devinfo) / 4);
dest = ubld.vgrf(BRW_TYPE_UD, total);
} else {
@@ -7157,7 +7175,8 @@ fs_nir_emit_memory_access(nir_to_brw_state &ntb,
if (convergent_block_load) {
for (unsigned c = 0; c < components; c++) {
xbld.MOV(retype(offset(nir_dest, xbld, c), BRW_TYPE_UD),
xbld.MOV(retype(offset(nir_dest, xbld, first_read_component + c),
BRW_TYPE_UD),
component(dest, c));
}
}