agx: Lower shared memory offsets to 16-bit

Per the hardware requirement. This simplifies instruction selection (it avoids
the need to constant-fold u2u16 in the backend).
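
Concretely, the pass rewrites the offset source of each shared-memory
intrinsic through a u2u16 conversion. A minimal sketch of the core rewrite
(the full pass, with its intrinsic filter, is in the new file below):

   /* Narrow the offset to the 16 bits the hardware addresses with. */
   nir_src *offset = nir_get_io_offset_src(intr);
   b->cursor = nir_before_instr(instr);
   nir_instr_rewrite_src_ssa(instr, offset,
                             nir_u2u16(b, nir_ssa_for_src(b, *offset, 1)));

Once the conversion is expressed in NIR, constant offsets are folded there
rather than during backend instruction selection.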

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21326>
Alyssa Rosenzweig
2023-02-08 20:46:48 -05:00
committed by Marge Bot
parent a21f6f8cb0
commit 14f546726e
4 changed files with 53 additions and 0 deletions

--- a/src/asahi/compiler/agx_compile.c
+++ b/src/asahi/compiler/agx_compile.c

@@ -2248,6 +2248,7 @@ agx_preprocess_nir(nir_shader *nir)
    NIR_PASS_V(nir, nir_opt_sink, move_all);
    NIR_PASS_V(nir, nir_opt_move, move_all);
    NIR_PASS_V(nir, agx_nir_lower_ubo);
+   NIR_PASS_V(nir, agx_nir_lower_shared_bitsize);
    NIR_PASS_V(nir, nir_lower_ssbo);
 }

--- a/src/asahi/compiler/agx_compiler.h
+++ b/src/asahi/compiler/agx_compiler.h

@@ -830,6 +830,7 @@ bool agx_nir_opt_preamble(nir_shader *s, unsigned *preamble_size);
 bool agx_nir_lower_load_mask(nir_shader *shader);
 bool agx_nir_lower_address(nir_shader *shader);
 bool agx_nir_lower_ubo(nir_shader *shader);
+bool agx_nir_lower_shared_bitsize(nir_shader *shader);

 #ifdef __cplusplus
 } /* extern C */

--- /dev/null
+++ b/src/asahi/compiler/agx_nir_lower_shared_bitsize.c

@@ -0,0 +1,50 @@
+/*
+ * Copyright 2022 Alyssa Rosenzweig
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "compiler/nir/nir_builder.h"
+#include "agx_compiler.h"
+
+/* Local memory instructions require 16-bit offsets, so we add conversions. */
+static bool
+pass(struct nir_builder *b, nir_instr *instr, UNUSED void *data)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   switch (intr->intrinsic) {
+   case nir_intrinsic_load_shared:
+   case nir_intrinsic_store_shared:
+   case nir_intrinsic_shared_atomic_add:
+   case nir_intrinsic_shared_atomic_imin:
+   case nir_intrinsic_shared_atomic_umin:
+   case nir_intrinsic_shared_atomic_imax:
+   case nir_intrinsic_shared_atomic_umax:
+   case nir_intrinsic_shared_atomic_and:
+   case nir_intrinsic_shared_atomic_or:
+   case nir_intrinsic_shared_atomic_xor:
+   case nir_intrinsic_shared_atomic_exchange:
+   case nir_intrinsic_shared_atomic_comp_swap:
+      break;
+   default:
+      return false;
+   }
+
+   nir_src *offset = nir_get_io_offset_src(intr);
+   if (nir_src_bit_size(*offset) == 16)
+      return false;
+
+   b->cursor = nir_before_instr(instr);
+   nir_instr_rewrite_src_ssa(instr, offset,
+                             nir_u2u16(b, nir_ssa_for_src(b, *offset, 1)));
+   return true;
+}
+
+bool
+agx_nir_lower_shared_bitsize(nir_shader *shader)
+{
+   return nir_shader_instructions_pass(
+      shader, pass, nir_metadata_block_index | nir_metadata_dominance, NULL);
+}

--- a/src/asahi/compiler/meson.build
+++ b/src/asahi/compiler/meson.build

@@ -28,6 +28,7 @@ libasahi_agx_files = files(
   'agx_nir_lower_zs_emit.c',
   'agx_nir_lower_texture.c',
   'agx_nir_lower_load_mask.c',
+  'agx_nir_lower_shared_bitsize.c',
   'agx_nir_lower_ubo.c',
   'agx_nir_opt_preamble.c',
   'agx_lower_64bit.c',