ir3: use isam.v for multi-component SSBO loads
Since a7xx, isam.v can be used to perform multi-component SSBO loads. Use it whenever possible to prevent excessive scalarization. isam.v also takes only a single coordinate (as opposed to the 2-dimensional coordinate required by isam), so this reduces register pressure as well.

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28664>
This commit is contained in:
@@ -175,6 +175,8 @@ struct fd_dev_info {
|
||||
/* See ir3_compiler::has_scalar_alu. */
|
||||
bool has_scalar_alu;
|
||||
|
||||
bool has_isam_v;
|
||||
|
||||
/* Whether writing to UBWC attachment and reading the same image as input
|
||||
* attachment or as a texture reads correct values from the image.
|
||||
* If this is false, we may read stale values from the flag buffer,
|
||||
|
@@ -414,6 +414,7 @@ a6xx_gen4 = A6XXProps(
|
||||
has_lrz_dir_tracking = True,
|
||||
has_per_view_viewport = True,
|
||||
has_scalar_alu = True,
|
||||
has_isam_v = True,
|
||||
)
|
||||
|
||||
a6xx_a690_quirk = A6XXProps(
|
||||
@@ -794,6 +795,7 @@ a7xx_base = A6XXProps(
|
||||
line_width_max = 127.5,
|
||||
has_scalar_alu = True,
|
||||
has_coherent_ubwc_flag_caches = True,
|
||||
has_isam_v = True,
|
||||
)
|
||||
|
||||
a7xx_725 = A7XXProps(
|
||||
|
@@ -1258,6 +1258,12 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags)
|
||||
return false;
|
||||
break;
|
||||
case 5:
|
||||
if (instr->opc == OPC_ISAM && (instr->flags & IR3_INSTR_V)) {
|
||||
if (((instr->flags & IR3_INSTR_S2EN) && n == 2) ||
|
||||
(!(instr->flags & IR3_INSTR_S2EN) && n == 1)) {
|
||||
return flags == IR3_REG_IMMED;
|
||||
}
|
||||
}
|
||||
/* no flags allowed */
|
||||
if (flags)
|
||||
return false;
|
||||
|
@@ -224,6 +224,7 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
|
||||
compiler->has_branch_and_or = true;
|
||||
compiler->has_predication = true;
|
||||
compiler->has_scalar_alu = dev_info->a6xx.has_scalar_alu;
|
||||
compiler->has_isam_v = dev_info->a6xx.has_isam_v;
|
||||
compiler->fs_must_have_non_zero_constlen_quirk = dev_info->a7xx.fs_must_have_non_zero_constlen_quirk;
|
||||
} else {
|
||||
compiler->max_const_pipeline = 512;
|
||||
@@ -237,6 +238,7 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
|
||||
compiler->max_const_safe = 256;
|
||||
|
||||
compiler->has_scalar_alu = false;
|
||||
compiler->has_isam_v = false;
|
||||
}
|
||||
|
||||
/* This is just a guess for a4xx. */
|
||||
|
@@ -208,6 +208,9 @@ struct ir3_compiler {
|
||||
/* Whether SSBOs have descriptors for sampling with ISAM */
|
||||
bool has_isam_ssbo;
|
||||
|
||||
/* Whether isam.v is supported to sample multiple components from SSBOs */
|
||||
bool has_isam_v;
|
||||
|
||||
/* True if 16-bit descriptors are used for both 16-bit and 32-bit access. */
|
||||
bool storage_16bit;
|
||||
|
||||
|
@@ -1592,9 +1592,11 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx,
|
||||
nir_intrinsic_instr *intr,
|
||||
struct ir3_instruction **dst)
|
||||
{
|
||||
/* Note: isam currently can't handle vectorized loads/stores */
|
||||
/* Note: we can only use isam for vectorized loads/stores if isam.v is
|
||||
* available.
|
||||
*/
|
||||
if (!(nir_intrinsic_access(intr) & ACCESS_CAN_REORDER) ||
|
||||
intr->def.num_components > 1 ||
|
||||
(intr->def.num_components > 1 && !ctx->compiler->has_isam_v) ||
|
||||
!ctx->compiler->has_isam_ssbo) {
|
||||
ctx->funcs->emit_intrinsic_load_ssbo(ctx, intr, dst);
|
||||
return;
|
||||
@@ -1602,13 +1604,27 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx,
|
||||
|
||||
struct ir3_block *b = ctx->block;
|
||||
struct ir3_instruction *offset = ir3_get_src(ctx, &intr->src[2])[0];
|
||||
struct ir3_instruction *coords = ir3_collect(b, offset, create_immed(b, 0));
|
||||
struct ir3_instruction *coords = NULL;
|
||||
unsigned imm_offset = 0;
|
||||
|
||||
if (ctx->compiler->has_isam_v) {
|
||||
coords = offset;
|
||||
} else {
|
||||
coords = ir3_collect(b, offset, create_immed(b, 0));
|
||||
}
|
||||
|
||||
struct tex_src_info info = get_image_ssbo_samp_tex_src(ctx, &intr->src[0], false);
|
||||
|
||||
unsigned num_components = intr->def.num_components;
|
||||
assert(num_components == 1 || ctx->compiler->has_isam_v);
|
||||
|
||||
struct ir3_instruction *sam =
|
||||
emit_sam(ctx, OPC_ISAM, info, utype_for_size(intr->def.bit_size),
|
||||
MASK(num_components), coords, NULL);
|
||||
MASK(num_components), coords, create_immed(b, imm_offset));
|
||||
|
||||
if (ctx->compiler->has_isam_v) {
|
||||
sam->flags |= (IR3_INSTR_V | IR3_INSTR_INV_1D);
|
||||
}
|
||||
|
||||
ir3_handle_nonuniform(sam, intr);
|
||||
|
||||
|
@@ -444,6 +444,7 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
|
||||
(opc_cat(instr->opc) == 2) ||
|
||||
(opc_cat(instr->opc) == 6) ||
|
||||
is_meta(instr) ||
|
||||
(instr->opc == OPC_ISAM && (n == 1 || n == 2)) ||
|
||||
(is_mad(instr->opc) && (n == 0)));
|
||||
|
||||
if ((opc_cat(instr->opc) == 2) &&
|
||||
|
@@ -92,10 +92,11 @@ ir3_nir_should_scalarize_mem(const nir_instr *instr, const void *data)
|
||||
|
||||
/* Scalarize load_ssbo's that we could otherwise lower to isam,
|
||||
* as the tex cache benefit outweighs the benefit of vectorizing.
|
||||
* Don't do this if (vectorized) isam.v is supported.
|
||||
*/
|
||||
if ((intrin->intrinsic == nir_intrinsic_load_ssbo) &&
|
||||
(nir_intrinsic_access(intrin) & ACCESS_CAN_REORDER) &&
|
||||
compiler->has_isam_ssbo) {
|
||||
compiler->has_isam_ssbo && !compiler->has_isam_v) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -112,11 +113,12 @@ ir3_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset,
|
||||
unsigned byte_size = bit_size / 8;
|
||||
|
||||
/* Don't vectorize load_ssbo's that we could otherwise lower to isam,
|
||||
* as the tex cache benefit outweighs the benefit of vectorizing
|
||||
* as the tex cache benefit outweighs the benefit of vectorizing. If we
|
||||
* support isam.v, we can vectorize this though.
|
||||
*/
|
||||
if ((low->intrinsic == nir_intrinsic_load_ssbo) &&
|
||||
(nir_intrinsic_access(low) & ACCESS_CAN_REORDER) &&
|
||||
compiler->has_isam_ssbo) {
|
||||
compiler->has_isam_ssbo && !compiler->has_isam_v) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user