ir3: use fully-functional dp4acc when available

a750 improves dp4acc to support all dot product variants. The main
difference from dp4acc on previous generations is that the signedness and
packed instruction fields must instead be interpreted as the signedness of
the left-hand and right-hand sides of the dot product, respectively.

Signed-off-by: Zan Dobersek <zdobersek@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29875>
This commit is contained in:
Zan Dobersek
2024-07-14 13:31:28 +02:00
committed by Marge Bot
parent 8aa2cad5df
commit 9e0b77d5c3
5 changed files with 52 additions and 7 deletions

View File

@@ -290,6 +290,8 @@ struct fd_dev_info {
* R8G8B8A8_UNORM in the mutable formats list.
*/
bool ubwc_all_formats_compatible;
bool has_compliant_dp4acc;
} a7xx;
};

View File

@@ -881,6 +881,7 @@ a7xx_750 = A7XXProps(
gs_vpc_adjacency_quirk = True,
storage_8bit = True,
ubwc_all_formats_compatible = True,
has_compliant_dp4acc = True,
)
a730_magic_regs = dict(

View File

@@ -206,6 +206,7 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
compiler->has_dp2acc = dev_info->a6xx.has_dp2acc;
compiler->has_dp4acc = dev_info->a6xx.has_dp4acc;
compiler->has_compliant_dp4acc = dev_info->a7xx.has_compliant_dp4acc;
if (compiler->gen == 6 && options->shared_push_consts) {
compiler->shared_consts_base_offset = 504;
@@ -301,14 +302,19 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
if (compiler->gen >= 6) {
compiler->nir_options.vectorize_io = true,
compiler->nir_options.force_indirect_unrolling = nir_var_all,
compiler->nir_options.lower_device_index_to_zero = true;
compiler->nir_options.lower_device_index_to_zero = true,
compiler->nir_options.has_udot_4x8 = true,
compiler->nir_options.has_sudot_4x8 = true,
compiler->nir_options.has_udot_4x8 = dev_info->a6xx.has_dp2acc;
compiler->nir_options.has_sudot_4x8 = dev_info->a6xx.has_dp2acc;
compiler->nir_options.has_udot_4x8_sat = dev_info->a6xx.has_dp2acc;
compiler->nir_options.has_sudot_4x8_sat = dev_info->a6xx.has_dp2acc;
if (dev_info->a6xx.has_dp2acc || dev_info->a6xx.has_dp4acc) {
compiler->nir_options.has_udot_4x8 =
compiler->nir_options.has_udot_4x8_sat = true;
compiler->nir_options.has_sudot_4x8 =
compiler->nir_options.has_sudot_4x8_sat = true;
}
if (dev_info->a6xx.has_dp4acc && dev_info->a7xx.has_compliant_dp4acc) {
compiler->nir_options.has_sdot_4x8 =
compiler->nir_options.has_sdot_4x8_sat = true;
}
} else if (compiler->gen >= 3 && compiler->gen <= 5) {
compiler->nir_options.vertex_id_zero_based = true;
} else if (compiler->gen <= 2) {

View File

@@ -239,6 +239,7 @@ struct ir3_compiler {
bool has_dp2acc;
bool has_dp4acc;
bool has_compliant_dp4acc;
/* Type to use for 1b nir bools: */
type_t bool_type;

View File

@@ -356,6 +356,37 @@ emit_alu_dot_4x8_as_dp4acc(struct ir3_context *ctx, nir_alu_instr *alu,
struct ir3_instruction **dst,
struct ir3_instruction **src)
{
if (ctx->compiler->has_compliant_dp4acc) {
dst[0] = ir3_DP4ACC(ctx->block, src[0], 0, src[1], 0, src[2], 0);
/* This is actually the LHS signedness attribute.
* IR3_SRC_UNSIGNED ~ unsigned LHS (i.e. OpUDot and OpUDotAccSat).
*/
if (alu->op == nir_op_udot_4x8_uadd ||
alu->op == nir_op_udot_4x8_uadd_sat) {
dst[0]->cat3.signedness = IR3_SRC_UNSIGNED;
} else {
dst[0]->cat3.signedness = IR3_SRC_MIXED;
}
/* This is actually the RHS signedness attribute.
* IR3_SRC_PACKED_HIGH ~ signed RHS (i.e. OpSDot and OpSDotAccSat).
*/
if (alu->op == nir_op_sdot_4x8_iadd ||
alu->op == nir_op_sdot_4x8_iadd_sat) {
dst[0]->cat3.packed = IR3_SRC_PACKED_HIGH;
} else {
dst[0]->cat3.packed = IR3_SRC_PACKED_LOW;
}
if (alu->op == nir_op_udot_4x8_uadd_sat ||
alu->op == nir_op_sdot_4x8_iadd_sat ||
alu->op == nir_op_sudot_4x8_iadd_sat) {
dst[0]->flags |= IR3_INSTR_SAT;
}
return;
}
struct ir3_instruction *accumulator = NULL;
if (alu->op == nir_op_udot_4x8_uadd_sat) {
accumulator = create_immed(ctx->block, 0);
@@ -446,6 +477,8 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
/* it probably isn't worth emulating these with scalar-only ops */
alu->op != nir_op_udot_4x8_uadd &&
alu->op != nir_op_udot_4x8_uadd_sat &&
alu->op != nir_op_sdot_4x8_iadd &&
alu->op != nir_op_sdot_4x8_iadd_sat &&
alu->op != nir_op_sudot_4x8_iadd &&
alu->op != nir_op_sudot_4x8_iadd_sat &&
/* not supported in HW, we have to fall back to normal registers */
@@ -896,6 +929,8 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
case nir_op_udot_4x8_uadd:
case nir_op_udot_4x8_uadd_sat:
case nir_op_sdot_4x8_iadd:
case nir_op_sdot_4x8_iadd_sat:
case nir_op_sudot_4x8_iadd:
case nir_op_sudot_4x8_iadd_sat: {
if (ctx->compiler->has_dp4acc) {