ir3: use fully-functional dp4acc when available
a750 improves dp4acc to support all dot product variants. The main difference from the dp4acc of previous generations is that the signedness and packed instruction fields have to be interpreted instead as the signedness of the left-hand side and the right-hand side of the dot product, respectively. Signed-off-by: Zan Dobersek <zdobersek@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29875>
This commit is contained in:
@@ -290,6 +290,8 @@ struct fd_dev_info {
|
||||
* R8G8B8A8_UNORM in the mutable formats list.
|
||||
*/
|
||||
bool ubwc_all_formats_compatible;
|
||||
|
||||
bool has_compliant_dp4acc;
|
||||
} a7xx;
|
||||
};
|
||||
|
||||
|
@@ -881,6 +881,7 @@ a7xx_750 = A7XXProps(
|
||||
gs_vpc_adjacency_quirk = True,
|
||||
storage_8bit = True,
|
||||
ubwc_all_formats_compatible = True,
|
||||
has_compliant_dp4acc = True,
|
||||
)
|
||||
|
||||
a730_magic_regs = dict(
|
||||
|
@@ -206,6 +206,7 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
|
||||
|
||||
compiler->has_dp2acc = dev_info->a6xx.has_dp2acc;
|
||||
compiler->has_dp4acc = dev_info->a6xx.has_dp4acc;
|
||||
compiler->has_compliant_dp4acc = dev_info->a7xx.has_compliant_dp4acc;
|
||||
|
||||
if (compiler->gen == 6 && options->shared_push_consts) {
|
||||
compiler->shared_consts_base_offset = 504;
|
||||
@@ -301,14 +302,19 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
|
||||
if (compiler->gen >= 6) {
|
||||
compiler->nir_options.vectorize_io = true,
|
||||
compiler->nir_options.force_indirect_unrolling = nir_var_all,
|
||||
compiler->nir_options.lower_device_index_to_zero = true;
|
||||
|
||||
compiler->nir_options.lower_device_index_to_zero = true,
|
||||
compiler->nir_options.has_udot_4x8 = true,
|
||||
compiler->nir_options.has_sudot_4x8 = true,
|
||||
compiler->nir_options.has_udot_4x8 = dev_info->a6xx.has_dp2acc;
|
||||
compiler->nir_options.has_sudot_4x8 = dev_info->a6xx.has_dp2acc;
|
||||
compiler->nir_options.has_udot_4x8_sat = dev_info->a6xx.has_dp2acc;
|
||||
compiler->nir_options.has_sudot_4x8_sat = dev_info->a6xx.has_dp2acc;
|
||||
if (dev_info->a6xx.has_dp2acc || dev_info->a6xx.has_dp4acc) {
|
||||
compiler->nir_options.has_udot_4x8 =
|
||||
compiler->nir_options.has_udot_4x8_sat = true;
|
||||
compiler->nir_options.has_sudot_4x8 =
|
||||
compiler->nir_options.has_sudot_4x8_sat = true;
|
||||
}
|
||||
|
||||
if (dev_info->a6xx.has_dp4acc && dev_info->a7xx.has_compliant_dp4acc) {
|
||||
compiler->nir_options.has_sdot_4x8 =
|
||||
compiler->nir_options.has_sdot_4x8_sat = true;
|
||||
}
|
||||
} else if (compiler->gen >= 3 && compiler->gen <= 5) {
|
||||
compiler->nir_options.vertex_id_zero_based = true;
|
||||
} else if (compiler->gen <= 2) {
|
||||
|
@@ -239,6 +239,7 @@ struct ir3_compiler {
|
||||
|
||||
bool has_dp2acc;
|
||||
bool has_dp4acc;
|
||||
bool has_compliant_dp4acc;
|
||||
|
||||
/* Type to use for 1b nir bools: */
|
||||
type_t bool_type;
|
||||
|
@@ -356,6 +356,37 @@ emit_alu_dot_4x8_as_dp4acc(struct ir3_context *ctx, nir_alu_instr *alu,
|
||||
struct ir3_instruction **dst,
|
||||
struct ir3_instruction **src)
|
||||
{
|
||||
if (ctx->compiler->has_compliant_dp4acc) {
|
||||
dst[0] = ir3_DP4ACC(ctx->block, src[0], 0, src[1], 0, src[2], 0);
|
||||
|
||||
/* This is actually the LHS signedness attribute.
|
||||
* IR3_SRC_UNSIGNED ~ unsigned LHS (i.e. OpUDot and OpUDotAccSat).
|
||||
*/
|
||||
if (alu->op == nir_op_udot_4x8_uadd ||
|
||||
alu->op == nir_op_udot_4x8_uadd_sat) {
|
||||
dst[0]->cat3.signedness = IR3_SRC_UNSIGNED;
|
||||
} else {
|
||||
dst[0]->cat3.signedness = IR3_SRC_MIXED;
|
||||
}
|
||||
|
||||
/* This is actually the RHS signedness attribute.
|
||||
* IR3_SRC_PACKED_HIGH ~ signed RHS (i.e. OpSDot and OpSDotAccSat).
|
||||
*/
|
||||
if (alu->op == nir_op_sdot_4x8_iadd ||
|
||||
alu->op == nir_op_sdot_4x8_iadd_sat) {
|
||||
dst[0]->cat3.packed = IR3_SRC_PACKED_HIGH;
|
||||
} else {
|
||||
dst[0]->cat3.packed = IR3_SRC_PACKED_LOW;
|
||||
}
|
||||
|
||||
if (alu->op == nir_op_udot_4x8_uadd_sat ||
|
||||
alu->op == nir_op_sdot_4x8_iadd_sat ||
|
||||
alu->op == nir_op_sudot_4x8_iadd_sat) {
|
||||
dst[0]->flags |= IR3_INSTR_SAT;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
struct ir3_instruction *accumulator = NULL;
|
||||
if (alu->op == nir_op_udot_4x8_uadd_sat) {
|
||||
accumulator = create_immed(ctx->block, 0);
|
||||
@@ -446,6 +477,8 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
|
||||
/* it probably isn't worth emulating these with scalar-only ops */
|
||||
alu->op != nir_op_udot_4x8_uadd &&
|
||||
alu->op != nir_op_udot_4x8_uadd_sat &&
|
||||
alu->op != nir_op_sdot_4x8_iadd &&
|
||||
alu->op != nir_op_sdot_4x8_iadd_sat &&
|
||||
alu->op != nir_op_sudot_4x8_iadd &&
|
||||
alu->op != nir_op_sudot_4x8_iadd_sat &&
|
||||
/* not supported in HW, we have to fall back to normal registers */
|
||||
@@ -896,6 +929,8 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
|
||||
|
||||
case nir_op_udot_4x8_uadd:
|
||||
case nir_op_udot_4x8_uadd_sat:
|
||||
case nir_op_sdot_4x8_iadd:
|
||||
case nir_op_sdot_4x8_iadd_sat:
|
||||
case nir_op_sudot_4x8_iadd:
|
||||
case nir_op_sudot_4x8_iadd_sat: {
|
||||
if (ctx->compiler->has_dp4acc) {
|
||||
|
Reference in New Issue
Block a user