intel/compiler/xe2: Fix for the removal of most predication modes.

Reworks:
* Remove changes to the fixup_nomask workaround since it applies only to
  the Gfx12 family.

Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26860>
Author: Francisco Jerez
Date: 2022-07-22 17:11:52 -07:00
Committed-by: Marge Bot
Parent: f79123e1d9
Commit: f974eacab3

5 changed files with 61 additions and 37 deletions


@@ -227,6 +227,12 @@ static const char *const pred_ctrl_align1[16] = {
    [BRW_PREDICATE_ALIGN1_ALL32H] = ".all32h",
 };

+static const char *const xe2_pred_ctrl[4] = {
+   [BRW_PREDICATE_NORMAL] = "",
+   [XE2_PREDICATE_ANY]    = ".any",
+   [XE2_PREDICATE_ALL]    = ".all",
+};
+
 static const char *const thread_ctrl[4] = {
    [BRW_THREAD_NORMAL] = "",
    [BRW_THREAD_ATOMIC] = "atomic",
@@ -2059,7 +2065,10 @@ brw_disassemble_inst(FILE *file, const struct brw_isa_info *isa,
       format(file, "f%"PRIu64".%"PRIu64,
              devinfo->ver >= 7 ? brw_inst_flag_reg_nr(devinfo, inst) : 0,
              brw_inst_flag_subreg_nr(devinfo, inst));
-      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
+      if (devinfo->ver >= 20) {
+         err |= control(file, "predicate control", xe2_pred_ctrl,
+                        brw_inst_pred_control(devinfo, inst), NULL);
+      } else if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
          err |= control(file, "predicate control align1", pred_ctrl_align1,
                         brw_inst_pred_control(devinfo, inst), NULL);
       } else {


@@ -1032,6 +1032,8 @@ enum ENUM_PACKED brw_predicate {
    BRW_PREDICATE_ALIGN16_REPLICATE_W = 5,
    BRW_PREDICATE_ALIGN16_ANY4H = 6,
    BRW_PREDICATE_ALIGN16_ALL4H = 7,
+   XE2_PREDICATE_ANY = 2,
+   XE2_PREDICATE_ALL = 3
 };

 enum ENUM_PACKED brw_reg_file {
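A side note on the encodings (a sketch, not part of the patch): the two new
enumerants reuse low raw values rather than extending the field, which is why
the disassembler hunk above dispatches on devinfo->ver >= 20 before it even
looks at the access mode. Assuming the Align1 vertical modes keep their
historical raw values 2 and 3, the aliasing could be documented with Mesa's
STATIC_ASSERT macro from util/macros.h:

   STATIC_ASSERT(XE2_PREDICATE_ANY == BRW_PREDICATE_ALIGN1_ANYV);
   STATIC_ASSERT(XE2_PREDICATE_ALL == BRW_PREDICATE_ALIGN1_ALLV);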


@@ -984,8 +984,11 @@ fs_inst::size_read(int arg) const
 namespace {
    unsigned
-   predicate_width(brw_predicate predicate)
+   predicate_width(const intel_device_info *devinfo, brw_predicate predicate)
    {
+      if (devinfo->ver >= 20) {
+         return 1;
+      } else {
       switch (predicate) {
       case BRW_PREDICATE_NONE: return 1;
       case BRW_PREDICATE_NORMAL: return 1;
@@ -1002,6 +1005,7 @@ namespace {
       default: unreachable("Unsupported predicate");
       }
+      }
    }

    /* Return the subset of flag registers that an instruction could
     * potentially read or write based on the execution controls and flag
@@ -1039,15 +1043,15 @@ namespace {
 unsigned
 fs_inst::flags_read(const intel_device_info *devinfo) const
 {
-   if (predicate == BRW_PREDICATE_ALIGN1_ANYV ||
-       predicate == BRW_PREDICATE_ALIGN1_ALLV) {
+   if (devinfo->ver < 20 && (predicate == BRW_PREDICATE_ALIGN1_ANYV ||
+                             predicate == BRW_PREDICATE_ALIGN1_ALLV)) {
       /* The vertical predication modes combine corresponding bits from
        * f0.0 and f1.0 on Gfx7+, and f0.0 and f0.1 on older hardware.
        */
       const unsigned shift = devinfo->ver >= 7 ? 4 : 2;
       return flag_mask(this, 1) << shift | flag_mask(this, 1);
    } else if (predicate) {
-      return flag_mask(this, predicate_width(predicate));
+      return flag_mask(this, predicate_width(devinfo, predicate));
    } else {
       unsigned mask = 0;
       for (int i = 0; i < sources; i++) {
@@ -4607,6 +4611,7 @@ brw_emit_predicate_on_sample_mask(const fs_builder &bld, fs_inst *inst)
    assert(inst->predicate == BRW_PREDICATE_NORMAL);
    assert(!inst->predicate_inverse);
    assert(inst->flag_subreg == 0);
+   assert(s.devinfo->ver < 20);

    /* Combine the sample mask with the existing predicate by using a
     * vertical predication mode.
     */
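The new assertion records that this helper still depends on the Align1
vertical (ANYV) predication mode, which Xe2 removes, so it may only be
reached on pre-Xe2 hardware. A minimal sketch of the guard a caller would
need (the helper name and arguments are the ones from the hunk above; the
guard itself is illustrative rather than code from this patch):

   /* Hypothetical call site: only use the ANYV-based helper before Xe2. */
   if (devinfo->ver < 20)
      brw_emit_predicate_on_sample_mask(bld, inst);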


@@ -4176,7 +4176,8 @@ fs_nir_emit_fs_intrinsic(nir_to_brw_state &ntb,
          /* Only jump when the whole quad is demoted. For historical
           * reasons this is also used for discard.
           */
-         jump->predicate = BRW_PREDICATE_ALIGN1_ANY4H;
+         jump->predicate = (devinfo->ver >= 20 ? XE2_PREDICATE_ANY :
+                            BRW_PREDICATE_ALIGN1_ANY4H);
       }

       if (devinfo->ver < 7)
@@ -7167,7 +7168,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
       unreachable("not reached");

    case nir_intrinsic_vote_any: {
-      const fs_builder ubld = bld.exec_all().group(1, 0);
+      const fs_builder ubld1 = bld.exec_all().group(1, 0);

       /* The any/all predicates do not consider channel enables. To prevent
        * dead channels from affecting the result, we initialize the flag with
@@ -7175,10 +7176,10 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
        */
       if (s.dispatch_width == 32) {
          /* For SIMD32, we use a UD type so we fill both f0.0 and f0.1. */
-         ubld.MOV(retype(brw_flag_reg(0, 0), BRW_REGISTER_TYPE_UD),
-                  brw_imm_ud(0));
+         ubld1.MOV(retype(brw_flag_reg(0, 0), BRW_REGISTER_TYPE_UD),
+                   brw_imm_ud(0));
       } else {
-         ubld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0));
+         ubld1.MOV(brw_flag_reg(0, 0), brw_imm_uw(0));
       }

       bld.CMP(bld.null_reg_d(), get_nir_src(ntb, instr->src[0]), brw_imm_d(0), BRW_CONDITIONAL_NZ);
@@ -7188,9 +7189,11 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
        * getting garbage in the second half. Work around this by using a pair
        * of 1-wide MOVs and scattering the result.
        */
+      const fs_builder ubld = devinfo->ver >= 20 ? bld.exec_all() : ubld1;
       fs_reg res1 = ubld.vgrf(BRW_REGISTER_TYPE_D);
       ubld.MOV(res1, brw_imm_d(0));
-      set_predicate(s.dispatch_width == 8 ? BRW_PREDICATE_ALIGN1_ANY8H :
+      set_predicate(devinfo->ver >= 20 ? XE2_PREDICATE_ANY :
+                    s.dispatch_width == 8 ? BRW_PREDICATE_ALIGN1_ANY8H :
                     s.dispatch_width == 16 ? BRW_PREDICATE_ALIGN1_ANY16H :
                     BRW_PREDICATE_ALIGN1_ANY32H,
                     ubld.MOV(res1, brw_imm_d(-1)));
@@ -7199,7 +7202,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
       break;
    }

    case nir_intrinsic_vote_all: {
-      const fs_builder ubld = bld.exec_all().group(1, 0);
+      const fs_builder ubld1 = bld.exec_all().group(1, 0);

       /* The any/all predicates do not consider channel enables. To prevent
        * dead channels from affecting the result, we initialize the flag with
@@ -7207,10 +7210,10 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
        */
       if (s.dispatch_width == 32) {
          /* For SIMD32, we use a UD type so we fill both f0.0 and f0.1. */
-         ubld.MOV(retype(brw_flag_reg(0, 0), BRW_REGISTER_TYPE_UD),
-                  brw_imm_ud(0xffffffff));
+         ubld1.MOV(retype(brw_flag_reg(0, 0), BRW_REGISTER_TYPE_UD),
+                   brw_imm_ud(0xffffffff));
       } else {
-         ubld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0xffff));
+         ubld1.MOV(brw_flag_reg(0, 0), brw_imm_uw(0xffff));
       }

       bld.CMP(bld.null_reg_d(), get_nir_src(ntb, instr->src[0]), brw_imm_d(0), BRW_CONDITIONAL_NZ);
@@ -7220,9 +7223,11 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
        * getting garbage in the second half. Work around this by using a pair
        * of 1-wide MOVs and scattering the result.
        */
+      const fs_builder ubld = devinfo->ver >= 20 ? bld.exec_all() : ubld1;
       fs_reg res1 = ubld.vgrf(BRW_REGISTER_TYPE_D);
       ubld.MOV(res1, brw_imm_d(0));
-      set_predicate(s.dispatch_width == 8 ? BRW_PREDICATE_ALIGN1_ALL8H :
+      set_predicate(devinfo->ver >= 20 ? XE2_PREDICATE_ALL :
+                    s.dispatch_width == 8 ? BRW_PREDICATE_ALIGN1_ALL8H :
                     s.dispatch_width == 16 ? BRW_PREDICATE_ALIGN1_ALL16H :
                     BRW_PREDICATE_ALIGN1_ALL32H,
                     ubld.MOV(res1, brw_imm_d(-1)));
@@ -7240,7 +7245,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
       }

       fs_reg uniformized = bld.emit_uniformize(value);
-      const fs_builder ubld = bld.exec_all().group(1, 0);
+      const fs_builder ubld1 = bld.exec_all().group(1, 0);

       /* The any/all predicates do not consider channel enables. To prevent
        * dead channels from affecting the result, we initialize the flag with
@@ -7248,10 +7253,10 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
        */
       if (s.dispatch_width == 32) {
          /* For SIMD32, we use a UD type so we fill both f0.0 and f0.1. */
-         ubld.MOV(retype(brw_flag_reg(0, 0), BRW_REGISTER_TYPE_UD),
-                  brw_imm_ud(0xffffffff));
+         ubld1.MOV(retype(brw_flag_reg(0, 0), BRW_REGISTER_TYPE_UD),
+                   brw_imm_ud(0xffffffff));
       } else {
-         ubld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0xffff));
+         ubld1.MOV(brw_flag_reg(0, 0), brw_imm_uw(0xffff));
       }

       bld.CMP(bld.null_reg_d(), value, uniformized, BRW_CONDITIONAL_Z);
@@ -7261,9 +7266,11 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
        * getting garbage in the second half. Work around this by using a pair
        * of 1-wide MOVs and scattering the result.
        */
+      const fs_builder ubld = devinfo->ver >= 20 ? bld.exec_all() : ubld1;
       fs_reg res1 = ubld.vgrf(BRW_REGISTER_TYPE_D);
       ubld.MOV(res1, brw_imm_d(0));
-      set_predicate(s.dispatch_width == 8 ? BRW_PREDICATE_ALIGN1_ALL8H :
+      set_predicate(devinfo->ver >= 20 ? XE2_PREDICATE_ALL :
+                    s.dispatch_width == 8 ? BRW_PREDICATE_ALIGN1_ALL8H :
                     s.dispatch_width == 16 ? BRW_PREDICATE_ALIGN1_ALL16H :
                     BRW_PREDICATE_ALIGN1_ALL32H,
                     ubld.MOV(res1, brw_imm_d(-1)));
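The same three-way predicate selection recurs in each of the vote cases
above: on Xe2 the group-size suffix disappears and a single .any/.all form
covers every dispatch width. A minimal sketch of how that choice could be
factored out; brw_any_predicate() is a hypothetical helper name, not
something this patch adds, and its body simply restates the ternaries from
the hunks above:

   static enum brw_predicate
   brw_any_predicate(const struct intel_device_info *devinfo,
                     unsigned dispatch_width)
   {
      if (devinfo->ver >= 20)
         return XE2_PREDICATE_ANY;           /* width-independent on Xe2 */
      else if (dispatch_width == 8)
         return BRW_PREDICATE_ALIGN1_ANY8H;
      else if (dispatch_width == 16)
         return BRW_PREDICATE_ALIGN1_ANY16H;
      else
         return BRW_PREDICATE_ALIGN1_ANY32H;
   }

An analogous all-channels variant returning XE2_PREDICATE_ALL or the
ALIGN1_ALL*H values would cover the vote_all and vote_ieq cases.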


@@ -1514,6 +1514,7 @@ emit_predicate_on_vector_mask(const fs_builder &bld, fs_inst *inst)
    assert(inst->predicate == BRW_PREDICATE_NORMAL);
    assert(!inst->predicate_inverse);
    assert(inst->flag_subreg == 0);
+   assert(s.devinfo->ver < 20);

    /* Combine the vector mask with the existing predicate by using a
     * vertical predication mode.
     */