intel/compiler: Initial bits for DPAS instruction
v2: Add brw_ir_performance.cpp and brw_fs_generator.cpp changes. Fix overlapping register allocation (via has_source_and_destination_hazard). Fix incorrect destination register file encoding. v3: Prevent lower_regioning from trying to "fix" DPAS sources. v4: Add instruction latency information for scheduling and perf estimates. v5: Remove all mention of DPASW. Suggested by Curro and Caio. Update the comment in fs_inst::has_source_and_destination_hazard. Suggested by Caio. v6: Add some comments near the src2 calculation in fs_inst::size_read. Suggested by Caio. Reviewed-by: Caio Oliveira <caio.oliveira@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25994>
This commit is contained in:
@@ -696,6 +696,7 @@ static const struct opcode_desc opcode_descs[] = {
|
||||
{ BRW_OPCODE_DP2, 87, "dp2", 2, 1, GFX_LT(GFX11) },
|
||||
{ BRW_OPCODE_DP4A, 88, "dp4a", 3, 1, GFX_GE(GFX12) },
|
||||
{ BRW_OPCODE_LINE, 89, "line", 2, 1, GFX_LE(GFX10) },
|
||||
{ BRW_OPCODE_DPAS, 89, "dpas", 3, 1, GFX_GE(GFX125) },
|
||||
{ BRW_OPCODE_PLN, 90, "pln", 2, 1, GFX_GE(GFX45) & GFX_LE(GFX10) },
|
||||
{ BRW_OPCODE_MAD, 91, "mad", 3, 1, GFX_GE(GFX6) },
|
||||
{ BRW_OPCODE_LRP, 92, "lrp", 3, 1, GFX_GE(GFX6) & GFX_LE(GFX10) },
|
||||
|
@@ -1908,6 +1908,10 @@ void brw_CMPN(struct brw_codegen *p,
|
||||
struct brw_reg src0,
|
||||
struct brw_reg src1);
|
||||
|
||||
brw_inst *brw_DPAS(struct brw_codegen *p, enum gfx12_systolic_depth sdepth,
|
||||
unsigned rcount, struct brw_reg dest, struct brw_reg src0,
|
||||
struct brw_reg src1, struct brw_reg src2);
|
||||
|
||||
void
|
||||
brw_untyped_atomic(struct brw_codegen *p,
|
||||
struct brw_reg dst,
|
||||
|
@@ -254,6 +254,7 @@ enum opcode {
|
||||
BRW_OPCODE_DP2,
|
||||
BRW_OPCODE_DP4A, /**< Gfx12+ */
|
||||
BRW_OPCODE_LINE,
|
||||
BRW_OPCODE_DPAS, /**< Gfx12.5+ */
|
||||
BRW_OPCODE_PLN, /**< G45+ */
|
||||
BRW_OPCODE_MAD, /**< Gfx6+ */
|
||||
BRW_OPCODE_LRP, /**< Gfx6+ */
|
||||
@@ -1137,6 +1138,24 @@ enum tgl_sbid_mode {
|
||||
TGL_SBID_SET = 4
|
||||
};
|
||||
|
||||
|
||||
/**
 * Sub-byte precision selector.
 *
 * The numeric values are written directly into the instruction's sub-byte
 * precision fields, so they must not be renumbered.
 */
enum gfx12_sub_byte_precision {
   /** Operand elements are not sub-byte. */
   BRW_SUB_BYTE_PRECISION_NONE = 0,

   /** 4 bits. Signedness determined by base type */
   BRW_SUB_BYTE_PRECISION_4BIT = 1,

   /** 2 bits. Signedness determined by base type */
   BRW_SUB_BYTE_PRECISION_2BIT = 2,
};

/**
 * Encoded systolic depth.
 *
 * The encoding is (log2(depth) & 3), which is why a depth of 16 wraps around
 * to zero.  The enumerators are listed in order of increasing depth; the
 * explicit values are what matters.
 */
enum gfx12_systolic_depth {
   BRW_SYSTOLIC_DEPTH_2 = 1,
   BRW_SYSTOLIC_DEPTH_4 = 2,
   BRW_SYSTOLIC_DEPTH_8 = 3,
   BRW_SYSTOLIC_DEPTH_16 = 0,
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
/**
|
||||
* Allow bitwise arithmetic of tgl_sbid_mode enums.
|
||||
|
@@ -1016,6 +1016,60 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
|
||||
return inst;
|
||||
}
|
||||
|
||||
static brw_inst *
|
||||
brw_dpas_three_src(struct brw_codegen *p, enum gfx12_systolic_depth opcode,
|
||||
unsigned sdepth, unsigned rcount, struct brw_reg dest,
|
||||
struct brw_reg src0, struct brw_reg src1, struct brw_reg src2)
|
||||
{
|
||||
const struct intel_device_info *devinfo = p->devinfo;
|
||||
brw_inst *inst = next_insn(p, opcode);
|
||||
|
||||
assert(dest.file == BRW_GENERAL_REGISTER_FILE);
|
||||
brw_inst_set_dpas_3src_dst_reg_file(devinfo, inst,
|
||||
BRW_GENERAL_REGISTER_FILE);
|
||||
brw_inst_set_dpas_3src_dst_reg_nr(devinfo, inst, dest.nr);
|
||||
brw_inst_set_dpas_3src_dst_subreg_nr(devinfo, inst, dest.subnr);
|
||||
|
||||
if (brw_reg_type_is_floating_point(dest.type)) {
|
||||
brw_inst_set_dpas_3src_exec_type(devinfo, inst,
|
||||
BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT);
|
||||
} else {
|
||||
brw_inst_set_dpas_3src_exec_type(devinfo, inst,
|
||||
BRW_ALIGN1_3SRC_EXEC_TYPE_INT);
|
||||
}
|
||||
|
||||
brw_inst_set_dpas_3src_sdepth(devinfo, inst, sdepth);
|
||||
brw_inst_set_dpas_3src_rcount(devinfo, inst, rcount - 1);
|
||||
|
||||
brw_inst_set_dpas_3src_dst_type(devinfo, inst, dest.type);
|
||||
brw_inst_set_dpas_3src_src0_type(devinfo, inst, src0.type);
|
||||
brw_inst_set_dpas_3src_src1_type(devinfo, inst, src1.type);
|
||||
brw_inst_set_dpas_3src_src2_type(devinfo, inst, src2.type);
|
||||
|
||||
assert(src0.file == BRW_GENERAL_REGISTER_FILE ||
|
||||
(src0.file == BRW_ARCHITECTURE_REGISTER_FILE &&
|
||||
src0.nr == BRW_ARF_NULL));
|
||||
|
||||
brw_inst_set_dpas_3src_src0_reg_file(devinfo, inst, src0.file);
|
||||
brw_inst_set_dpas_3src_src0_reg_nr(devinfo, inst, src0.nr);
|
||||
brw_inst_set_dpas_3src_src0_subreg_nr(devinfo, inst, src0.subnr);
|
||||
|
||||
assert(src1.file == BRW_GENERAL_REGISTER_FILE);
|
||||
|
||||
brw_inst_set_dpas_3src_src1_reg_file(devinfo, inst, src1.file);
|
||||
brw_inst_set_dpas_3src_src1_reg_nr(devinfo, inst, src1.nr);
|
||||
brw_inst_set_dpas_3src_src1_subreg_nr(devinfo, inst, src1.subnr);
|
||||
brw_inst_set_dpas_3src_src1_subbyte(devinfo, inst, BRW_SUB_BYTE_PRECISION_NONE);
|
||||
|
||||
assert(src2.file == BRW_GENERAL_REGISTER_FILE);
|
||||
|
||||
brw_inst_set_dpas_3src_src2_reg_file(devinfo, inst, src2.file);
|
||||
brw_inst_set_dpas_3src_src2_reg_nr(devinfo, inst, src2.nr);
|
||||
brw_inst_set_dpas_3src_src2_subreg_nr(devinfo, inst, src2.subnr);
|
||||
brw_inst_set_dpas_3src_src2_subbyte(devinfo, inst, BRW_SUB_BYTE_PRECISION_NONE);
|
||||
|
||||
return inst;
|
||||
}
|
||||
|
||||
/***********************************************************************
|
||||
* Convenience routines.
|
||||
@@ -1248,6 +1302,15 @@ brw_PLN(struct brw_codegen *p, struct brw_reg dest,
|
||||
return brw_alu2(p, BRW_OPCODE_PLN, dest, src0, src1);
|
||||
}
|
||||
|
||||
brw_inst *
|
||||
brw_DPAS(struct brw_codegen *p, enum gfx12_systolic_depth sdepth,
|
||||
unsigned rcount, struct brw_reg dest, struct brw_reg src0,
|
||||
struct brw_reg src1, struct brw_reg src2)
|
||||
{
|
||||
return brw_dpas_three_src(p, BRW_OPCODE_DPAS, sdepth, rcount, dest, src0,
|
||||
src1, src2);
|
||||
}
|
||||
|
||||
brw_inst *
|
||||
brw_F32TO16(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src)
|
||||
{
|
||||
|
@@ -687,7 +687,10 @@ general_restrictions_based_on_operand_types(const struct brw_isa_info *isa,
|
||||
return error_msg;
|
||||
|
||||
if (devinfo->ver >= 11) {
|
||||
if (num_sources == 3) {
|
||||
/* A register type of B or UB for DPAS actually means 4 bytes packed into
|
||||
* a D or UD, so it is allowed.
|
||||
*/
|
||||
if (num_sources == 3 && brw_inst_opcode(isa, inst) != BRW_OPCODE_DPAS) {
|
||||
ERROR_IF(brw_reg_type_to_size(brw_inst_3src_a1_src1_type(devinfo, inst)) == 1 ||
|
||||
brw_reg_type_to_size(brw_inst_3src_a1_src2_type(devinfo, inst)) == 1,
|
||||
"Byte data type is not supported for src1/2 register regioning. This includes "
|
||||
|
@@ -395,6 +395,21 @@ fs_inst::has_source_and_destination_hazard() const
|
||||
default:
|
||||
return !is_uniform(src[0]);
|
||||
}
|
||||
case BRW_OPCODE_DPAS:
|
||||
/* This is overly conservative. The actual hazard is more complicated to
|
||||
* describe. When the repeat count is N, the single instruction behaves
|
||||
* like N instructions with a repeat count of one, but the destination
|
||||
* and source registers are incremented (in somewhat complex ways) for
|
||||
* each instruction.
|
||||
*
|
||||
* This means the source and destination registers are actually ranges of
|
||||
* registers. The hazard exists if an earlier iteration would write a
|
||||
* register that should be read by a later iteration.
|
||||
*
|
||||
* There may be some advantage to properly modeling this, but for now,
|
||||
* be overly conservative.
|
||||
*/
|
||||
return rcount > 1;
|
||||
default:
|
||||
/* The SIMD16 compressed instruction
|
||||
*
|
||||
@@ -844,6 +859,9 @@ fs_inst::components_read(unsigned i) const
|
||||
else
|
||||
return 1;
|
||||
|
||||
case BRW_OPCODE_DPAS:
|
||||
unreachable("Do not use components_read() for DPAS.");
|
||||
|
||||
default:
|
||||
return 1;
|
||||
}
|
||||
@@ -904,6 +922,26 @@ fs_inst::size_read(int arg) const
|
||||
}
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_DPAS:
|
||||
switch (arg) {
|
||||
case 0:
|
||||
if (src[0].type == BRW_REGISTER_TYPE_HF) {
|
||||
return rcount * REG_SIZE / 2;
|
||||
} else {
|
||||
return rcount * REG_SIZE;
|
||||
}
|
||||
case 1:
|
||||
return sdepth * REG_SIZE;
|
||||
case 2:
|
||||
/* This is simpler than the formula described in the Bspec, but it
|
||||
* covers all of the cases that we support on DG2.
|
||||
*/
|
||||
return rcount * REG_SIZE;
|
||||
default:
|
||||
unreachable("Invalid source number.");
|
||||
}
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_TEX:
|
||||
case FS_OPCODE_TXB:
|
||||
case SHADER_OPCODE_TXD:
|
||||
|
@@ -834,6 +834,27 @@ namespace brw {
|
||||
return inst;
|
||||
}
|
||||
|
||||
/**
 * Emit a DPAS instruction and record its systolic depth and repeat count.
 *
 * Only SIMD8 dispatch, a systolic depth of 8 and repeat counts of 1, 2, 4
 * or 8 are accepted (checked by assertion).
 *
 * \param sdepth  Systolic depth as a plain number (not the hardware
 *                encoding).
 * \param rcount  Repeat count.
 *
 * \return The emitted instruction, with size_written set to cover \p rcount
 *         registers, or half that for a half-float destination.
 */
instruction *
DPAS(const dst_reg &dst, const src_reg &src0, const src_reg &src1, const src_reg &src2,
     unsigned sdepth, unsigned rcount) const
{
   assert(_dispatch_width == 8);
   assert(sdepth == 8);
   assert(rcount == 1 || rcount == 2 || rcount == 4 || rcount == 8);

   instruction *const dpas = emit(BRW_OPCODE_DPAS, dst, src0, src1, src2);

   /* One register per repeat; a half-float destination needs half that. */
   const unsigned full_size = rcount * REG_SIZE;
   dpas->size_written =
      dst.type == BRW_REGISTER_TYPE_HF ? full_size / 2 : full_size;

   dpas->sdepth = sdepth;
   dpas->rcount = rcount;

   return dpas;
}
|
||||
|
||||
fs_visitor *shader;
|
||||
|
||||
fs_inst *BREAK() { return emit(BRW_OPCODE_BREAK); }
|
||||
|
@@ -1606,6 +1606,19 @@ fs_generator::enable_debug(const char *shader_name)
|
||||
this->shader_name = shader_name;
|
||||
}
|
||||
|
||||
/**
 * Convert a systolic depth given as a plain number (2, 4, 8 or 16) into its
 * hardware encoding.  Any other value is treated as unreachable.
 */
static gfx12_systolic_depth
translate_systolic_depth(unsigned d)
{
   /* Could also return (ffs(d) - 1) & 3. */
   if (d == 2)
      return BRW_SYSTOLIC_DEPTH_2;

   if (d == 4)
      return BRW_SYSTOLIC_DEPTH_4;

   if (d == 8)
      return BRW_SYSTOLIC_DEPTH_8;

   if (d == 16)
      return BRW_SYSTOLIC_DEPTH_16;

   unreachable("Invalid systolic depth.");
}
|
||||
|
||||
int
|
||||
fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
||||
struct shader_stats shader_stats,
|
||||
@@ -1791,6 +1804,12 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
||||
brw_LINE(p, dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_DPAS:
|
||||
assert(devinfo->verx10 >= 125);
|
||||
brw_DPAS(p, translate_systolic_depth(inst->sdepth), inst->rcount,
|
||||
dst, src[0], src[1], src[2]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_MAD:
|
||||
assert(devinfo->ver >= 6);
|
||||
if (devinfo->ver < 10)
|
||||
|
@@ -253,8 +253,10 @@ namespace {
|
||||
has_invalid_src_region(const intel_device_info *devinfo, const fs_inst *inst,
|
||||
unsigned i)
|
||||
{
|
||||
if (is_send(inst) || inst->is_math() || inst->is_control_source(i))
|
||||
if (is_send(inst) || inst->is_math() || inst->is_control_source(i) ||
|
||||
inst->opcode == BRW_OPCODE_DPAS) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Empirical testing shows that Broadwell has a bug affecting half-float
|
||||
* MAD instructions when any of its sources has a non-zero offset, such
|
||||
|
@@ -524,6 +524,67 @@ brw_inst_set_3src_a1_src2_imm(ASSERTED const struct intel_device_info *devinfo,
|
||||
}
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* Three-source systolic instructions:
|
||||
* @{
|
||||
*/
|
||||
F(dpas_3src_src2_reg_nr, /* 4+ */ -1, -1, /* 12+ */ 127, 120)
|
||||
F(dpas_3src_src2_subreg_nr, /* 4+ */ -1, -1, /* 12+ */ 119, 115)
|
||||
F(dpas_3src_src2_reg_file, /* 4+ */ -1, -1, /* 12+ */ 114, 114)
|
||||
F(dpas_3src_src1_reg_nr, /* 4+ */ -1, -1, /* 12+ */ 111, 104)
|
||||
F(dpas_3src_src1_subreg_nr, /* 4+ */ -1, -1, /* 12+ */ 103, 99)
|
||||
F(dpas_3src_src1_reg_file, /* 4+ */ -1, -1, /* 12+ */ 98, 98)
|
||||
F(dpas_3src_src1_hw_type, /* 4+ */ -1, -1, /* 12+ */ 90, 88)
|
||||
F(dpas_3src_src1_subbyte, /* 4+ */ -1, -1, /* 12+ */ 87, 86)
|
||||
F(dpas_3src_src2_subbyte, /* 4+ */ -1, -1, /* 12+ */ 85, 84)
|
||||
F(dpas_3src_src2_hw_type, /* 4+ */ -1, -1, /* 12+ */ 82, 80)
|
||||
F(dpas_3src_src0_reg_nr, /* 4+ */ -1, -1, /* 12+ */ 79, 72)
|
||||
F(dpas_3src_src0_subreg_nr, /* 4+ */ -1, -1, /* 12+ */ 71, 67)
|
||||
F(dpas_3src_src0_reg_file, /* 4+ */ -1, -1, /* 12+ */ 66, 66)
|
||||
F(dpas_3src_dst_reg_nr, /* 4+ */ -1, -1, /* 12+ */ 63, 56)
|
||||
F(dpas_3src_dst_subreg_nr, /* 4+ */ -1, -1, /* 12+ */ 55, 51)
|
||||
F(dpas_3src_dst_reg_file, /* 4+ */ -1, -1, /* 12+ */ 50, 50)
|
||||
F(dpas_3src_sdepth, /* 4+ */ -1, -1, /* 12+ */ 49, 48)
|
||||
F(dpas_3src_rcount, /* 4+ */ -1, -1, /* 12+ */ 45, 43)
|
||||
F(dpas_3src_src0_hw_type, /* 4+ */ -1, -1, /* 12+ */ 42, 40)
|
||||
F(dpas_3src_exec_type, /* 4+ */ -1, -1, /* 12+ */ 39, 39)
|
||||
F(dpas_3src_dst_hw_type, /* 4+ */ -1, -1, /* 12+ */ 38, 36)
|
||||
/** @} */
|
||||
|
||||
#define REG_TYPE(reg) \
|
||||
static inline void \
|
||||
brw_inst_set_dpas_3src_##reg##_type(const struct intel_device_info *devinfo, \
|
||||
brw_inst *inst, enum brw_reg_type type) \
|
||||
{ \
|
||||
UNUSED enum gfx10_align1_3src_exec_type exec_type = \
|
||||
(enum gfx10_align1_3src_exec_type) brw_inst_dpas_3src_exec_type(devinfo,\
|
||||
inst); \
|
||||
if (brw_reg_type_is_floating_point(type)) { \
|
||||
assert(exec_type == BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT); \
|
||||
} else { \
|
||||
assert(exec_type == BRW_ALIGN1_3SRC_EXEC_TYPE_INT); \
|
||||
} \
|
||||
unsigned hw_type = brw_reg_type_to_a1_hw_3src_type(devinfo, type); \
|
||||
brw_inst_set_dpas_3src_##reg##_hw_type(devinfo, inst, hw_type); \
|
||||
} \
|
||||
\
|
||||
static inline enum brw_reg_type \
|
||||
brw_inst_dpas_3src_##reg##_type(const struct intel_device_info *devinfo, \
|
||||
const brw_inst *inst) \
|
||||
{ \
|
||||
enum gfx10_align1_3src_exec_type exec_type = \
|
||||
(enum gfx10_align1_3src_exec_type) brw_inst_dpas_3src_exec_type(devinfo,\
|
||||
inst); \
|
||||
unsigned hw_type = brw_inst_dpas_3src_##reg##_hw_type(devinfo, inst); \
|
||||
return brw_a1_hw_3src_type_to_reg_type(devinfo, hw_type, exec_type); \
|
||||
}
|
||||
|
||||
REG_TYPE(dst)
|
||||
REG_TYPE(src0)
|
||||
REG_TYPE(src1)
|
||||
REG_TYPE(src2)
|
||||
#undef REG_TYPE
|
||||
|
||||
/**
|
||||
* Flow control instruction bits:
|
||||
* @{
|
||||
|
@@ -199,6 +199,16 @@ struct backend_instruction {
|
||||
*/
|
||||
unsigned flag_subreg:2;
|
||||
|
||||
/**
|
||||
* Systolic depth used by DPAS instruction.
|
||||
*/
|
||||
unsigned sdepth:4;
|
||||
|
||||
/**
|
||||
* Repeat count used by DPAS instruction.
|
||||
*/
|
||||
unsigned rcount:4;
|
||||
|
||||
/** The number of hardware registers used for a message header. */
|
||||
uint8_t header_size;
|
||||
};
|
||||
|
@@ -148,6 +148,8 @@ namespace {
|
||||
!brw_reg_type_is_floating_point(tx) && type_sz(tx) == 4 &&
|
||||
type_sz(inst->src[0].type) == type_sz(inst->src[1].type))
|
||||
tx = brw_int_type(8, tx == BRW_REGISTER_TYPE_D);
|
||||
|
||||
rcount = inst->opcode == BRW_OPCODE_DPAS ? inst->rcount : 0;
|
||||
}
|
||||
|
||||
instruction_info(const struct brw_isa_info *isa,
|
||||
@@ -155,7 +157,7 @@ namespace {
|
||||
isa(isa), devinfo(isa->devinfo), op(inst->opcode),
|
||||
td(inst->dst.type), sd(DIV_ROUND_UP(inst->size_written, REG_SIZE)),
|
||||
tx(get_exec_type(inst)), sx(0), ss(0), sc(0),
|
||||
desc(inst->desc), sfid(inst->sfid)
|
||||
desc(inst->desc), sfid(inst->sfid), rcount(0)
|
||||
{
|
||||
/* Compute the maximum source size. */
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(inst->src); i++)
|
||||
@@ -195,6 +197,8 @@ namespace {
|
||||
uint32_t desc;
|
||||
/** Send message shared function ID. */
|
||||
uint8_t sfid;
|
||||
/** Repeat count for DPAS instructions. */
|
||||
uint8_t rcount;
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -505,6 +509,32 @@ namespace {
|
||||
else
|
||||
abort();
|
||||
|
||||
case BRW_OPCODE_DPAS: {
|
||||
unsigned ld;
|
||||
|
||||
switch (info.rcount) {
|
||||
case 1:
|
||||
ld = 21;
|
||||
break;
|
||||
case 2:
|
||||
ld = 22;
|
||||
break;
|
||||
case 8:
|
||||
default:
|
||||
ld = 32;
|
||||
break;
|
||||
}
|
||||
|
||||
/* DPAS cannot write the accumulator or the flags, so pass UINT_MAX
|
||||
* for la and lf.
|
||||
*/
|
||||
if (devinfo->verx10 >= 125)
|
||||
return calculate_desc(info, EU_UNIT_FPU, 0, 2, 1, 0, 2,
|
||||
0, ld, UINT_MAX, UINT_MAX, 0, 0);
|
||||
else
|
||||
abort();
|
||||
}
|
||||
|
||||
case SHADER_OPCODE_RCP:
|
||||
case SHADER_OPCODE_RSQ:
|
||||
case SHADER_OPCODE_SQRT:
|
||||
|
@@ -627,6 +627,21 @@ schedule_node::set_latency_gfx7(const struct brw_isa_info *isa)
|
||||
}
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_DPAS:
|
||||
switch (inst->rcount) {
|
||||
case 1:
|
||||
latency = 21;
|
||||
break;
|
||||
case 2:
|
||||
latency = 22;
|
||||
break;
|
||||
case 8:
|
||||
default:
|
||||
latency = 32;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
/* 2 cycles:
|
||||
* mul(8) g4<1>F g2<0,1,0>F 0.5F { align1 WE_normal 1Q };
|
||||
|
@@ -164,6 +164,13 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
|
||||
if (devinfo->ver > 7 && op == BRW_OPCODE_F16TO32)
|
||||
return "f16to32";
|
||||
|
||||
/* DPAS instructions may transiently exist on platforms that do not
|
||||
* support DPAS. They will eventually be lowered, but in the meantime it
|
||||
* must be possible to query the instruction name.
|
||||
*/
|
||||
if (devinfo->verx10 < 125 && op == BRW_OPCODE_DPAS)
|
||||
return "dpas";
|
||||
|
||||
assert(brw_opcode_desc(isa, op)->name);
|
||||
return brw_opcode_desc(isa, op)->name;
|
||||
case FS_OPCODE_FB_WRITE:
|
||||
@@ -936,6 +943,7 @@ backend_instruction::can_do_source_mods() const
|
||||
case BRW_OPCODE_ROR:
|
||||
case BRW_OPCODE_SUBB:
|
||||
case BRW_OPCODE_DP4A:
|
||||
case BRW_OPCODE_DPAS:
|
||||
case SHADER_OPCODE_BROADCAST:
|
||||
case SHADER_OPCODE_CLUSTER_BROADCAST:
|
||||
case SHADER_OPCODE_MOV_INDIRECT:
|
||||
|
Reference in New Issue
Block a user