intel/fs: Add support for 16-bit A64 float and integer atomics
The messages for those 16-bit operations still use 32-bit sources and destinations, so expand them accordingly when building the payload.

Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8750>
This commit is contained in:

committed by
Marge Bot

parent
a572471edc
commit
91192696e6
@@ -452,6 +452,10 @@ static const char *const dp_dc1_msg_type_hsw[32] = {
|
|||||||
"DC untyped atomic float op",
|
"DC untyped atomic float op",
|
||||||
[GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP] =
|
[GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP] =
|
||||||
"DC A64 untyped atomic float op",
|
"DC A64 untyped atomic float op",
|
||||||
|
[GEN12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_INT_OP] =
|
||||||
|
"DC A64 untyped atomic half-integer op",
|
||||||
|
[GEN12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_FLOAT_OP] =
|
||||||
|
"DC A64 untyped atomic half-float op",
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char *const aop[16] = {
|
static const char *const aop[16] = {
|
||||||
@@ -2067,6 +2071,7 @@ brw_disassemble_inst(FILE *file, const struct gen_device_info *devinfo,
|
|||||||
case HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2:
|
case HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2:
|
||||||
case HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP_SIMD4X2:
|
case HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP_SIMD4X2:
|
||||||
case GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP:
|
case GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP:
|
||||||
|
case GEN12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_INT_OP:
|
||||||
control(file, "atomic op", aop, msg_ctrl & 0xf, &space);
|
control(file, "atomic op", aop, msg_ctrl & 0xf, &space);
|
||||||
break;
|
break;
|
||||||
case HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ:
|
case HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ:
|
||||||
@@ -2082,6 +2087,7 @@ brw_disassemble_inst(FILE *file, const struct gen_device_info *devinfo,
|
|||||||
}
|
}
|
||||||
case GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP:
|
case GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP:
|
||||||
case GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP:
|
case GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP:
|
||||||
|
case GEN12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_FLOAT_OP:
|
||||||
format(file, "SIMD%d,", (msg_ctrl & (1 << 4)) ? 8 : 16);
|
format(file, "SIMD%d,", (msg_ctrl & (1 << 4)) ? 8 : 16);
|
||||||
control(file, "atomic float op", aop_float, msg_ctrl & 0xf,
|
control(file, "atomic float op", aop_float, msg_ctrl & 0xf,
|
||||||
&space);
|
&space);
|
||||||
|
@@ -894,9 +894,12 @@ brw_dp_a64_untyped_atomic_desc(const struct gen_device_info *devinfo,
|
|||||||
{
|
{
|
||||||
assert(exec_size == 8);
|
assert(exec_size == 8);
|
||||||
assert(devinfo->gen >= 8);
|
assert(devinfo->gen >= 8);
|
||||||
assert(bit_size == 32 || bit_size == 64);
|
assert(bit_size == 16 || bit_size == 32 || bit_size == 64);
|
||||||
|
assert(devinfo->gen >= 12 || bit_size >= 32);
|
||||||
|
|
||||||
const unsigned msg_type = GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP;
|
const unsigned msg_type = bit_size == 16 ?
|
||||||
|
GEN12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_INT_OP :
|
||||||
|
GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP;
|
||||||
|
|
||||||
const unsigned msg_control =
|
const unsigned msg_control =
|
||||||
SET_BITS(atomic_op, 3, 0) |
|
SET_BITS(atomic_op, 3, 0) |
|
||||||
@@ -910,14 +913,19 @@ brw_dp_a64_untyped_atomic_desc(const struct gen_device_info *devinfo,
|
|||||||
static inline uint32_t
|
static inline uint32_t
|
||||||
brw_dp_a64_untyped_atomic_float_desc(const struct gen_device_info *devinfo,
|
brw_dp_a64_untyped_atomic_float_desc(const struct gen_device_info *devinfo,
|
||||||
ASSERTED unsigned exec_size,
|
ASSERTED unsigned exec_size,
|
||||||
|
unsigned bit_size,
|
||||||
unsigned atomic_op,
|
unsigned atomic_op,
|
||||||
bool response_expected)
|
bool response_expected)
|
||||||
{
|
{
|
||||||
assert(exec_size == 8);
|
assert(exec_size == 8);
|
||||||
assert(devinfo->gen >= 9);
|
assert(devinfo->gen >= 9);
|
||||||
|
assert(bit_size == 16 || bit_size == 32);
|
||||||
|
assert(devinfo->gen >= 12 || bit_size == 32);
|
||||||
|
|
||||||
assert(exec_size > 0);
|
assert(exec_size > 0);
|
||||||
const unsigned msg_type = GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP;
|
const unsigned msg_type = bit_size == 32 ?
|
||||||
|
GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP :
|
||||||
|
GEN12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_FLOAT_OP;
|
||||||
|
|
||||||
const unsigned msg_control =
|
const unsigned msg_control =
|
||||||
SET_BITS(atomic_op, 1, 0) |
|
SET_BITS(atomic_op, 1, 0) |
|
||||||
|
@@ -435,8 +435,10 @@ enum opcode {
|
|||||||
SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL,
|
SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL,
|
||||||
SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL,
|
SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL,
|
||||||
SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL,
|
SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL,
|
||||||
|
SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL,
|
||||||
SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL,
|
SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL,
|
||||||
SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL,
|
SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL,
|
||||||
|
SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL,
|
||||||
|
|
||||||
SHADER_OPCODE_TYPED_ATOMIC_LOGICAL,
|
SHADER_OPCODE_TYPED_ATOMIC_LOGICAL,
|
||||||
SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL,
|
SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL,
|
||||||
@@ -1439,12 +1441,14 @@ enum brw_message_target {
|
|||||||
#define GEN9_DATAPORT_DC_PORT1_A64_SCATTERED_READ 0x10
|
#define GEN9_DATAPORT_DC_PORT1_A64_SCATTERED_READ 0x10
|
||||||
#define GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ 0x11
|
#define GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ 0x11
|
||||||
#define GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP 0x12
|
#define GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP 0x12
|
||||||
|
#define GEN12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_INT_OP 0x13
|
||||||
#define GEN9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_READ 0x14
|
#define GEN9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_READ 0x14
|
||||||
#define GEN9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_WRITE 0x15
|
#define GEN9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_WRITE 0x15
|
||||||
#define GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE 0x19
|
#define GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE 0x19
|
||||||
#define GEN8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE 0x1a
|
#define GEN8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE 0x1a
|
||||||
#define GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP 0x1b
|
#define GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP 0x1b
|
||||||
#define GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP 0x1d
|
#define GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP 0x1d
|
||||||
|
#define GEN12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_FLOAT_OP 0x1e
|
||||||
|
|
||||||
/* GEN9 */
|
/* GEN9 */
|
||||||
#define GEN9_DATAPORT_RC_RENDER_TARGET_WRITE 12
|
#define GEN9_DATAPORT_RC_RENDER_TARGET_WRITE 12
|
||||||
|
@@ -859,6 +859,7 @@ fs_inst::components_read(unsigned i) const
|
|||||||
return i == 1 ? src[2].ud : 1;
|
return i == 1 ? src[2].ud : 1;
|
||||||
|
|
||||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
|
||||||
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL:
|
||||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
|
||||||
assert(src[2].file == IMM);
|
assert(src[2].file == IMM);
|
||||||
if (i == 1) {
|
if (i == 1) {
|
||||||
@@ -878,7 +879,8 @@ fs_inst::components_read(unsigned i) const
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL:
|
||||||
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL:
|
||||||
assert(src[2].file == IMM);
|
assert(src[2].file == IMM);
|
||||||
if (i == 1) {
|
if (i == 1) {
|
||||||
/* Data source */
|
/* Data source */
|
||||||
@@ -5969,15 +5971,28 @@ lower_a64_logical_send(const fs_builder &bld, fs_inst *inst)
|
|||||||
!inst->dst.is_null());
|
!inst->dst.is_null());
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL:
|
||||||
|
desc = brw_dp_a64_untyped_atomic_desc(devinfo, inst->exec_size, 16,
|
||||||
|
arg, /* atomic_op */
|
||||||
|
!inst->dst.is_null());
|
||||||
|
break;
|
||||||
|
|
||||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
|
||||||
desc = brw_dp_a64_untyped_atomic_desc(devinfo, inst->exec_size, 64,
|
desc = brw_dp_a64_untyped_atomic_desc(devinfo, inst->exec_size, 64,
|
||||||
arg, /* atomic_op */
|
arg, /* atomic_op */
|
||||||
!inst->dst.is_null());
|
!inst->dst.is_null());
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL:
|
||||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
|
|
||||||
desc = brw_dp_a64_untyped_atomic_float_desc(devinfo, inst->exec_size,
|
desc = brw_dp_a64_untyped_atomic_float_desc(devinfo, inst->exec_size,
|
||||||
|
16, /* bit_size */
|
||||||
|
arg, /* atomic_op */
|
||||||
|
!inst->dst.is_null());
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL:
|
||||||
|
desc = brw_dp_a64_untyped_atomic_float_desc(devinfo, inst->exec_size,
|
||||||
|
32, /* bit_size */
|
||||||
arg, /* atomic_op */
|
arg, /* atomic_op */
|
||||||
!inst->dst.is_null());
|
!inst->dst.is_null());
|
||||||
break;
|
break;
|
||||||
@@ -6354,8 +6369,10 @@ fs_visitor::lower_logical_sends()
|
|||||||
case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
|
case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
|
||||||
case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
|
case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
|
||||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
|
||||||
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL:
|
||||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
|
||||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL:
|
||||||
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL:
|
||||||
lower_a64_logical_send(ibld, inst);
|
lower_a64_logical_send(ibld, inst);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@@ -6968,8 +6985,10 @@ get_lowered_simd_width(const struct gen_device_info *devinfo,
|
|||||||
return inst->exec_size;
|
return inst->exec_size;
|
||||||
|
|
||||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
|
||||||
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL:
|
||||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
|
||||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL:
|
||||||
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL:
|
||||||
return 8;
|
return 8;
|
||||||
|
|
||||||
case SHADER_OPCODE_URB_READ_SIMD8:
|
case SHADER_OPCODE_URB_READ_SIMD8:
|
||||||
|
@@ -5686,6 +5686,18 @@ fs_visitor::nir_emit_shared_atomic_float(const fs_builder &bld,
|
|||||||
dest, srcs, SURFACE_LOGICAL_NUM_SRCS);
|
dest, srcs, SURFACE_LOGICAL_NUM_SRCS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static fs_reg
|
||||||
|
expand_to_32bit(const fs_builder &bld, const fs_reg &src)
|
||||||
|
{
|
||||||
|
if (type_sz(src.type) == 2) {
|
||||||
|
fs_reg src32 = bld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||||
|
bld.MOV(src32, retype(src, BRW_REGISTER_TYPE_UW));
|
||||||
|
return src32;
|
||||||
|
} else {
|
||||||
|
return src;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
fs_visitor::nir_emit_global_atomic(const fs_builder &bld,
|
fs_visitor::nir_emit_global_atomic(const fs_builder &bld,
|
||||||
int op, nir_intrinsic_instr *instr)
|
int op, nir_intrinsic_instr *instr)
|
||||||
@@ -5698,22 +5710,36 @@ fs_visitor::nir_emit_global_atomic(const fs_builder &bld,
|
|||||||
|
|
||||||
fs_reg data;
|
fs_reg data;
|
||||||
if (op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC)
|
if (op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC)
|
||||||
data = get_nir_src(instr->src[1]);
|
data = expand_to_32bit(bld, get_nir_src(instr->src[1]));
|
||||||
|
|
||||||
if (op == BRW_AOP_CMPWR) {
|
if (op == BRW_AOP_CMPWR) {
|
||||||
fs_reg tmp = bld.vgrf(data.type, 2);
|
fs_reg tmp = bld.vgrf(data.type, 2);
|
||||||
fs_reg sources[2] = { data, get_nir_src(instr->src[2]) };
|
fs_reg sources[2] = {
|
||||||
|
data,
|
||||||
|
expand_to_32bit(bld, get_nir_src(instr->src[2]))
|
||||||
|
};
|
||||||
bld.LOAD_PAYLOAD(tmp, sources, 2, 0);
|
bld.LOAD_PAYLOAD(tmp, sources, 2, 0);
|
||||||
data = tmp;
|
data = tmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nir_dest_bit_size(instr->dest) == 64) {
|
switch (nir_dest_bit_size(instr->dest)) {
|
||||||
bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL,
|
case 16: {
|
||||||
dest, addr, data, brw_imm_ud(op));
|
fs_reg dest32 = bld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||||
} else {
|
bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL,
|
||||||
assert(nir_dest_bit_size(instr->dest) == 32);
|
dest32, addr, data, brw_imm_ud(op));
|
||||||
|
bld.MOV(retype(dest, BRW_REGISTER_TYPE_UW), dest32);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 32:
|
||||||
bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL,
|
bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL,
|
||||||
dest, addr, data, brw_imm_ud(op));
|
dest, addr, data, brw_imm_ud(op));
|
||||||
|
break;
|
||||||
|
case 64:
|
||||||
|
bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL,
|
||||||
|
dest, addr, data, brw_imm_ud(op));
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
unreachable("Unsupported bit size");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -5727,17 +5753,33 @@ fs_visitor::nir_emit_global_atomic_float(const fs_builder &bld,
|
|||||||
fs_reg addr = get_nir_src(instr->src[0]);
|
fs_reg addr = get_nir_src(instr->src[0]);
|
||||||
|
|
||||||
assert(op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC);
|
assert(op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC);
|
||||||
fs_reg data = get_nir_src(instr->src[1]);
|
fs_reg data = expand_to_32bit(bld, get_nir_src(instr->src[1]));
|
||||||
|
|
||||||
if (op == BRW_AOP_FCMPWR) {
|
if (op == BRW_AOP_FCMPWR) {
|
||||||
fs_reg tmp = bld.vgrf(data.type, 2);
|
fs_reg tmp = bld.vgrf(data.type, 2);
|
||||||
fs_reg sources[2] = { data, get_nir_src(instr->src[2]) };
|
fs_reg sources[2] = {
|
||||||
|
data,
|
||||||
|
expand_to_32bit(bld, get_nir_src(instr->src[2]))
|
||||||
|
};
|
||||||
bld.LOAD_PAYLOAD(tmp, sources, 2, 0);
|
bld.LOAD_PAYLOAD(tmp, sources, 2, 0);
|
||||||
data = tmp;
|
data = tmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL,
|
switch (nir_dest_bit_size(instr->dest)) {
|
||||||
|
case 16: {
|
||||||
|
fs_reg dest32 = bld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||||
|
bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL,
|
||||||
|
dest32, addr, data, brw_imm_ud(op));
|
||||||
|
bld.MOV(retype(dest, BRW_REGISTER_TYPE_UW), dest32);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 32:
|
||||||
|
bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL,
|
||||||
dest, addr, data, brw_imm_ud(op));
|
dest, addr, data, brw_imm_ud(op));
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
unreachable("Unsupported bit size");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@@ -515,6 +515,8 @@ schedule_node::set_latency_gen7(bool is_haswell)
|
|||||||
case GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP:
|
case GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP:
|
||||||
case GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP:
|
case GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP:
|
||||||
case GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP:
|
case GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP:
|
||||||
|
case GEN12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_INT_OP:
|
||||||
|
case GEN12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_FLOAT_OP:
|
||||||
/* See also GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP */
|
/* See also GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP */
|
||||||
latency = 14000;
|
latency = 14000;
|
||||||
break;
|
break;
|
||||||
|
@@ -323,10 +323,14 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
|
|||||||
return "a64_byte_scattered_write_logical";
|
return "a64_byte_scattered_write_logical";
|
||||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
|
||||||
return "a64_untyped_atomic_logical";
|
return "a64_untyped_atomic_logical";
|
||||||
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL:
|
||||||
|
return "a64_untyped_atomic_int16_logical";
|
||||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
|
||||||
return "a64_untyped_atomic_int64_logical";
|
return "a64_untyped_atomic_int64_logical";
|
||||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL:
|
||||||
return "a64_untyped_atomic_float_logical";
|
return "a64_untyped_atomic_float16_logical";
|
||||||
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL:
|
||||||
|
return "a64_untyped_atomic_float32_logical";
|
||||||
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
|
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
|
||||||
return "typed_atomic_logical";
|
return "typed_atomic_logical";
|
||||||
case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
|
case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
|
||||||
@@ -1101,8 +1105,10 @@ backend_instruction::has_side_effects() const
|
|||||||
case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
|
case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
|
||||||
case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
|
case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
|
||||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
|
||||||
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL:
|
||||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
|
||||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL:
|
||||||
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL:
|
||||||
case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
|
case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
|
||||||
case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL:
|
case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL:
|
||||||
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
|
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
|
||||||
|
Reference in New Issue
Block a user