intel/compiler: Support 16 bit float ops

Signed-off-by: Rohan Garg <rohan.garg@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17988>
This commit is contained in:
Rohan Garg
2022-10-17 15:53:50 +02:00
parent 2e774180c6
commit 43169dbbe5
2 changed files with 117 additions and 31 deletions

View File

@@ -5920,6 +5920,18 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
}
}
/**
 * Return a 32-bit-wide copy of \p src when it is a 2-byte type.
 *
 * If type_sz(src.type) is 2, a new UD-typed VGRF is allocated through
 * \p bld and a MOV is emitted that copies \p src, reinterpreted as UW,
 * into it; that new register is returned.  A source of any other size is
 * returned unchanged and no instruction is emitted.
 */
static fs_reg
expand_to_32bit(const fs_builder &bld, const fs_reg &src)
{
   /* Nothing to do unless the source is exactly two bytes wide. */
   if (type_sz(src.type) != 2)
      return src;

   /* View the source as UW so the MOV into the UD temporary is a plain
    * integer copy regardless of the source's original type.
    */
   fs_reg widened = bld.vgrf(BRW_REGISTER_TYPE_UD);
   bld.MOV(widened, retype(src, BRW_REGISTER_TYPE_UW));
   return widened;
}
void
fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
int op, nir_intrinsic_instr *instr)
@@ -5930,7 +5942,8 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
* descriptors provided for Qword atomic ops except for A64 messages.
*/
assert(nir_dest_bit_size(instr->dest) == 32 ||
(nir_dest_bit_size(instr->dest) == 64 && devinfo->has_lsc));
(nir_dest_bit_size(instr->dest) == 64 && devinfo->has_lsc) ||
(nir_dest_bit_size(instr->dest) == 16 && devinfo->has_lsc));
fs_reg dest;
if (nir_intrinsic_infos[instr->intrinsic].has_dest)
@@ -5945,11 +5958,14 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
fs_reg data;
if (op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC)
data = get_nir_src(instr->src[2]);
data = expand_to_32bit(bld, get_nir_src(instr->src[2]));
if (op == BRW_AOP_CMPWR) {
fs_reg tmp = bld.vgrf(data.type, 2);
fs_reg sources[2] = { data, get_nir_src(instr->src[3]) };
fs_reg sources[2] = {
data,
expand_to_32bit(bld, get_nir_src(instr->src[3]))
};
bld.LOAD_PAYLOAD(tmp, sources, 2, 0);
data = tmp;
}
@@ -5957,8 +5973,25 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
/* Emit the actual atomic operation */
switch (nir_dest_bit_size(instr->dest)) {
case 16: {
fs_reg dest32 = bld.vgrf(BRW_REGISTER_TYPE_UD);
bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL,
retype(dest32, dest.type),
srcs, SURFACE_LOGICAL_NUM_SRCS);
bld.MOV(retype(dest, BRW_REGISTER_TYPE_UW),
retype(dest32, BRW_REGISTER_TYPE_UD));
break;
}
case 32:
case 64:
bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL,
dest, srcs, SURFACE_LOGICAL_NUM_SRCS);
break;
default:
unreachable("Unsupported bit size");
}
}
void
@@ -5976,19 +6009,38 @@ fs_visitor::nir_emit_ssbo_atomic_float(const fs_builder &bld,
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(op);
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1);
fs_reg data = get_nir_src(instr->src[2]);
fs_reg data = expand_to_32bit(bld, get_nir_src(instr->src[2]));
if (op == BRW_AOP_FCMPWR) {
fs_reg tmp = bld.vgrf(data.type, 2);
fs_reg sources[2] = { data, get_nir_src(instr->src[3]) };
fs_reg sources[2] = {
data,
expand_to_32bit(bld, get_nir_src(instr->src[3]))
};
bld.LOAD_PAYLOAD(tmp, sources, 2, 0);
data = tmp;
}
srcs[SURFACE_LOGICAL_SRC_DATA] = data;
/* Emit the actual atomic operation */
switch (nir_dest_bit_size(instr->dest)) {
case 16: {
fs_reg dest32 = bld.vgrf(BRW_REGISTER_TYPE_UD);
bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL,
retype(dest32, dest.type),
srcs, SURFACE_LOGICAL_NUM_SRCS);
bld.MOV(retype(dest, BRW_REGISTER_TYPE_UW),
retype(dest32, BRW_REGISTER_TYPE_UD));
break;
}
case 32:
case 64:
bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL,
dest, srcs, SURFACE_LOGICAL_NUM_SRCS);
break;
default:
unreachable("Unsupported bit size");
}
}
void
@@ -6007,10 +6059,13 @@ fs_visitor::nir_emit_shared_atomic(const fs_builder &bld,
fs_reg data;
if (op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC)
data = get_nir_src(instr->src[1]);
data = expand_to_32bit(bld, get_nir_src(instr->src[1]));
if (op == BRW_AOP_CMPWR) {
fs_reg tmp = bld.vgrf(data.type, 2);
fs_reg sources[2] = { data, get_nir_src(instr->src[2]) };
fs_reg sources[2] = {
expand_to_32bit(bld, data),
expand_to_32bit(bld, get_nir_src(instr->src[2]))
};
bld.LOAD_PAYLOAD(tmp, sources, 2, 0);
data = tmp;
}
@@ -6030,8 +6085,25 @@ fs_visitor::nir_emit_shared_atomic(const fs_builder &bld,
/* Emit the actual atomic operation */
switch (nir_dest_bit_size(instr->dest)) {
case 16: {
fs_reg dest32 = bld.vgrf(BRW_REGISTER_TYPE_UD);
bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL,
retype(dest32, dest.type),
srcs, SURFACE_LOGICAL_NUM_SRCS);
bld.MOV(retype(dest, BRW_REGISTER_TYPE_UW),
retype(dest32, BRW_REGISTER_TYPE_UD));
break;
}
case 32:
case 64:
bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL,
dest, srcs, SURFACE_LOGICAL_NUM_SRCS);
break;
default:
unreachable("Unsupported bit size");
}
}
void
@@ -6048,10 +6120,13 @@ fs_visitor::nir_emit_shared_atomic_float(const fs_builder &bld,
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(op);
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1);
fs_reg data = get_nir_src(instr->src[1]);
fs_reg data = expand_to_32bit(bld, get_nir_src(instr->src[1]));
if (op == BRW_AOP_FCMPWR) {
fs_reg tmp = bld.vgrf(data.type, 2);
fs_reg sources[2] = { data, get_nir_src(instr->src[2]) };
fs_reg sources[2] = {
data,
expand_to_32bit(bld, get_nir_src(instr->src[2]))
};
bld.LOAD_PAYLOAD(tmp, sources, 2, 0);
data = tmp;
}
@@ -6071,20 +6146,26 @@ fs_visitor::nir_emit_shared_atomic_float(const fs_builder &bld,
/* Emit the actual atomic operation */
switch (nir_dest_bit_size(instr->dest)) {
case 16: {
fs_reg dest32 = bld.vgrf(BRW_REGISTER_TYPE_UD);
bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL,
retype(dest32, dest.type),
srcs, SURFACE_LOGICAL_NUM_SRCS);
bld.MOV(retype(dest, BRW_REGISTER_TYPE_UW),
retype(dest32, BRW_REGISTER_TYPE_UD));
break;
}
case 32:
case 64:
bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL,
dest, srcs, SURFACE_LOGICAL_NUM_SRCS);
}
static fs_reg
expand_to_32bit(const fs_builder &bld, const fs_reg &src)
{
if (type_sz(src.type) == 2) {
fs_reg src32 = bld.vgrf(BRW_REGISTER_TYPE_UD);
bld.MOV(src32, retype(src, BRW_REGISTER_TYPE_UW));
return src32;
} else {
return src;
break;
default:
unreachable("Unsupported bit size");
}
}
void
@@ -6120,7 +6201,8 @@ fs_visitor::nir_emit_global_atomic(const fs_builder &bld,
switch (nir_dest_bit_size(instr->dest)) {
case 16: {
fs_reg dest32 = bld.vgrf(BRW_REGISTER_TYPE_UD);
bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL, dest32,
bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL,
retype(dest32, dest.type),
srcs, A64_LOGICAL_NUM_SRCS);
bld.MOV(retype(dest, BRW_REGISTER_TYPE_UW), dest32);
break;
@@ -6169,7 +6251,8 @@ fs_visitor::nir_emit_global_atomic_float(const fs_builder &bld,
switch (nir_dest_bit_size(instr->dest)) {
case 16: {
fs_reg dest32 = bld.vgrf(BRW_REGISTER_TYPE_UD);
bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL, dest32,
bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL,
retype(dest32, dest.type),
srcs, A64_LOGICAL_NUM_SRCS);
bld.MOV(retype(dest, BRW_REGISTER_TYPE_UW), dest32);
break;

View File

@@ -1695,6 +1695,7 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst)
const unsigned addr_sz = inst->components_read(SURFACE_LOGICAL_SRC_ADDRESS);
const unsigned src_comps = inst->components_read(SURFACE_LOGICAL_SRC_DATA);
const unsigned src_sz = type_sz(src.type);
const unsigned dst_sz = type_sz(inst->dst.type);
const bool has_side_effects = inst->has_side_effects();
@@ -1758,10 +1759,11 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst)
inst->opcode == SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL ?
brw_atomic_op_to_lsc_fatomic_op(arg.ud) :
brw_atomic_op_to_lsc_atomic_op(arg.ud);
inst->desc = lsc_msg_desc(devinfo, opcode, inst->exec_size,
surf_type, LSC_ADDR_SIZE_A32,
1 /* num_coordinates */,
lsc_bits_to_data_size(src_sz * 8),
lsc_bits_to_data_size(dst_sz * 8),
1 /* num_channels */,
false /* transpose */,
LSC_CACHE_STORE_L1UC_L3WB,
@@ -2032,6 +2034,7 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst)
const fs_reg &addr = inst->src[A64_LOGICAL_ADDRESS];
const fs_reg &src = inst->src[A64_LOGICAL_SRC];
const unsigned src_sz = type_sz(src.type);
const unsigned dst_sz = type_sz(inst->dst.type);
const unsigned src_comps = inst->components_read(1);
assert(inst->src[A64_LOGICAL_ARG].file == IMM);
@@ -2102,7 +2105,7 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst)
inst->desc = lsc_msg_desc(devinfo, opcode, inst->exec_size,
LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A64,
1 /* num_coordinates */,
lsc_bits_to_data_size(src_sz * 8),
lsc_bits_to_data_size(dst_sz * 8),
1 /* num_channels */,
false /* transpose */,
LSC_CACHE_STORE_L1UC_L3WB,