intel/compiler: Support 16 bit float ops
Signed-off-by: Rohan Garg <rohan.garg@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17988>
This commit is contained in:
@@ -5920,6 +5920,18 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Widen a 16-bit source register to a 32-bit one.
 *
 * When the type of \p src is 2 bytes wide, a new UD (unsigned dword) VGRF is
 * allocated through \p bld, \p src is reinterpreted as UW (unsigned word) and
 * copied into it with a MOV, and the new register is returned.  Sources of
 * any other size are returned unchanged and no instruction is emitted.
 *
 * NOTE(review): the UW -> UD MOV presumably zero-extends, leaving the upper
 * 16 bits of each channel clear -- confirm against the MOV semantics.
 */
static fs_reg
|
||||||
|
expand_to_32bit(const fs_builder &bld, const fs_reg &src)
|
||||||
|
{
|
||||||
|
if (type_sz(src.type) == 2) {
|
||||||
|
fs_reg src32 = bld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||||
|
bld.MOV(src32, retype(src, BRW_REGISTER_TYPE_UW));
|
||||||
|
return src32;
|
||||||
|
} else {
|
||||||
|
return src;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
|
fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
|
||||||
int op, nir_intrinsic_instr *instr)
|
int op, nir_intrinsic_instr *instr)
|
||||||
@@ -5930,7 +5942,8 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
|
|||||||
* descriptors provided for Qword atomic ops except for A64 messages.
|
* descriptors provided for Qword atomic ops except for A64 messages.
|
||||||
*/
|
*/
|
||||||
assert(nir_dest_bit_size(instr->dest) == 32 ||
|
assert(nir_dest_bit_size(instr->dest) == 32 ||
|
||||||
(nir_dest_bit_size(instr->dest) == 64 && devinfo->has_lsc));
|
(nir_dest_bit_size(instr->dest) == 64 && devinfo->has_lsc) ||
|
||||||
|
(nir_dest_bit_size(instr->dest) == 16 && devinfo->has_lsc));
|
||||||
|
|
||||||
fs_reg dest;
|
fs_reg dest;
|
||||||
if (nir_intrinsic_infos[instr->intrinsic].has_dest)
|
if (nir_intrinsic_infos[instr->intrinsic].has_dest)
|
||||||
@@ -5945,11 +5958,14 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
|
|||||||
|
|
||||||
fs_reg data;
|
fs_reg data;
|
||||||
if (op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC)
|
if (op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC)
|
||||||
data = get_nir_src(instr->src[2]);
|
data = expand_to_32bit(bld, get_nir_src(instr->src[2]));
|
||||||
|
|
||||||
if (op == BRW_AOP_CMPWR) {
|
if (op == BRW_AOP_CMPWR) {
|
||||||
fs_reg tmp = bld.vgrf(data.type, 2);
|
fs_reg tmp = bld.vgrf(data.type, 2);
|
||||||
fs_reg sources[2] = { data, get_nir_src(instr->src[3]) };
|
fs_reg sources[2] = {
|
||||||
|
data,
|
||||||
|
expand_to_32bit(bld, get_nir_src(instr->src[3]))
|
||||||
|
};
|
||||||
bld.LOAD_PAYLOAD(tmp, sources, 2, 0);
|
bld.LOAD_PAYLOAD(tmp, sources, 2, 0);
|
||||||
data = tmp;
|
data = tmp;
|
||||||
}
|
}
|
||||||
@@ -5957,8 +5973,25 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
|
|||||||
|
|
||||||
/* Emit the actual atomic operation */
|
/* Emit the actual atomic operation */
|
||||||
|
|
||||||
bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL,
|
switch (nir_dest_bit_size(instr->dest)) {
|
||||||
dest, srcs, SURFACE_LOGICAL_NUM_SRCS);
|
case 16: {
|
||||||
|
fs_reg dest32 = bld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||||
|
bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL,
|
||||||
|
retype(dest32, dest.type),
|
||||||
|
srcs, SURFACE_LOGICAL_NUM_SRCS);
|
||||||
|
bld.MOV(retype(dest, BRW_REGISTER_TYPE_UW),
|
||||||
|
retype(dest32, BRW_REGISTER_TYPE_UD));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case 32:
|
||||||
|
case 64:
|
||||||
|
bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL,
|
||||||
|
dest, srcs, SURFACE_LOGICAL_NUM_SRCS);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
unreachable("Unsupported bit size");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
@@ -5976,19 +6009,38 @@ fs_visitor::nir_emit_ssbo_atomic_float(const fs_builder &bld,
|
|||||||
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(op);
|
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(op);
|
||||||
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1);
|
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1);
|
||||||
|
|
||||||
fs_reg data = get_nir_src(instr->src[2]);
|
fs_reg data = expand_to_32bit(bld, get_nir_src(instr->src[2]));
|
||||||
if (op == BRW_AOP_FCMPWR) {
|
if (op == BRW_AOP_FCMPWR) {
|
||||||
fs_reg tmp = bld.vgrf(data.type, 2);
|
fs_reg tmp = bld.vgrf(data.type, 2);
|
||||||
fs_reg sources[2] = { data, get_nir_src(instr->src[3]) };
|
fs_reg sources[2] = {
|
||||||
|
data,
|
||||||
|
expand_to_32bit(bld, get_nir_src(instr->src[3]))
|
||||||
|
};
|
||||||
bld.LOAD_PAYLOAD(tmp, sources, 2, 0);
|
bld.LOAD_PAYLOAD(tmp, sources, 2, 0);
|
||||||
data = tmp;
|
data = tmp;
|
||||||
}
|
}
|
||||||
srcs[SURFACE_LOGICAL_SRC_DATA] = data;
|
srcs[SURFACE_LOGICAL_SRC_DATA] = data;
|
||||||
|
|
||||||
/* Emit the actual atomic operation */
|
/* Emit the actual atomic operation */
|
||||||
|
switch (nir_dest_bit_size(instr->dest)) {
|
||||||
|
case 16: {
|
||||||
|
fs_reg dest32 = bld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||||
|
bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL,
|
||||||
|
retype(dest32, dest.type),
|
||||||
|
srcs, SURFACE_LOGICAL_NUM_SRCS);
|
||||||
|
bld.MOV(retype(dest, BRW_REGISTER_TYPE_UW),
|
||||||
|
retype(dest32, BRW_REGISTER_TYPE_UD));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL,
|
case 32:
|
||||||
dest, srcs, SURFACE_LOGICAL_NUM_SRCS);
|
case 64:
|
||||||
|
bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL,
|
||||||
|
dest, srcs, SURFACE_LOGICAL_NUM_SRCS);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
unreachable("Unsupported bit size");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
@@ -6007,10 +6059,13 @@ fs_visitor::nir_emit_shared_atomic(const fs_builder &bld,
|
|||||||
|
|
||||||
fs_reg data;
|
fs_reg data;
|
||||||
if (op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC)
|
if (op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC)
|
||||||
data = get_nir_src(instr->src[1]);
|
data = expand_to_32bit(bld, get_nir_src(instr->src[1]));
|
||||||
if (op == BRW_AOP_CMPWR) {
|
if (op == BRW_AOP_CMPWR) {
|
||||||
fs_reg tmp = bld.vgrf(data.type, 2);
|
fs_reg tmp = bld.vgrf(data.type, 2);
|
||||||
fs_reg sources[2] = { data, get_nir_src(instr->src[2]) };
|
fs_reg sources[2] = {
|
||||||
|
expand_to_32bit(bld, data),
|
||||||
|
expand_to_32bit(bld, get_nir_src(instr->src[2]))
|
||||||
|
};
|
||||||
bld.LOAD_PAYLOAD(tmp, sources, 2, 0);
|
bld.LOAD_PAYLOAD(tmp, sources, 2, 0);
|
||||||
data = tmp;
|
data = tmp;
|
||||||
}
|
}
|
||||||
@@ -6030,8 +6085,25 @@ fs_visitor::nir_emit_shared_atomic(const fs_builder &bld,
|
|||||||
|
|
||||||
/* Emit the actual atomic operation */
|
/* Emit the actual atomic operation */
|
||||||
|
|
||||||
bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL,
|
switch (nir_dest_bit_size(instr->dest)) {
|
||||||
dest, srcs, SURFACE_LOGICAL_NUM_SRCS);
|
case 16: {
|
||||||
|
fs_reg dest32 = bld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||||
|
bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL,
|
||||||
|
retype(dest32, dest.type),
|
||||||
|
srcs, SURFACE_LOGICAL_NUM_SRCS);
|
||||||
|
bld.MOV(retype(dest, BRW_REGISTER_TYPE_UW),
|
||||||
|
retype(dest32, BRW_REGISTER_TYPE_UD));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case 32:
|
||||||
|
case 64:
|
||||||
|
bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL,
|
||||||
|
dest, srcs, SURFACE_LOGICAL_NUM_SRCS);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
unreachable("Unsupported bit size");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
@@ -6048,10 +6120,13 @@ fs_visitor::nir_emit_shared_atomic_float(const fs_builder &bld,
|
|||||||
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(op);
|
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(op);
|
||||||
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1);
|
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1);
|
||||||
|
|
||||||
fs_reg data = get_nir_src(instr->src[1]);
|
fs_reg data = expand_to_32bit(bld, get_nir_src(instr->src[1]));
|
||||||
if (op == BRW_AOP_FCMPWR) {
|
if (op == BRW_AOP_FCMPWR) {
|
||||||
fs_reg tmp = bld.vgrf(data.type, 2);
|
fs_reg tmp = bld.vgrf(data.type, 2);
|
||||||
fs_reg sources[2] = { data, get_nir_src(instr->src[2]) };
|
fs_reg sources[2] = {
|
||||||
|
data,
|
||||||
|
expand_to_32bit(bld, get_nir_src(instr->src[2]))
|
||||||
|
};
|
||||||
bld.LOAD_PAYLOAD(tmp, sources, 2, 0);
|
bld.LOAD_PAYLOAD(tmp, sources, 2, 0);
|
||||||
data = tmp;
|
data = tmp;
|
||||||
}
|
}
|
||||||
@@ -6071,20 +6146,26 @@ fs_visitor::nir_emit_shared_atomic_float(const fs_builder &bld,
|
|||||||
|
|
||||||
/* Emit the actual atomic operation */
|
/* Emit the actual atomic operation */
|
||||||
|
|
||||||
bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL,
|
switch (nir_dest_bit_size(instr->dest)) {
|
||||||
dest, srcs, SURFACE_LOGICAL_NUM_SRCS);
|
case 16: {
|
||||||
}
|
fs_reg dest32 = bld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||||
|
bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL,
|
||||||
|
retype(dest32, dest.type),
|
||||||
|
srcs, SURFACE_LOGICAL_NUM_SRCS);
|
||||||
|
bld.MOV(retype(dest, BRW_REGISTER_TYPE_UW),
|
||||||
|
retype(dest32, BRW_REGISTER_TYPE_UD));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
static fs_reg
|
case 32:
|
||||||
expand_to_32bit(const fs_builder &bld, const fs_reg &src)
|
case 64:
|
||||||
{
|
bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL,
|
||||||
if (type_sz(src.type) == 2) {
|
dest, srcs, SURFACE_LOGICAL_NUM_SRCS);
|
||||||
fs_reg src32 = bld.vgrf(BRW_REGISTER_TYPE_UD);
|
break;
|
||||||
bld.MOV(src32, retype(src, BRW_REGISTER_TYPE_UW));
|
default:
|
||||||
return src32;
|
unreachable("Unsupported bit size");
|
||||||
} else {
|
|
||||||
return src;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
@@ -6120,7 +6201,8 @@ fs_visitor::nir_emit_global_atomic(const fs_builder &bld,
|
|||||||
switch (nir_dest_bit_size(instr->dest)) {
|
switch (nir_dest_bit_size(instr->dest)) {
|
||||||
case 16: {
|
case 16: {
|
||||||
fs_reg dest32 = bld.vgrf(BRW_REGISTER_TYPE_UD);
|
fs_reg dest32 = bld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||||
bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL, dest32,
|
bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL,
|
||||||
|
retype(dest32, dest.type),
|
||||||
srcs, A64_LOGICAL_NUM_SRCS);
|
srcs, A64_LOGICAL_NUM_SRCS);
|
||||||
bld.MOV(retype(dest, BRW_REGISTER_TYPE_UW), dest32);
|
bld.MOV(retype(dest, BRW_REGISTER_TYPE_UW), dest32);
|
||||||
break;
|
break;
|
||||||
@@ -6169,7 +6251,8 @@ fs_visitor::nir_emit_global_atomic_float(const fs_builder &bld,
|
|||||||
switch (nir_dest_bit_size(instr->dest)) {
|
switch (nir_dest_bit_size(instr->dest)) {
|
||||||
case 16: {
|
case 16: {
|
||||||
fs_reg dest32 = bld.vgrf(BRW_REGISTER_TYPE_UD);
|
fs_reg dest32 = bld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||||
bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL, dest32,
|
bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL,
|
||||||
|
retype(dest32, dest.type),
|
||||||
srcs, A64_LOGICAL_NUM_SRCS);
|
srcs, A64_LOGICAL_NUM_SRCS);
|
||||||
bld.MOV(retype(dest, BRW_REGISTER_TYPE_UW), dest32);
|
bld.MOV(retype(dest, BRW_REGISTER_TYPE_UW), dest32);
|
||||||
break;
|
break;
|
||||||
|
@@ -1695,6 +1695,7 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
|||||||
const unsigned addr_sz = inst->components_read(SURFACE_LOGICAL_SRC_ADDRESS);
|
const unsigned addr_sz = inst->components_read(SURFACE_LOGICAL_SRC_ADDRESS);
|
||||||
const unsigned src_comps = inst->components_read(SURFACE_LOGICAL_SRC_DATA);
|
const unsigned src_comps = inst->components_read(SURFACE_LOGICAL_SRC_DATA);
|
||||||
const unsigned src_sz = type_sz(src.type);
|
const unsigned src_sz = type_sz(src.type);
|
||||||
|
const unsigned dst_sz = type_sz(inst->dst.type);
|
||||||
|
|
||||||
const bool has_side_effects = inst->has_side_effects();
|
const bool has_side_effects = inst->has_side_effects();
|
||||||
|
|
||||||
@@ -1758,10 +1759,11 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
|||||||
inst->opcode == SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL ?
|
inst->opcode == SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL ?
|
||||||
brw_atomic_op_to_lsc_fatomic_op(arg.ud) :
|
brw_atomic_op_to_lsc_fatomic_op(arg.ud) :
|
||||||
brw_atomic_op_to_lsc_atomic_op(arg.ud);
|
brw_atomic_op_to_lsc_atomic_op(arg.ud);
|
||||||
|
|
||||||
inst->desc = lsc_msg_desc(devinfo, opcode, inst->exec_size,
|
inst->desc = lsc_msg_desc(devinfo, opcode, inst->exec_size,
|
||||||
surf_type, LSC_ADDR_SIZE_A32,
|
surf_type, LSC_ADDR_SIZE_A32,
|
||||||
1 /* num_coordinates */,
|
1 /* num_coordinates */,
|
||||||
lsc_bits_to_data_size(src_sz * 8),
|
lsc_bits_to_data_size(dst_sz * 8),
|
||||||
1 /* num_channels */,
|
1 /* num_channels */,
|
||||||
false /* transpose */,
|
false /* transpose */,
|
||||||
LSC_CACHE_STORE_L1UC_L3WB,
|
LSC_CACHE_STORE_L1UC_L3WB,
|
||||||
@@ -2032,6 +2034,7 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst)
|
|||||||
const fs_reg &addr = inst->src[A64_LOGICAL_ADDRESS];
|
const fs_reg &addr = inst->src[A64_LOGICAL_ADDRESS];
|
||||||
const fs_reg &src = inst->src[A64_LOGICAL_SRC];
|
const fs_reg &src = inst->src[A64_LOGICAL_SRC];
|
||||||
const unsigned src_sz = type_sz(src.type);
|
const unsigned src_sz = type_sz(src.type);
|
||||||
|
const unsigned dst_sz = type_sz(inst->dst.type);
|
||||||
|
|
||||||
const unsigned src_comps = inst->components_read(1);
|
const unsigned src_comps = inst->components_read(1);
|
||||||
assert(inst->src[A64_LOGICAL_ARG].file == IMM);
|
assert(inst->src[A64_LOGICAL_ARG].file == IMM);
|
||||||
@@ -2102,7 +2105,7 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst)
|
|||||||
inst->desc = lsc_msg_desc(devinfo, opcode, inst->exec_size,
|
inst->desc = lsc_msg_desc(devinfo, opcode, inst->exec_size,
|
||||||
LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A64,
|
LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A64,
|
||||||
1 /* num_coordinates */,
|
1 /* num_coordinates */,
|
||||||
lsc_bits_to_data_size(src_sz * 8),
|
lsc_bits_to_data_size(dst_sz * 8),
|
||||||
1 /* num_channels */,
|
1 /* num_channels */,
|
||||||
false /* transpose */,
|
false /* transpose */,
|
||||||
LSC_CACHE_STORE_L1UC_L3WB,
|
LSC_CACHE_STORE_L1UC_L3WB,
|
||||||
|
Reference in New Issue
Block a user