diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index cbef3d15334..6ecabce741e 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -361,12 +361,8 @@ public: nir_intrinsic_instr *instr); void nir_emit_ssbo_atomic(const brw::fs_builder &bld, nir_intrinsic_instr *instr); - void nir_emit_ssbo_atomic_float(const brw::fs_builder &bld, - nir_intrinsic_instr *instr); void nir_emit_shared_atomic(const brw::fs_builder &bld, nir_intrinsic_instr *instr); - void nir_emit_shared_atomic_float(const brw::fs_builder &bld, - nir_intrinsic_instr *instr); void nir_emit_global_atomic(const brw::fs_builder &bld, nir_intrinsic_instr *instr); void nir_emit_global_atomic_float(const brw::fs_builder &bld, diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 990c5ea4ce7..3523b003a7a 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -3779,12 +3779,10 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld, case nir_intrinsic_shared_atomic_xor: case nir_intrinsic_shared_atomic_exchange: case nir_intrinsic_shared_atomic_comp_swap: - nir_emit_shared_atomic(bld, instr); - break; case nir_intrinsic_shared_atomic_fmin: case nir_intrinsic_shared_atomic_fmax: case nir_intrinsic_shared_atomic_fcomp_swap: - nir_emit_shared_atomic_float(bld, instr); + nir_emit_shared_atomic(bld, instr); break; case nir_intrinsic_load_shared: { @@ -5053,13 +5051,11 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr case nir_intrinsic_ssbo_atomic_xor: case nir_intrinsic_ssbo_atomic_exchange: case nir_intrinsic_ssbo_atomic_comp_swap: - nir_emit_ssbo_atomic(bld, instr); - break; case nir_intrinsic_ssbo_atomic_fadd: case nir_intrinsic_ssbo_atomic_fmin: case nir_intrinsic_ssbo_atomic_fmax: case nir_intrinsic_ssbo_atomic_fcomp_swap: - nir_emit_ssbo_atomic_float(bld, instr); + nir_emit_ssbo_atomic(bld, instr); break; case nir_intrinsic_get_ssbo_size: { @@ -5958,16 +5954,20 @@ void fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld, nir_intrinsic_instr *instr) { - int op = lsc_aop_for_nir_intrinsic(instr); + enum lsc_opcode op = lsc_aop_for_nir_intrinsic(instr); + int num_data = lsc_op_num_data_values(op); /* The BTI untyped atomic messages only support 32-bit atomics. If you * just look at the big table of messages in the Vol 7 of the SKL PRM, they * appear to exist. However, if you look at Vol 2a, there are no message * descriptors provided for Qword atomic ops except for A64 messages. + * + * 16-bit float atomics are supported, however. */ assert(nir_dest_bit_size(instr->dest) == 32 || (nir_dest_bit_size(instr->dest) == 64 && devinfo->has_lsc) || - (nir_dest_bit_size(instr->dest) == 16 && devinfo->has_lsc)); + (nir_dest_bit_size(instr->dest) == 16 && + (devinfo->has_lsc || lsc_opcode_is_atomic_float(op)))); fs_reg dest; if (nir_intrinsic_infos[instr->intrinsic].has_dest) @@ -5981,10 +5981,10 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld, srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1); fs_reg data; - if (op != LSC_OP_ATOMIC_INC && op != LSC_OP_ATOMIC_DEC) + if (num_data >= 1) data = expand_to_32bit(bld, get_nir_src(instr->src[2])); - if (op == LSC_OP_ATOMIC_CMPXCHG) { + if (num_data >= 2) { fs_reg tmp = bld.vgrf(data.type, 2); fs_reg sources[2] = { data, @@ -6018,62 +6018,12 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld, } } -void -fs_visitor::nir_emit_ssbo_atomic_float(const fs_builder &bld, - nir_intrinsic_instr *instr) -{ - int op = lsc_aop_for_nir_intrinsic(instr); - - fs_reg dest; - if (nir_intrinsic_infos[instr->intrinsic].has_dest) - dest = get_nir_dest(instr->dest); - - fs_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; - srcs[SURFACE_LOGICAL_SRC_SURFACE] = get_nir_ssbo_intrinsic_index(bld, instr); - srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(instr->src[1]); - srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1); - srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(op); - srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1); - - fs_reg data = expand_to_32bit(bld, get_nir_src(instr->src[2])); - if (op == LSC_OP_ATOMIC_FCMPXCHG) { - fs_reg tmp = bld.vgrf(data.type, 2); - fs_reg sources[2] = { - data, - expand_to_32bit(bld, get_nir_src(instr->src[3])) - }; - bld.LOAD_PAYLOAD(tmp, sources, 2, 0); - data = tmp; - } - srcs[SURFACE_LOGICAL_SRC_DATA] = data; - - /* Emit the actual atomic operation */ - switch (nir_dest_bit_size(instr->dest)) { - case 16: { - fs_reg dest32 = bld.vgrf(BRW_REGISTER_TYPE_UD); - bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL, - retype(dest32, dest.type), - srcs, SURFACE_LOGICAL_NUM_SRCS); - bld.MOV(retype(dest, BRW_REGISTER_TYPE_UW), - retype(dest32, BRW_REGISTER_TYPE_UD)); - break; - } - - case 32: - case 64: - bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL, - dest, srcs, SURFACE_LOGICAL_NUM_SRCS); - break; - default: - unreachable("Unsupported bit size"); - } -} - void fs_visitor::nir_emit_shared_atomic(const fs_builder &bld, nir_intrinsic_instr *instr) { - int op = lsc_aop_for_nir_intrinsic(instr); + enum lsc_opcode op = lsc_aop_for_nir_intrinsic(instr); + int num_data = lsc_op_num_data_values(op); fs_reg dest; if (nir_intrinsic_infos[instr->intrinsic].has_dest) @@ -6086,9 +6036,10 @@ fs_visitor::nir_emit_shared_atomic(const fs_builder &bld, srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1); fs_reg data; - if (op != LSC_OP_ATOMIC_INC && op != LSC_OP_ATOMIC_DEC) + if (num_data >= 1) data = expand_to_32bit(bld, get_nir_src(instr->src[1])); - if (op == LSC_OP_ATOMIC_CMPXCHG) { + + if (num_data >= 2) { fs_reg tmp = bld.vgrf(data.type, 2); fs_reg sources[2] = { data, @@ -6134,70 +6085,6 @@ fs_visitor::nir_emit_shared_atomic(const fs_builder &bld, } } -void -fs_visitor::nir_emit_shared_atomic_float(const fs_builder &bld, - nir_intrinsic_instr *instr) -{ - int op = lsc_aop_for_nir_intrinsic(instr); - - fs_reg dest; - if (nir_intrinsic_infos[instr->intrinsic].has_dest) - dest = get_nir_dest(instr->dest); - - fs_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; - srcs[SURFACE_LOGICAL_SRC_SURFACE] = brw_imm_ud(GFX7_BTI_SLM); - srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1); - srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(op); - srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1); - - fs_reg data = expand_to_32bit(bld, get_nir_src(instr->src[1])); - if (op == LSC_OP_ATOMIC_FCMPXCHG) { - fs_reg tmp = bld.vgrf(data.type, 2); - fs_reg sources[2] = { - data, - expand_to_32bit(bld, get_nir_src(instr->src[2])) - }; - bld.LOAD_PAYLOAD(tmp, sources, 2, 0); - data = tmp; - } - srcs[SURFACE_LOGICAL_SRC_DATA] = data; - - /* Get the offset */ - if (nir_src_is_const(instr->src[0])) { - srcs[SURFACE_LOGICAL_SRC_ADDRESS] = - brw_imm_ud(nir_intrinsic_base(instr) + - nir_src_as_uint(instr->src[0])); - } else { - srcs[SURFACE_LOGICAL_SRC_ADDRESS] = vgrf(glsl_type::uint_type); - bld.ADD(srcs[SURFACE_LOGICAL_SRC_ADDRESS], - retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD), - brw_imm_ud(nir_intrinsic_base(instr))); - } - - /* Emit the actual atomic operation operation */ - - switch (nir_dest_bit_size(instr->dest)) { - case 16: { - fs_reg dest32 = bld.vgrf(BRW_REGISTER_TYPE_UD); - bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL, - retype(dest32, dest.type), - srcs, SURFACE_LOGICAL_NUM_SRCS); - bld.MOV(retype(dest, BRW_REGISTER_TYPE_UW), - retype(dest32, BRW_REGISTER_TYPE_UD)); - break; - } - - case 32: - case 64: - bld.emit(SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL, - dest, srcs, SURFACE_LOGICAL_NUM_SRCS); - break; - default: - unreachable("Unsupported bit size"); - } - -} - void fs_visitor::nir_emit_global_atomic(const fs_builder &bld, nir_intrinsic_instr *instr)