diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index 82b06a55e23..abd8f5a9f74 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -1154,6 +1154,21 @@ brw_fb_write_desc_coarse_write(const struct intel_device_info *devinfo, return GET_BITS(desc, 18, 18); } +static inline uint32_t /* Builds the descriptor for an LSC fence message by OR-ing the SET_BITS() fields below: the LSC_OP_FENCE opcode, the A32 address size, the caller's scope and flush type, the route_to_lsc flag and the flat surface type. */ +lsc_fence_msg_desc(UNUSED const struct intel_device_info *devinfo, + enum lsc_fence_scope scope, + enum lsc_flush_type flush_type, + bool route_to_lsc) +{ + assert(devinfo->has_lsc); /* the only read of devinfo in this helper; presumably why the parameter is marked UNUSED */ + return SET_BITS(LSC_OP_FENCE, 5, 0) | + SET_BITS(LSC_ADDR_SIZE_A32, 8, 7) | + SET_BITS(scope, 11, 9) | + SET_BITS(flush_type, 14, 12) | + SET_BITS(route_to_lsc, 18, 18) | + SET_BITS(LSC_ADDR_SURFTYPE_FLAT, 30, 29); +} + static inline uint32_t brw_mdc_sm2(unsigned exec_size) { diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index ba73c0049ef..b81cbb9c949 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -3235,6 +3235,30 @@ brw_set_memory_fence_message(struct brw_codegen *p, brw_inst_set_binding_table_index(devinfo, insn, bti); } +static void /* Sets the SFID of insn and gives it an LSC fence descriptor chosen from sfid. */ +gfx12_set_memory_fence_message(struct brw_codegen *p, + struct brw_inst *insn, + enum brw_message_target sfid) +{ + const unsigned mlen = 1; /* g0 header */ + /* Completion signaled by write to register. No data returned. 
*/ + const unsigned rlen = 1; + + brw_inst_set_sfid(p->devinfo, insn, sfid); + + enum lsc_fence_scope scope = LSC_FENCE_THREADGROUP; + enum lsc_flush_type flush_type = LSC_FLUSH_TYPE_NONE; /* scope and flush type used for every SFID other than TGM */ + + if (sfid == GFX12_SFID_TGM) { /* TGM fences instead use GPU scope with an evict flush */ + scope = LSC_FENCE_GPU; + flush_type = LSC_FLUSH_TYPE_EVICT; + } + + brw_set_desc(p, insn, lsc_fence_msg_desc(p->devinfo, scope, + flush_type, false) | /* route_to_lsc is always passed as false here */ + brw_message_desc(p->devinfo, mlen, rlen, false)); +} + void brw_memory_fence(struct brw_codegen *p, struct brw_reg dst, @@ -3257,7 +3281,12 @@ brw_memory_fence(struct brw_codegen *p, brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1); brw_set_dest(p, insn, dst); brw_set_src0(p, insn, src); - brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti); + + /* All DG2 hardware requires LSC for fence messages, even A-step */ + if (devinfo->has_lsc) + gfx12_set_memory_fence_message(p, insn, sfid); /* commit_enable and bti are not forwarded on the LSC path */ + else + brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti); } void diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 64a714516ad..f0ec7dc839b 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -4270,7 +4270,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr case nir_intrinsic_memory_barrier: case nir_intrinsic_begin_invocation_interlock: case nir_intrinsic_end_invocation_interlock: { - bool l3_fence, slm_fence; + bool l3_fence, slm_fence, tgm_fence = false; /* tgm_fence requests an extra fence sent with GFX12_SFID_TGM; it stays false unless a case below sets it */ const enum opcode opcode = instr->intrinsic == nir_intrinsic_begin_invocation_interlock ? 
SHADER_OPCODE_INTERLOCK : SHADER_OPCODE_MEMORY_FENCE; @@ -4282,6 +4282,10 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr nir_var_mem_ssbo | nir_var_mem_global); slm_fence = modes & nir_var_mem_shared; + + /* NIR currently doesn't have an image mode */ + if (devinfo->has_lsc) + tgm_fence = modes & nir_var_mem_ssbo; /* NOTE(review): the SSBO mode bit appears to stand in for image access here - confirm */ break; } @@ -4312,6 +4316,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr slm_fence = instr->intrinsic == nir_intrinsic_group_memory_barrier || instr->intrinsic == nir_intrinsic_memory_barrier || instr->intrinsic == nir_intrinsic_memory_barrier_shared; + tgm_fence = instr->intrinsic == nir_intrinsic_memory_barrier_image; /* set regardless of has_lsc; the TGM fence emission further down checks devinfo->has_lsc itself */ break; } @@ -4354,7 +4359,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr devinfo->ver >= 10; /* HSD ES # 1404612949 */ unsigned fence_regs_count = 0; - fs_reg fence_regs[2] = {}; + fs_reg fence_regs[3] = {}; /* grown by one slot to hold the destination of the additional TGM fence */ const fs_builder ubld = bld.group(8, 0); @@ -4364,8 +4369,11 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr ubld.vgrf(BRW_REGISTER_TYPE_UD), brw_vec8_grf(0, 0), brw_imm_ud(commit_enable), - brw_imm_ud(/* bti */ 0)); - fence->sfid = GFX7_SFID_DATAPORT_DATA_CACHE; + brw_imm_ud(0 /* BTI; ignored for LSC */)); + + fence->sfid = devinfo->has_lsc ? 
+ GFX12_SFID_UGM : + GFX7_SFID_DATAPORT_DATA_CACHE; fence_regs[fence_regs_count++] = fence->dst; @@ -4380,6 +4388,19 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr fence_regs[fence_regs_count++] = render_fence->dst; } + + /* Translate l3_fence into untyped and typed fence on XeHP */ + if (devinfo->has_lsc && tgm_fence) { + fs_inst *fence = + ubld.emit(opcode, + ubld.vgrf(BRW_REGISTER_TYPE_UD), + brw_vec8_grf(0, 0), + brw_imm_ud(commit_enable), + brw_imm_ud(/* ignored */0)); + + fence->sfid = GFX12_SFID_TGM; /* selects the TGM branch in gfx12_set_memory_fence_message */ + fence_regs[fence_regs_count++] = fence->dst; + } } if (slm_fence) { @@ -4389,13 +4410,16 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr ubld.vgrf(BRW_REGISTER_TYPE_UD), brw_vec8_grf(0, 0), brw_imm_ud(commit_enable), - brw_imm_ud(GFX7_BTI_SLM)); - fence->sfid = GFX7_SFID_DATAPORT_DATA_CACHE; + brw_imm_ud(GFX7_BTI_SLM /* ignored for LSC */)); + if (devinfo->has_lsc) + fence->sfid = GFX12_SFID_SLM; + else + fence->sfid = GFX7_SFID_DATAPORT_DATA_CACHE; fence_regs[fence_regs_count++] = fence->dst; } - assert(fence_regs_count <= 2); + assert(fence_regs_count <= 3); /* bound must match the size of fence_regs */ if (stall || fence_regs_count == 0) { ubld.exec_all().group(1, 0).emit( diff --git a/src/intel/compiler/brw_ir_performance.cpp b/src/intel/compiler/brw_ir_performance.cpp index a9f93609561..fdffdfdaf83 100644 --- a/src/intel/compiler/brw_ir_performance.cpp +++ b/src/intel/compiler/brw_ir_performance.cpp @@ -939,6 +939,9 @@ namespace { abort(); case GFX7_SFID_DATAPORT_DATA_CACHE: + case GFX12_SFID_SLM: + case GFX12_SFID_TGM: + case GFX12_SFID_UGM: /* the three LSC SFIDs share the handling of the data-cache dataport cases around them */ case HSW_SFID_DATAPORT_DATA_CACHE_1: if (devinfo->ver >= 7) return calculate_desc(info, unit_dp_dc, 2, 0, 0, 30 /* XXX */, 0,