intel/compiler: Add support for LSC fence operations
v2 (Jason Ekstrand):
 - Squash SLM and global fence ops together

v3 (Jason Ekstrand):
 - Rework to use message descriptors instead of instruction fields

v4 (Jason Ekstrand):
 - Don't pass BTI into back-end emit function. Always use FLAT.

Co-authored-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Sagar Ghuge <sagar.ghuge@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11600>
This commit is contained in:
@@ -1154,6 +1154,21 @@ brw_fb_write_desc_coarse_write(const struct intel_device_info *devinfo,
|
||||
return GET_BITS(desc, 18, 18);
|
||||
}
|
||||
|
||||
/**
 * Build the 32-bit message descriptor for an LSC fence operation.
 *
 * The descriptor is the bitwise OR of six fields: the fence opcode, a fixed
 * A32 address size, the caller-supplied fence scope and flush type, the
 * route_to_lsc flag, and a fixed FLAT address surface type.
 *
 * \param devinfo       Device description; only read by the has_lsc
 *                      assertion below.
 * \param scope         Fence scope, placed in the field given as (11, 9).
 * \param flush_type    Flush type, placed in the field given as (14, 12).
 * \param route_to_lsc  Flag placed in the single-bit field given as (18, 18).
 *
 * \return The assembled descriptor value.
 *
 * NOTE(review): the numeric pairs passed to SET_BITS are presumably
 * (high bit, low bit) inclusive -- confirm against the SET_BITS definition.
 */
static inline uint32_t
|
||||
lsc_fence_msg_desc(UNUSED const struct intel_device_info *devinfo,
|
||||
enum lsc_fence_scope scope,
|
||||
enum lsc_flush_type flush_type,
|
||||
bool route_to_lsc)
|
||||
{
|
||||
/* devinfo is only read here; the UNUSED annotation on the parameter
 * presumably silences unused-parameter warnings in builds where assert()
 * compiles to nothing -- TODO confirm.
 */
assert(devinfo->has_lsc);
|
||||
return SET_BITS(LSC_OP_FENCE, 5, 0) | /* opcode: always a fence */
|
||||
SET_BITS(LSC_ADDR_SIZE_A32, 8, 7) | /* address size: fixed to A32 */
|
||||
SET_BITS(scope, 11, 9) | /* caller-selected fence scope */
|
||||
SET_BITS(flush_type, 14, 12) | /* caller-selected flush type */
|
||||
SET_BITS(route_to_lsc, 18, 18) | /* single-bit routing flag */
|
||||
SET_BITS(LSC_ADDR_SURFTYPE_FLAT, 30, 29); /* surface type: always FLAT */
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
brw_mdc_sm2(unsigned exec_size)
|
||||
{
|
||||
|
@@ -3235,6 +3235,30 @@ brw_set_memory_fence_message(struct brw_codegen *p,
|
||||
brw_inst_set_binding_table_index(devinfo, insn, bti);
|
||||
}
|
||||
|
||||
/**
 * Program \p insn as an LSC fence message aimed at the shared function
 * \p sfid.
 *
 * The SFID is written to the instruction, then the descriptor is set to the
 * OR of an LSC fence descriptor (see lsc_fence_msg_desc()) and a generic
 * message descriptor carrying the message and response lengths.
 *
 * Scope and flush type depend on the target:
 *  - GFX12_SFID_TGM: LSC_FENCE_GPU scope with LSC_FLUSH_TYPE_EVICT.
 *  - any other SFID: LSC_FENCE_THREADGROUP scope with LSC_FLUSH_TYPE_NONE.
 *
 * \param p     Code generator state; supplies devinfo.
 * \param insn  Instruction whose SFID and descriptor are written.
 * \param sfid  Shared function the fence is sent to.
 */
static void
|
||||
gfx12_set_memory_fence_message(struct brw_codegen *p,
|
||||
struct brw_inst *insn,
|
||||
enum brw_message_target sfid)
|
||||
{
|
||||
const unsigned mlen = 1; /* g0 header */
|
||||
/* Completion signaled by write to register. No data returned. */
|
||||
const unsigned rlen = 1;
|
||||
|
||||
brw_inst_set_sfid(p->devinfo, insn, sfid);
|
||||
|
||||
/* Defaults used for every target except TGM. */
enum lsc_fence_scope scope = LSC_FENCE_THREADGROUP;
|
||||
enum lsc_flush_type flush_type = LSC_FLUSH_TYPE_NONE;
|
||||
|
||||
/* TGM fences use GPU scope and an evicting flush instead. */
if (sfid == GFX12_SFID_TGM) {
|
||||
scope = LSC_FENCE_GPU;
|
||||
flush_type = LSC_FLUSH_TYPE_EVICT;
|
||||
}
|
||||
|
||||
/* route_to_lsc is always passed as false here.
 *
 * NOTE(review): the trailing `false` given to brw_message_desc() is
 * presumably its header-present argument, while mlen above is commented as
 * a g0 header -- confirm the two are meant to agree.
 */
brw_set_desc(p, insn, lsc_fence_msg_desc(p->devinfo, scope,
|
||||
flush_type, false) |
|
||||
brw_message_desc(p->devinfo, mlen, rlen, false));
|
||||
}
|
||||
|
||||
void
|
||||
brw_memory_fence(struct brw_codegen *p,
|
||||
struct brw_reg dst,
|
||||
@@ -3257,7 +3281,12 @@ brw_memory_fence(struct brw_codegen *p,
|
||||
brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
|
||||
brw_set_dest(p, insn, dst);
|
||||
brw_set_src0(p, insn, src);
|
||||
brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti);
|
||||
|
||||
/* All DG2 hardware requires LSC for fence messages, even A-step */
|
||||
if (devinfo->has_lsc)
|
||||
gfx12_set_memory_fence_message(p, insn, sfid);
|
||||
else
|
||||
brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti);
|
||||
}
|
||||
|
||||
void
|
||||
|
@@ -4270,7 +4270,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
||||
case nir_intrinsic_memory_barrier:
|
||||
case nir_intrinsic_begin_invocation_interlock:
|
||||
case nir_intrinsic_end_invocation_interlock: {
|
||||
bool l3_fence, slm_fence;
|
||||
bool l3_fence, slm_fence, tgm_fence = false;
|
||||
const enum opcode opcode =
|
||||
instr->intrinsic == nir_intrinsic_begin_invocation_interlock ?
|
||||
SHADER_OPCODE_INTERLOCK : SHADER_OPCODE_MEMORY_FENCE;
|
||||
@@ -4282,6 +4282,10 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
||||
nir_var_mem_ssbo |
|
||||
nir_var_mem_global);
|
||||
slm_fence = modes & nir_var_mem_shared;
|
||||
|
||||
/* NIR currently doesn't have an image mode */
|
||||
if (devinfo->has_lsc)
|
||||
tgm_fence = modes & nir_var_mem_ssbo;
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -4312,6 +4316,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
||||
slm_fence = instr->intrinsic == nir_intrinsic_group_memory_barrier ||
|
||||
instr->intrinsic == nir_intrinsic_memory_barrier ||
|
||||
instr->intrinsic == nir_intrinsic_memory_barrier_shared;
|
||||
tgm_fence = instr->intrinsic == nir_intrinsic_memory_barrier_image;
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -4354,7 +4359,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
||||
devinfo->ver >= 10; /* HSD ES # 1404612949 */
|
||||
|
||||
unsigned fence_regs_count = 0;
|
||||
fs_reg fence_regs[2] = {};
|
||||
fs_reg fence_regs[3] = {};
|
||||
|
||||
const fs_builder ubld = bld.group(8, 0);
|
||||
|
||||
@@ -4364,8 +4369,11 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
||||
ubld.vgrf(BRW_REGISTER_TYPE_UD),
|
||||
brw_vec8_grf(0, 0),
|
||||
brw_imm_ud(commit_enable),
|
||||
brw_imm_ud(/* bti */ 0));
|
||||
fence->sfid = GFX7_SFID_DATAPORT_DATA_CACHE;
|
||||
brw_imm_ud(0 /* BTI; ignored for LSC */));
|
||||
|
||||
fence->sfid = devinfo->has_lsc ?
|
||||
GFX12_SFID_UGM :
|
||||
GFX7_SFID_DATAPORT_DATA_CACHE;
|
||||
|
||||
fence_regs[fence_regs_count++] = fence->dst;
|
||||
|
||||
@@ -4380,6 +4388,19 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
||||
|
||||
fence_regs[fence_regs_count++] = render_fence->dst;
|
||||
}
|
||||
|
||||
/* Translate l3_fence into untyped and typed fence on XeHP */
|
||||
if (devinfo->has_lsc && tgm_fence) {
|
||||
fs_inst *fence =
|
||||
ubld.emit(opcode,
|
||||
ubld.vgrf(BRW_REGISTER_TYPE_UD),
|
||||
brw_vec8_grf(0, 0),
|
||||
brw_imm_ud(commit_enable),
|
||||
brw_imm_ud(/* ignored */0));
|
||||
|
||||
fence->sfid = GFX12_SFID_TGM;
|
||||
fence_regs[fence_regs_count++] = fence->dst;
|
||||
}
|
||||
}
|
||||
|
||||
if (slm_fence) {
|
||||
@@ -4389,13 +4410,16 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
||||
ubld.vgrf(BRW_REGISTER_TYPE_UD),
|
||||
brw_vec8_grf(0, 0),
|
||||
brw_imm_ud(commit_enable),
|
||||
brw_imm_ud(GFX7_BTI_SLM));
|
||||
fence->sfid = GFX7_SFID_DATAPORT_DATA_CACHE;
|
||||
brw_imm_ud(GFX7_BTI_SLM /* ignored for LSC */));
|
||||
if (devinfo->has_lsc)
|
||||
fence->sfid = GFX12_SFID_SLM;
|
||||
else
|
||||
fence->sfid = GFX7_SFID_DATAPORT_DATA_CACHE;
|
||||
|
||||
fence_regs[fence_regs_count++] = fence->dst;
|
||||
}
|
||||
|
||||
assert(fence_regs_count <= 2);
|
||||
assert(fence_regs_count <= 3);
|
||||
|
||||
if (stall || fence_regs_count == 0) {
|
||||
ubld.exec_all().group(1, 0).emit(
|
||||
|
@@ -939,6 +939,9 @@ namespace {
|
||||
abort();
|
||||
|
||||
case GFX7_SFID_DATAPORT_DATA_CACHE:
|
||||
case GFX12_SFID_SLM:
|
||||
case GFX12_SFID_TGM:
|
||||
case GFX12_SFID_UGM:
|
||||
case HSW_SFID_DATAPORT_DATA_CACHE_1:
|
||||
if (devinfo->ver >= 7)
|
||||
return calculate_desc(info, unit_dp_dc, 2, 0, 0, 30 /* XXX */, 0,
|
||||
|
Reference in New Issue
Block a user