intel/compiler: Add support for LSC fence operations

v2 (Jason Ekstrand):
 - Squash SLM and global fence ops together

v3 (Jason Ekstrand):
 - Rework to use message descriptors instead of instruction fields

v4 (Jason Ekstrand):
 - Don't pass BTI into back-end emit function.  Always use FLAT.

Co-authored-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Sagar Ghuge <sagar.ghuge@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11600>
This commit is contained in:
Sagar Ghuge
2020-07-11 18:33:05 -07:00
committed by Marge Bot
parent cf612e4dc1
commit b67f1ff465
4 changed files with 79 additions and 8 deletions

View File

@@ -1154,6 +1154,21 @@ brw_fb_write_desc_coarse_write(const struct intel_device_info *devinfo,
return GET_BITS(desc, 18, 18);
}
static inline uint32_t
lsc_fence_msg_desc(UNUSED const struct intel_device_info *devinfo,
                   enum lsc_fence_scope scope,
                   enum lsc_flush_type flush_type,
                   bool route_to_lsc)
{
   /* Build an LSC FENCE message descriptor.  Fences always use a flat
    * (stateless) A32 address model; scope, flush type, and the
    * route-to-LSC bit are caller-selected.  Only valid on platforms
    * with an LSC.
    */
   assert(devinfo->has_lsc);

   uint32_t desc = 0;
   desc |= SET_BITS(LSC_OP_FENCE, 5, 0);
   desc |= SET_BITS(LSC_ADDR_SIZE_A32, 8, 7);
   desc |= SET_BITS(scope, 11, 9);
   desc |= SET_BITS(flush_type, 14, 12);
   desc |= SET_BITS(route_to_lsc, 18, 18);
   desc |= SET_BITS(LSC_ADDR_SURFTYPE_FLAT, 30, 29);
   return desc;
}
static inline uint32_t
brw_mdc_sm2(unsigned exec_size)
{

View File

@@ -3235,6 +3235,30 @@ brw_set_memory_fence_message(struct brw_codegen *p,
brw_inst_set_binding_table_index(devinfo, insn, bti);
}
static void
gfx12_set_memory_fence_message(struct brw_codegen *p,
                               struct brw_inst *insn,
                               enum brw_message_target sfid)
{
   const unsigned mlen = 1; /* g0 header */
   /* Completion signaled by write to register. No data returned. */
   const unsigned rlen = 1;

   brw_inst_set_sfid(p->devinfo, insn, sfid);

   /* TGM fences are GPU-scope with an evict flush; every other SFID
    * gets a threadgroup-scope fence with no flush.
    */
   const bool is_tgm = sfid == GFX12_SFID_TGM;
   const enum lsc_fence_scope scope =
      is_tgm ? LSC_FENCE_GPU : LSC_FENCE_THREADGROUP;
   const enum lsc_flush_type flush_type =
      is_tgm ? LSC_FLUSH_TYPE_EVICT : LSC_FLUSH_TYPE_NONE;

   brw_set_desc(p, insn,
                lsc_fence_msg_desc(p->devinfo, scope, flush_type, false) |
                brw_message_desc(p->devinfo, mlen, rlen, false));
}
void
brw_memory_fence(struct brw_codegen *p,
struct brw_reg dst,
@@ -3257,7 +3281,12 @@ brw_memory_fence(struct brw_codegen *p,
brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
brw_set_dest(p, insn, dst);
brw_set_src0(p, insn, src);
brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti);
/* All DG2 hardware requires LSC for fence messages, even A-step */
if (devinfo->has_lsc)
gfx12_set_memory_fence_message(p, insn, sfid);
else
brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti);
}
void

View File

@@ -4270,7 +4270,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
case nir_intrinsic_memory_barrier:
case nir_intrinsic_begin_invocation_interlock:
case nir_intrinsic_end_invocation_interlock: {
bool l3_fence, slm_fence;
bool l3_fence, slm_fence, tgm_fence = false;
const enum opcode opcode =
instr->intrinsic == nir_intrinsic_begin_invocation_interlock ?
SHADER_OPCODE_INTERLOCK : SHADER_OPCODE_MEMORY_FENCE;
@@ -4282,6 +4282,10 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
nir_var_mem_ssbo |
nir_var_mem_global);
slm_fence = modes & nir_var_mem_shared;
/* NIR currently doesn't have an image mode */
if (devinfo->has_lsc)
tgm_fence = modes & nir_var_mem_ssbo;
break;
}
@@ -4312,6 +4316,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
slm_fence = instr->intrinsic == nir_intrinsic_group_memory_barrier ||
instr->intrinsic == nir_intrinsic_memory_barrier ||
instr->intrinsic == nir_intrinsic_memory_barrier_shared;
tgm_fence = instr->intrinsic == nir_intrinsic_memory_barrier_image;
break;
}
@@ -4354,7 +4359,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
devinfo->ver >= 10; /* HSD ES # 1404612949 */
unsigned fence_regs_count = 0;
fs_reg fence_regs[2] = {};
fs_reg fence_regs[3] = {};
const fs_builder ubld = bld.group(8, 0);
@@ -4364,8 +4369,11 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
ubld.vgrf(BRW_REGISTER_TYPE_UD),
brw_vec8_grf(0, 0),
brw_imm_ud(commit_enable),
brw_imm_ud(/* bti */ 0));
fence->sfid = GFX7_SFID_DATAPORT_DATA_CACHE;
brw_imm_ud(0 /* BTI; ignored for LSC */));
fence->sfid = devinfo->has_lsc ?
GFX12_SFID_UGM :
GFX7_SFID_DATAPORT_DATA_CACHE;
fence_regs[fence_regs_count++] = fence->dst;
@@ -4380,6 +4388,19 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
fence_regs[fence_regs_count++] = render_fence->dst;
}
/* Translate l3_fence into untyped and typed fence on XeHP */
if (devinfo->has_lsc && tgm_fence) {
fs_inst *fence =
ubld.emit(opcode,
ubld.vgrf(BRW_REGISTER_TYPE_UD),
brw_vec8_grf(0, 0),
brw_imm_ud(commit_enable),
brw_imm_ud(/* ignored */0));
fence->sfid = GFX12_SFID_TGM;
fence_regs[fence_regs_count++] = fence->dst;
}
}
if (slm_fence) {
@@ -4389,13 +4410,16 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
ubld.vgrf(BRW_REGISTER_TYPE_UD),
brw_vec8_grf(0, 0),
brw_imm_ud(commit_enable),
brw_imm_ud(GFX7_BTI_SLM));
fence->sfid = GFX7_SFID_DATAPORT_DATA_CACHE;
brw_imm_ud(GFX7_BTI_SLM /* ignored for LSC */));
if (devinfo->has_lsc)
fence->sfid = GFX12_SFID_SLM;
else
fence->sfid = GFX7_SFID_DATAPORT_DATA_CACHE;
fence_regs[fence_regs_count++] = fence->dst;
}
assert(fence_regs_count <= 2);
assert(fence_regs_count <= 3);
if (stall || fence_regs_count == 0) {
ubld.exec_all().group(1, 0).emit(

View File

@@ -939,6 +939,9 @@ namespace {
abort();
case GFX7_SFID_DATAPORT_DATA_CACHE:
case GFX12_SFID_SLM:
case GFX12_SFID_TGM:
case GFX12_SFID_UGM:
case HSW_SFID_DATAPORT_DATA_CACHE_1:
if (devinfo->ver >= 7)
return calculate_desc(info, unit_dp_dc, 2, 0, 0, 30 /* XXX */, 0,