intel/fs: Add Wa_22013689345
v2: Use a simpler framework (Lionel) v3: Rebase, add task/mesh (Lionel) v4: Fixup fence exec size (SIMDX -> SIMD1) v5: Fix invalidate_analysis, add finishme comment (Curro) Signed-off-by: Sagar Ghuge <sagar.ghuge@intel.com> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: 22.0 <mesa-stable> Reviewed-by: Tapani Pälli <tapani.palli@intel.com> Reviewed-by: Francisco Jerez <currojerez@riseup.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14947>
This commit is contained in:
@@ -679,6 +679,7 @@ static const char* const lsc_flush_type[] = {
|
||||
[LSC_FLUSH_TYPE_DISCARD] = "discard",
|
||||
[LSC_FLUSH_TYPE_CLEAN] = "clean",
|
||||
[LSC_FLUSH_TYPE_L3ONLY] = "l3only",
|
||||
[LSC_FLUSH_TYPE_NONE_6] = "none_6",
|
||||
};
|
||||
|
||||
static const char* const lsc_addr_size[] = {
|
||||
|
@@ -1781,6 +1781,7 @@ brw_memory_fence(struct brw_codegen *p,
|
||||
struct brw_reg src,
|
||||
enum opcode send_op,
|
||||
enum brw_message_target sfid,
|
||||
uint32_t desc,
|
||||
bool commit_enable,
|
||||
unsigned bti);
|
||||
|
||||
|
@@ -1991,6 +1991,11 @@ enum PACKED lsc_flush_type {
|
||||
* Flush "RW" section of the L3 cache, but leave L1 and L2 caches untouched.
|
||||
*/
|
||||
LSC_FLUSH_TYPE_L3ONLY = 5,
|
||||
/*
|
||||
* HW maps this flush type internally to NONE.
|
||||
*/
|
||||
LSC_FLUSH_TYPE_NONE_6 = 6,
|
||||
|
||||
};
|
||||
|
||||
enum PACKED lsc_backup_fence_routing {
|
||||
|
@@ -3256,7 +3256,8 @@ brw_set_memory_fence_message(struct brw_codegen *p,
|
||||
static void
|
||||
gfx12_set_memory_fence_message(struct brw_codegen *p,
|
||||
struct brw_inst *insn,
|
||||
enum brw_message_target sfid)
|
||||
enum brw_message_target sfid,
|
||||
uint32_t desc)
|
||||
{
|
||||
const unsigned mlen = 1; /* g0 header */
|
||||
/* Completion signaled by write to register. No data returned. */
|
||||
@@ -3268,8 +3269,8 @@ gfx12_set_memory_fence_message(struct brw_codegen *p,
|
||||
brw_set_desc(p, insn, brw_urb_fence_desc(p->devinfo) |
|
||||
brw_message_desc(p->devinfo, mlen, rlen, false));
|
||||
} else {
|
||||
enum lsc_fence_scope scope = LSC_FENCE_THREADGROUP;
|
||||
enum lsc_flush_type flush_type = LSC_FLUSH_TYPE_NONE;
|
||||
enum lsc_fence_scope scope = lsc_fence_msg_desc_scope(p->devinfo, desc);
|
||||
enum lsc_flush_type flush_type = lsc_fence_msg_desc_flush_type(p->devinfo, desc);
|
||||
|
||||
if (sfid == GFX12_SFID_TGM) {
|
||||
scope = LSC_FENCE_TILE;
|
||||
@@ -3288,6 +3289,7 @@ brw_memory_fence(struct brw_codegen *p,
|
||||
struct brw_reg src,
|
||||
enum opcode send_op,
|
||||
enum brw_message_target sfid,
|
||||
uint32_t desc,
|
||||
bool commit_enable,
|
||||
unsigned bti)
|
||||
{
|
||||
@@ -3307,7 +3309,7 @@ brw_memory_fence(struct brw_codegen *p,
|
||||
|
||||
/* All DG2 hardware requires LSC for fence messages, even A-step */
|
||||
if (devinfo->has_lsc)
|
||||
gfx12_set_memory_fence_message(p, insn, sfid);
|
||||
gfx12_set_memory_fence_message(p, insn, sfid, desc);
|
||||
else
|
||||
brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti);
|
||||
}
|
||||
|
@@ -8618,6 +8618,75 @@ fs_visitor::fixup_3src_null_dest()
|
||||
DEPENDENCY_VARIABLES);
|
||||
}
|
||||
|
||||
static bool
|
||||
needs_dummy_fence(const intel_device_info *devinfo, fs_inst *inst)
|
||||
{
|
||||
/* This workaround is about making sure that any instruction writing
|
||||
* through UGM has completed before we hit EOT.
|
||||
*
|
||||
* The workaround talks about UGM writes or atomic message but what is
|
||||
* important is anything that hasn't completed. Usually any SEND
|
||||
* instruction that has a destination register will be read by something
|
||||
* else so we don't need to care about those as they will be synchronized
|
||||
* by other parts of the shader or optimized away. What is left are
|
||||
* instructions that don't have a destination register.
|
||||
*/
|
||||
if (inst->sfid != GFX12_SFID_UGM)
|
||||
return false;
|
||||
|
||||
return inst->dst.file == BAD_FILE;
|
||||
}
|
||||
|
||||
/* Wa_22013689345
|
||||
*
|
||||
* We need to emit UGM fence message before EOT, if shader has any UGM write
|
||||
* or atomic message.
|
||||
*
|
||||
* TODO/FINISHME: According to Curro we could avoid the fence in some cases.
|
||||
* We probably need a better criteria in needs_dummy_fence().
|
||||
*/
|
||||
void
|
||||
fs_visitor::emit_dummy_memory_fence_before_eot()
|
||||
{
|
||||
bool progress = false;
|
||||
bool has_ugm_write_or_atomic = false;
|
||||
|
||||
if (!intel_device_info_is_dg2(devinfo))
|
||||
return;
|
||||
|
||||
foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
|
||||
if (!inst->eot) {
|
||||
if (needs_dummy_fence(devinfo, inst))
|
||||
has_ugm_write_or_atomic = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!has_ugm_write_or_atomic)
|
||||
break;
|
||||
|
||||
const fs_builder ibld(this, block, inst);
|
||||
const fs_builder ubld = ibld.exec_all().group(1, 0);
|
||||
|
||||
fs_reg dst = ubld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||
fs_inst *dummy_fence = ubld.emit(SHADER_OPCODE_MEMORY_FENCE,
|
||||
dst, brw_vec8_grf(0, 0),
|
||||
/* commit enable */ brw_imm_ud(1),
|
||||
/* bti */ brw_imm_ud(0));
|
||||
dummy_fence->sfid = GFX12_SFID_UGM;
|
||||
dummy_fence->desc = lsc_fence_msg_desc(devinfo, LSC_FENCE_TILE,
|
||||
LSC_FLUSH_TYPE_NONE_6, false);
|
||||
ubld.emit(FS_OPCODE_SCHEDULING_FENCE, ubld.null_reg_ud(), dst);
|
||||
progress = true;
|
||||
/* TODO: remove this break if we ever have shader with multiple EOT. */
|
||||
break;
|
||||
}
|
||||
|
||||
if (progress) {
|
||||
invalidate_analysis(DEPENDENCY_INSTRUCTIONS |
|
||||
DEPENDENCY_VARIABLES);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the first instruction in the program that might start a region of
|
||||
* divergent control flow due to a HALT jump. There is no
|
||||
@@ -8927,6 +8996,7 @@ fs_visitor::run_vs()
|
||||
assign_vs_urb_setup();
|
||||
|
||||
fixup_3src_null_dest();
|
||||
emit_dummy_memory_fence_before_eot();
|
||||
allocate_registers(true /* allow_spilling */);
|
||||
|
||||
return !failed;
|
||||
@@ -9049,6 +9119,7 @@ fs_visitor::run_tcs()
|
||||
assign_tcs_urb_setup();
|
||||
|
||||
fixup_3src_null_dest();
|
||||
emit_dummy_memory_fence_before_eot();
|
||||
allocate_registers(true /* allow_spilling */);
|
||||
|
||||
return !failed;
|
||||
@@ -9077,6 +9148,7 @@ fs_visitor::run_tes()
|
||||
assign_tes_urb_setup();
|
||||
|
||||
fixup_3src_null_dest();
|
||||
emit_dummy_memory_fence_before_eot();
|
||||
allocate_registers(true /* allow_spilling */);
|
||||
|
||||
return !failed;
|
||||
@@ -9120,6 +9192,7 @@ fs_visitor::run_gs()
|
||||
assign_gs_urb_setup();
|
||||
|
||||
fixup_3src_null_dest();
|
||||
emit_dummy_memory_fence_before_eot();
|
||||
allocate_registers(true /* allow_spilling */);
|
||||
|
||||
return !failed;
|
||||
@@ -9220,6 +9293,7 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
|
||||
assign_urb_setup();
|
||||
|
||||
fixup_3src_null_dest();
|
||||
emit_dummy_memory_fence_before_eot();
|
||||
|
||||
allocate_registers(allow_spilling);
|
||||
}
|
||||
@@ -9255,6 +9329,7 @@ fs_visitor::run_cs(bool allow_spilling)
|
||||
assign_curb_setup();
|
||||
|
||||
fixup_3src_null_dest();
|
||||
emit_dummy_memory_fence_before_eot();
|
||||
allocate_registers(allow_spilling);
|
||||
|
||||
return !failed;
|
||||
@@ -9283,6 +9358,7 @@ fs_visitor::run_bs(bool allow_spilling)
|
||||
assign_curb_setup();
|
||||
|
||||
fixup_3src_null_dest();
|
||||
emit_dummy_memory_fence_before_eot();
|
||||
allocate_registers(allow_spilling);
|
||||
|
||||
return !failed;
|
||||
@@ -9327,6 +9403,7 @@ fs_visitor::run_task(bool allow_spilling)
|
||||
assign_curb_setup();
|
||||
|
||||
fixup_3src_null_dest();
|
||||
emit_dummy_memory_fence_before_eot();
|
||||
allocate_registers(allow_spilling);
|
||||
|
||||
return !failed;
|
||||
@@ -9371,6 +9448,7 @@ fs_visitor::run_mesh(bool allow_spilling)
|
||||
assign_curb_setup();
|
||||
|
||||
fixup_3src_null_dest();
|
||||
emit_dummy_memory_fence_before_eot();
|
||||
allocate_registers(allow_spilling);
|
||||
|
||||
return !failed;
|
||||
|
@@ -136,6 +136,7 @@ public:
|
||||
void setup_cs_payload();
|
||||
bool fixup_sends_duplicate_payload();
|
||||
void fixup_3src_null_dest();
|
||||
void emit_dummy_memory_fence_before_eot();
|
||||
bool fixup_nomask_control_flow();
|
||||
void assign_curb_setup();
|
||||
void assign_urb_setup();
|
||||
|
@@ -2382,6 +2382,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
||||
|
||||
brw_memory_fence(p, dst, src[0], send_op,
|
||||
brw_message_target(inst->sfid),
|
||||
inst->desc,
|
||||
/* commit_enable */ src[1].ud,
|
||||
/* bti */ src[2].ud);
|
||||
send_count++;
|
||||
|
@@ -1926,6 +1926,7 @@ generate_code(struct brw_codegen *p,
|
||||
case SHADER_OPCODE_MEMORY_FENCE:
|
||||
brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND,
|
||||
brw_message_target(inst->sfid),
|
||||
inst->desc,
|
||||
/* commit_enable */ false,
|
||||
/* bti */ 0);
|
||||
send_count++;
|
||||
|
Reference in New Issue
Block a user