intel/fs: Add Wa_22013689345
v2: Use a simpler framework (Lionel)
v3: Rebase, add task/mesh (Lionel)
v4: Fixup fence exec size (SIMDX -> SIMD1)
v5: Fix invalidate_analysis, add finishme comment (Curro)

Signed-off-by: Sagar Ghuge <sagar.ghuge@intel.com>
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: 22.0 <mesa-stable>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14947>
This commit is contained in:
@@ -679,6 +679,7 @@ static const char* const lsc_flush_type[] = {
|
|||||||
[LSC_FLUSH_TYPE_DISCARD] = "discard",
|
[LSC_FLUSH_TYPE_DISCARD] = "discard",
|
||||||
[LSC_FLUSH_TYPE_CLEAN] = "clean",
|
[LSC_FLUSH_TYPE_CLEAN] = "clean",
|
||||||
[LSC_FLUSH_TYPE_L3ONLY] = "l3only",
|
[LSC_FLUSH_TYPE_L3ONLY] = "l3only",
|
||||||
|
[LSC_FLUSH_TYPE_NONE_6] = "none_6",
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char* const lsc_addr_size[] = {
|
static const char* const lsc_addr_size[] = {
|
||||||
|
@@ -1781,6 +1781,7 @@ brw_memory_fence(struct brw_codegen *p,
|
|||||||
struct brw_reg src,
|
struct brw_reg src,
|
||||||
enum opcode send_op,
|
enum opcode send_op,
|
||||||
enum brw_message_target sfid,
|
enum brw_message_target sfid,
|
||||||
|
uint32_t desc,
|
||||||
bool commit_enable,
|
bool commit_enable,
|
||||||
unsigned bti);
|
unsigned bti);
|
||||||
|
|
||||||
|
@@ -1991,6 +1991,11 @@ enum PACKED lsc_flush_type {
|
|||||||
* Flush "RW" section of the L3 cache, but leave L1 and L2 caches untouched.
|
* Flush "RW" section of the L3 cache, but leave L1 and L2 caches untouched.
|
||||||
*/
|
*/
|
||||||
LSC_FLUSH_TYPE_L3ONLY = 5,
|
LSC_FLUSH_TYPE_L3ONLY = 5,
|
||||||
|
/*
|
||||||
|
* HW maps this flush type internally to NONE.
|
||||||
|
*/
|
||||||
|
LSC_FLUSH_TYPE_NONE_6 = 6,
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
enum PACKED lsc_backup_fence_routing {
|
enum PACKED lsc_backup_fence_routing {
|
||||||
|
@@ -3256,7 +3256,8 @@ brw_set_memory_fence_message(struct brw_codegen *p,
|
|||||||
static void
|
static void
|
||||||
gfx12_set_memory_fence_message(struct brw_codegen *p,
|
gfx12_set_memory_fence_message(struct brw_codegen *p,
|
||||||
struct brw_inst *insn,
|
struct brw_inst *insn,
|
||||||
enum brw_message_target sfid)
|
enum brw_message_target sfid,
|
||||||
|
uint32_t desc)
|
||||||
{
|
{
|
||||||
const unsigned mlen = 1; /* g0 header */
|
const unsigned mlen = 1; /* g0 header */
|
||||||
/* Completion signaled by write to register. No data returned. */
|
/* Completion signaled by write to register. No data returned. */
|
||||||
@@ -3268,8 +3269,8 @@ gfx12_set_memory_fence_message(struct brw_codegen *p,
|
|||||||
brw_set_desc(p, insn, brw_urb_fence_desc(p->devinfo) |
|
brw_set_desc(p, insn, brw_urb_fence_desc(p->devinfo) |
|
||||||
brw_message_desc(p->devinfo, mlen, rlen, false));
|
brw_message_desc(p->devinfo, mlen, rlen, false));
|
||||||
} else {
|
} else {
|
||||||
enum lsc_fence_scope scope = LSC_FENCE_THREADGROUP;
|
enum lsc_fence_scope scope = lsc_fence_msg_desc_scope(p->devinfo, desc);
|
||||||
enum lsc_flush_type flush_type = LSC_FLUSH_TYPE_NONE;
|
enum lsc_flush_type flush_type = lsc_fence_msg_desc_flush_type(p->devinfo, desc);
|
||||||
|
|
||||||
if (sfid == GFX12_SFID_TGM) {
|
if (sfid == GFX12_SFID_TGM) {
|
||||||
scope = LSC_FENCE_TILE;
|
scope = LSC_FENCE_TILE;
|
||||||
@@ -3288,6 +3289,7 @@ brw_memory_fence(struct brw_codegen *p,
|
|||||||
struct brw_reg src,
|
struct brw_reg src,
|
||||||
enum opcode send_op,
|
enum opcode send_op,
|
||||||
enum brw_message_target sfid,
|
enum brw_message_target sfid,
|
||||||
|
uint32_t desc,
|
||||||
bool commit_enable,
|
bool commit_enable,
|
||||||
unsigned bti)
|
unsigned bti)
|
||||||
{
|
{
|
||||||
@@ -3307,7 +3309,7 @@ brw_memory_fence(struct brw_codegen *p,
|
|||||||
|
|
||||||
/* All DG2 hardware requires LSC for fence messages, even A-step */
|
/* All DG2 hardware requires LSC for fence messages, even A-step */
|
||||||
if (devinfo->has_lsc)
|
if (devinfo->has_lsc)
|
||||||
gfx12_set_memory_fence_message(p, insn, sfid);
|
gfx12_set_memory_fence_message(p, insn, sfid, desc);
|
||||||
else
|
else
|
||||||
brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti);
|
brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti);
|
||||||
}
|
}
|
||||||
|
@@ -8618,6 +8618,75 @@ fs_visitor::fixup_3src_null_dest()
|
|||||||
DEPENDENCY_VARIABLES);
|
DEPENDENCY_VARIABLES);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
needs_dummy_fence(const intel_device_info *devinfo, fs_inst *inst)
|
||||||
|
{
|
||||||
|
/* This workaround is about making sure that any instruction writing
|
||||||
|
* through UGM has completed before we hit EOT.
|
||||||
|
*
|
||||||
|
* The workaround talks about UGM writes or atomic messages but what is
|
||||||
|
* important is anything that hasn't completed. Usually any SEND
|
||||||
|
* instruction that has a destination register will be read by something
|
||||||
|
* else so we don't need to care about those as they will be synchronized
|
||||||
|
* by other parts of the shader or optimized away. What is left are
|
||||||
|
* instructions that don't have a destination register.
|
||||||
|
*/
|
||||||
|
if (inst->sfid != GFX12_SFID_UGM)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return inst->dst.file == BAD_FILE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Wa_22013689345
|
||||||
|
*
|
||||||
|
* We need to emit UGM fence message before EOT, if shader has any UGM write
|
||||||
|
* or atomic message.
|
||||||
|
*
|
||||||
|
* TODO/FINISHME: According to Curro we could avoid the fence in some cases.
|
||||||
|
* We probably need better criteria in needs_dummy_fence().
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
fs_visitor::emit_dummy_memory_fence_before_eot()
|
||||||
|
{
|
||||||
|
bool progress = false;
|
||||||
|
bool has_ugm_write_or_atomic = false;
|
||||||
|
|
||||||
|
if (!intel_device_info_is_dg2(devinfo))
|
||||||
|
return;
|
||||||
|
|
||||||
|
foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
|
||||||
|
if (!inst->eot) {
|
||||||
|
if (needs_dummy_fence(devinfo, inst))
|
||||||
|
has_ugm_write_or_atomic = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!has_ugm_write_or_atomic)
|
||||||
|
break;
|
||||||
|
|
||||||
|
const fs_builder ibld(this, block, inst);
|
||||||
|
const fs_builder ubld = ibld.exec_all().group(1, 0);
|
||||||
|
|
||||||
|
fs_reg dst = ubld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||||
|
fs_inst *dummy_fence = ubld.emit(SHADER_OPCODE_MEMORY_FENCE,
|
||||||
|
dst, brw_vec8_grf(0, 0),
|
||||||
|
/* commit enable */ brw_imm_ud(1),
|
||||||
|
/* bti */ brw_imm_ud(0));
|
||||||
|
dummy_fence->sfid = GFX12_SFID_UGM;
|
||||||
|
dummy_fence->desc = lsc_fence_msg_desc(devinfo, LSC_FENCE_TILE,
|
||||||
|
LSC_FLUSH_TYPE_NONE_6, false);
|
||||||
|
ubld.emit(FS_OPCODE_SCHEDULING_FENCE, ubld.null_reg_ud(), dst);
|
||||||
|
progress = true;
|
||||||
|
/* TODO: remove this break if we ever have shader with multiple EOT. */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (progress) {
|
||||||
|
invalidate_analysis(DEPENDENCY_INSTRUCTIONS |
|
||||||
|
DEPENDENCY_VARIABLES);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Find the first instruction in the program that might start a region of
|
* Find the first instruction in the program that might start a region of
|
||||||
* divergent control flow due to a HALT jump. There is no
|
* divergent control flow due to a HALT jump. There is no
|
||||||
@@ -8927,6 +8996,7 @@ fs_visitor::run_vs()
|
|||||||
assign_vs_urb_setup();
|
assign_vs_urb_setup();
|
||||||
|
|
||||||
fixup_3src_null_dest();
|
fixup_3src_null_dest();
|
||||||
|
emit_dummy_memory_fence_before_eot();
|
||||||
allocate_registers(true /* allow_spilling */);
|
allocate_registers(true /* allow_spilling */);
|
||||||
|
|
||||||
return !failed;
|
return !failed;
|
||||||
@@ -9049,6 +9119,7 @@ fs_visitor::run_tcs()
|
|||||||
assign_tcs_urb_setup();
|
assign_tcs_urb_setup();
|
||||||
|
|
||||||
fixup_3src_null_dest();
|
fixup_3src_null_dest();
|
||||||
|
emit_dummy_memory_fence_before_eot();
|
||||||
allocate_registers(true /* allow_spilling */);
|
allocate_registers(true /* allow_spilling */);
|
||||||
|
|
||||||
return !failed;
|
return !failed;
|
||||||
@@ -9077,6 +9148,7 @@ fs_visitor::run_tes()
|
|||||||
assign_tes_urb_setup();
|
assign_tes_urb_setup();
|
||||||
|
|
||||||
fixup_3src_null_dest();
|
fixup_3src_null_dest();
|
||||||
|
emit_dummy_memory_fence_before_eot();
|
||||||
allocate_registers(true /* allow_spilling */);
|
allocate_registers(true /* allow_spilling */);
|
||||||
|
|
||||||
return !failed;
|
return !failed;
|
||||||
@@ -9120,6 +9192,7 @@ fs_visitor::run_gs()
|
|||||||
assign_gs_urb_setup();
|
assign_gs_urb_setup();
|
||||||
|
|
||||||
fixup_3src_null_dest();
|
fixup_3src_null_dest();
|
||||||
|
emit_dummy_memory_fence_before_eot();
|
||||||
allocate_registers(true /* allow_spilling */);
|
allocate_registers(true /* allow_spilling */);
|
||||||
|
|
||||||
return !failed;
|
return !failed;
|
||||||
@@ -9220,6 +9293,7 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
|
|||||||
assign_urb_setup();
|
assign_urb_setup();
|
||||||
|
|
||||||
fixup_3src_null_dest();
|
fixup_3src_null_dest();
|
||||||
|
emit_dummy_memory_fence_before_eot();
|
||||||
|
|
||||||
allocate_registers(allow_spilling);
|
allocate_registers(allow_spilling);
|
||||||
}
|
}
|
||||||
@@ -9255,6 +9329,7 @@ fs_visitor::run_cs(bool allow_spilling)
|
|||||||
assign_curb_setup();
|
assign_curb_setup();
|
||||||
|
|
||||||
fixup_3src_null_dest();
|
fixup_3src_null_dest();
|
||||||
|
emit_dummy_memory_fence_before_eot();
|
||||||
allocate_registers(allow_spilling);
|
allocate_registers(allow_spilling);
|
||||||
|
|
||||||
return !failed;
|
return !failed;
|
||||||
@@ -9283,6 +9358,7 @@ fs_visitor::run_bs(bool allow_spilling)
|
|||||||
assign_curb_setup();
|
assign_curb_setup();
|
||||||
|
|
||||||
fixup_3src_null_dest();
|
fixup_3src_null_dest();
|
||||||
|
emit_dummy_memory_fence_before_eot();
|
||||||
allocate_registers(allow_spilling);
|
allocate_registers(allow_spilling);
|
||||||
|
|
||||||
return !failed;
|
return !failed;
|
||||||
@@ -9327,6 +9403,7 @@ fs_visitor::run_task(bool allow_spilling)
|
|||||||
assign_curb_setup();
|
assign_curb_setup();
|
||||||
|
|
||||||
fixup_3src_null_dest();
|
fixup_3src_null_dest();
|
||||||
|
emit_dummy_memory_fence_before_eot();
|
||||||
allocate_registers(allow_spilling);
|
allocate_registers(allow_spilling);
|
||||||
|
|
||||||
return !failed;
|
return !failed;
|
||||||
@@ -9371,6 +9448,7 @@ fs_visitor::run_mesh(bool allow_spilling)
|
|||||||
assign_curb_setup();
|
assign_curb_setup();
|
||||||
|
|
||||||
fixup_3src_null_dest();
|
fixup_3src_null_dest();
|
||||||
|
emit_dummy_memory_fence_before_eot();
|
||||||
allocate_registers(allow_spilling);
|
allocate_registers(allow_spilling);
|
||||||
|
|
||||||
return !failed;
|
return !failed;
|
||||||
|
@@ -136,6 +136,7 @@ public:
|
|||||||
void setup_cs_payload();
|
void setup_cs_payload();
|
||||||
bool fixup_sends_duplicate_payload();
|
bool fixup_sends_duplicate_payload();
|
||||||
void fixup_3src_null_dest();
|
void fixup_3src_null_dest();
|
||||||
|
void emit_dummy_memory_fence_before_eot();
|
||||||
bool fixup_nomask_control_flow();
|
bool fixup_nomask_control_flow();
|
||||||
void assign_curb_setup();
|
void assign_curb_setup();
|
||||||
void assign_urb_setup();
|
void assign_urb_setup();
|
||||||
|
@@ -2382,6 +2382,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|||||||
|
|
||||||
brw_memory_fence(p, dst, src[0], send_op,
|
brw_memory_fence(p, dst, src[0], send_op,
|
||||||
brw_message_target(inst->sfid),
|
brw_message_target(inst->sfid),
|
||||||
|
inst->desc,
|
||||||
/* commit_enable */ src[1].ud,
|
/* commit_enable */ src[1].ud,
|
||||||
/* bti */ src[2].ud);
|
/* bti */ src[2].ud);
|
||||||
send_count++;
|
send_count++;
|
||||||
|
@@ -1926,6 +1926,7 @@ generate_code(struct brw_codegen *p,
|
|||||||
case SHADER_OPCODE_MEMORY_FENCE:
|
case SHADER_OPCODE_MEMORY_FENCE:
|
||||||
brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND,
|
brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND,
|
||||||
brw_message_target(inst->sfid),
|
brw_message_target(inst->sfid),
|
||||||
|
inst->desc,
|
||||||
/* commit_enable */ false,
|
/* commit_enable */ false,
|
||||||
/* bti */ 0);
|
/* bti */ 0);
|
||||||
send_count++;
|
send_count++;
|
||||||
|
Reference in New Issue
Block a user