intel/elk/chv: Implement WaClearArfDependenciesBeforeEot
Signed-off-by: Sviatoslav Peleshko <sviatoslav.peleshko@globallogic.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31746>
This commit is contained in:

committed by
Marge Bot

parent
2a4efe21c5
commit
ebd6738260
@@ -3318,6 +3318,94 @@ elk_fs_visitor::insert_gfx4_send_dependency_workarounds()
|
||||
invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
|
||||
}
|
||||
|
||||
/**
 * Widen a per-byte flag access mask to word granularity.
 *
 * flags_read() and flags_written() return flag accesses with byte
 * granularity, but the PRM lists "Access Granularity: Word" for the Flag
 * Register, so we can assume that accessing any part of a word clears the
 * register dependency of the whole word.
 *
 * Each bit of `b` stands for one flag byte; the adjacent bits 2k and 2k+1
 * are treated as the two bytes of one word.  In the returned mask both bits
 * of such a pair are set if either of them was set in `b`.
 */
static unsigned
bytes_bitmask_to_words(unsigned b)
{
   /* Even bits select the first byte of every word, odd bits the second. */
   const unsigned first_byte_mask = b & 0x55555555;
   const unsigned second_byte_mask = b & 0xaaaaaaaa;

   /* Copy every set byte bit onto its partner inside the same word. */
   return first_byte_mask | (first_byte_mask << 1) |
          second_byte_mask | (second_byte_mask >> 1);
}
|
||||
|
||||
/**
|
||||
* WaClearArfDependenciesBeforeEot
|
||||
*
|
||||
* Flag register dependency not cleared after EOT, so we have to source them
|
||||
* before EOT. We can do this with simple `mov(1) nullUD, f{0,1}UD`
|
||||
*
|
||||
* To avoid emitting MOVs when it's not needed, check if each block reads all
|
||||
* the flags it sets. We might falsely determine register as unread if it'll be
|
||||
* accessed inside the next blocks, but this still should be good enough.
|
||||
*/
|
||||
bool
|
||||
elk_fs_visitor::workaround_source_arf_before_eot()
|
||||
{
|
||||
bool progress = false;
|
||||
|
||||
if (devinfo->platform != INTEL_PLATFORM_CHV)
|
||||
return false;
|
||||
|
||||
unsigned flags_unread = 0;
|
||||
|
||||
foreach_block(block, cfg) {
|
||||
unsigned flags_unread_in_block = 0;
|
||||
|
||||
foreach_inst_in_block(elk_fs_inst, inst, block) {
|
||||
/* Instruction can read and write to the same flag, so the order is important */
|
||||
flags_unread_in_block &= ~bytes_bitmask_to_words(inst->flags_read(devinfo));
|
||||
flags_unread_in_block |= bytes_bitmask_to_words(inst->flags_written(devinfo));
|
||||
|
||||
/* HALT does not start its block even though it can leave a dependency */
|
||||
if (inst->opcode == ELK_OPCODE_HALT ||
|
||||
inst->opcode == ELK_SHADER_OPCODE_HALT_TARGET) {
|
||||
flags_unread |= flags_unread_in_block;
|
||||
flags_unread_in_block = 0;
|
||||
}
|
||||
}
|
||||
|
||||
flags_unread |= flags_unread_in_block;
|
||||
|
||||
if ((flags_unread & 0x0f) && (flags_unread & 0xf0))
|
||||
break;
|
||||
}
|
||||
|
||||
if (flags_unread) {
|
||||
int eot_count = 0;
|
||||
|
||||
foreach_block_and_inst_safe(block, elk_fs_inst, inst, cfg)
|
||||
{
|
||||
if (!inst->eot)
|
||||
continue;
|
||||
|
||||
/* Currently, we always emit only one EOT per program,
|
||||
* this WA should be updated if it ever changes.
|
||||
*/
|
||||
assert(++eot_count == 1);
|
||||
|
||||
const fs_builder ibld(this, block, inst);
|
||||
const fs_builder ubld = ibld.exec_all().group(1, 0);
|
||||
|
||||
if (flags_unread & 0x0f)
|
||||
ubld.MOV(ubld.null_reg_ud(), retype(elk_flag_reg(0, 0), ELK_REGISTER_TYPE_UD));
|
||||
|
||||
if (flags_unread & 0xf0)
|
||||
ubld.MOV(ubld.null_reg_ud(), retype(elk_flag_reg(1, 0), ELK_REGISTER_TYPE_UD));
|
||||
}
|
||||
|
||||
progress = true;
|
||||
invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
bool
|
||||
elk_fs_visitor::lower_load_payload()
|
||||
{
|
||||
@@ -5945,6 +6033,8 @@ elk_fs_visitor::run_vs()
|
||||
|
||||
allocate_registers(true /* allow_spilling */);
|
||||
|
||||
workaround_source_arf_before_eot();
|
||||
|
||||
return !failed;
|
||||
}
|
||||
|
||||
@@ -6062,6 +6152,8 @@ elk_fs_visitor::run_tcs()
|
||||
|
||||
allocate_registers(true /* allow_spilling */);
|
||||
|
||||
workaround_source_arf_before_eot();
|
||||
|
||||
return !failed;
|
||||
}
|
||||
|
||||
@@ -6090,6 +6182,8 @@ elk_fs_visitor::run_tes()
|
||||
|
||||
allocate_registers(true /* allow_spilling */);
|
||||
|
||||
workaround_source_arf_before_eot();
|
||||
|
||||
return !failed;
|
||||
}
|
||||
|
||||
@@ -6135,6 +6229,8 @@ elk_fs_visitor::run_gs()
|
||||
|
||||
allocate_registers(true /* allow_spilling */);
|
||||
|
||||
workaround_source_arf_before_eot();
|
||||
|
||||
return !failed;
|
||||
}
|
||||
|
||||
@@ -6206,6 +6302,8 @@ elk_fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
|
||||
fixup_3src_null_dest();
|
||||
|
||||
allocate_registers(allow_spilling);
|
||||
|
||||
workaround_source_arf_before_eot();
|
||||
}
|
||||
|
||||
return !failed;
|
||||
@@ -6244,6 +6342,8 @@ elk_fs_visitor::run_cs(bool allow_spilling)
|
||||
|
||||
allocate_registers(allow_spilling);
|
||||
|
||||
workaround_source_arf_before_eot();
|
||||
|
||||
return !failed;
|
||||
}
|
||||
|
||||
|
@@ -266,6 +266,7 @@ public:
|
||||
elk_fs_inst *inst);
|
||||
void insert_gfx4_post_send_dependency_workarounds(elk_bblock_t *block,
|
||||
elk_fs_inst *inst);
|
||||
bool workaround_source_arf_before_eot();
|
||||
void vfail(const char *msg, va_list args);
|
||||
void fail(const char *msg, ...);
|
||||
void limit_dispatch_width(unsigned n, const char *msg);
|
||||
|
Reference in New Issue
Block a user