intel/compiler: Lower FIND_[LAST_]LIVE_CHANNEL in IR on Gfx8+
This allows the software scoreboarding pass, scheduler, and so on to see the individual instructions and handle them, rather than trusting the generator to do scoreboarding correctly when expanding the virtual instruction into multiple actual instructions. By using SHADER_OPCODE_READ_SR_REG, we also correctly handle the software scoreboarding workaround when reading DMask/VMask. Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17530>
This commit is contained in:

committed by
Marge Bot

parent
6401d768b9
commit
49ee3ae9e8
@@ -5529,6 +5529,77 @@ fs_visitor::lower_derivatives()
|
||||
return progress;
|
||||
}
|
||||
|
||||
bool
|
||||
fs_visitor::lower_find_live_channel()
|
||||
{
|
||||
bool progress = false;
|
||||
|
||||
if (devinfo->ver < 8)
|
||||
return false;
|
||||
|
||||
bool packed_dispatch =
|
||||
brw_stage_has_packed_dispatch(devinfo, stage, stage_prog_data);
|
||||
bool vmask =
|
||||
stage == MESA_SHADER_FRAGMENT &&
|
||||
brw_wm_prog_data(stage_prog_data)->uses_vmask;
|
||||
|
||||
foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
|
||||
if (inst->opcode != SHADER_OPCODE_FIND_LIVE_CHANNEL &&
|
||||
inst->opcode != SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL)
|
||||
continue;
|
||||
|
||||
bool first = inst->opcode == SHADER_OPCODE_FIND_LIVE_CHANNEL;
|
||||
|
||||
/* Getting the first active channel index is easy on Gfx8: Just find
|
||||
* the first bit set in the execution mask. The register exists on
|
||||
* HSW already but it reads back as all ones when the current
|
||||
* instruction has execution masking disabled, so it's kind of
|
||||
* useless there.
|
||||
*/
|
||||
fs_reg exec_mask(retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD));
|
||||
|
||||
const fs_builder ubld = bld.at(block, inst).exec_all().group(1, 0);
|
||||
|
||||
/* ce0 doesn't consider the thread dispatch mask (DMask or VMask),
|
||||
* so combine the execution and dispatch masks to obtain the true mask.
|
||||
*
|
||||
* If we're looking for the first live channel, and we have packed
|
||||
* dispatch, we can skip this step, as we know all dispatched channels
|
||||
* will appear at the front of the mask.
|
||||
*/
|
||||
if (!(first && packed_dispatch)) {
|
||||
fs_reg mask = ubld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||
ubld.emit(SHADER_OPCODE_READ_SR_REG, mask, brw_imm_ud(vmask ? 3 : 2));
|
||||
|
||||
/* Quarter control has the effect of magically shifting the value of
|
||||
* ce0 so you'll get the first/last active channel relative to the
|
||||
* specified quarter control as result.
|
||||
*/
|
||||
if (inst->group > 0)
|
||||
ubld.SHR(mask, mask, brw_imm_ud(ALIGN(inst->group, 8)));
|
||||
|
||||
ubld.AND(mask, exec_mask, mask);
|
||||
exec_mask = mask;
|
||||
}
|
||||
|
||||
if (first) {
|
||||
ubld.FBL(inst->dst, exec_mask);
|
||||
} else {
|
||||
fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 1);
|
||||
ubld.LZD(tmp, exec_mask);
|
||||
ubld.ADD(inst->dst, negate(tmp), brw_imm_uw(31));
|
||||
}
|
||||
|
||||
inst->remove(block);
|
||||
progress = true;
|
||||
}
|
||||
|
||||
if (progress)
|
||||
invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::dump_instructions() const
|
||||
{
|
||||
@@ -6121,6 +6192,8 @@ fs_visitor::optimize()
|
||||
|
||||
lower_uniform_pull_constant_loads();
|
||||
|
||||
lower_find_live_channel();
|
||||
|
||||
validate();
|
||||
}
|
||||
|
||||
|
@@ -199,6 +199,7 @@ public:
|
||||
bool lower_simd_width();
|
||||
bool lower_barycentrics();
|
||||
bool lower_derivatives();
|
||||
bool lower_find_live_channel();
|
||||
bool lower_scoreboard();
|
||||
bool lower_sub_sat();
|
||||
bool opt_combine_constants();
|
||||
|
Reference in New Issue
Block a user