intel/compiler: Lower FIND_[LAST_]LIVE_CHANNEL in IR on Gfx8+

This allows the software scoreboarding pass, scheduler, and so on
to see the individual instructions and handle them, rather than
trusting in the generator to do scoreboarding correctly when expanding
the virtual instruction to multiple actual instructions.

By using SHADER_OPCODE_READ_SR_REG, we also correctly handle the
software scoreboarding workaround when reading DMask/VMask.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17530>
This commit is contained in:
Kenneth Graunke
2022-06-06 02:35:09 -07:00
committed by Marge Bot
parent 6401d768b9
commit 49ee3ae9e8
2 changed files with 74 additions and 0 deletions

View File

@@ -5529,6 +5529,77 @@ fs_visitor::lower_derivatives()
return progress;
}
bool
fs_visitor::lower_find_live_channel()
{
bool progress = false;
if (devinfo->ver < 8)
return false;
bool packed_dispatch =
brw_stage_has_packed_dispatch(devinfo, stage, stage_prog_data);
bool vmask =
stage == MESA_SHADER_FRAGMENT &&
brw_wm_prog_data(stage_prog_data)->uses_vmask;
foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
if (inst->opcode != SHADER_OPCODE_FIND_LIVE_CHANNEL &&
inst->opcode != SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL)
continue;
bool first = inst->opcode == SHADER_OPCODE_FIND_LIVE_CHANNEL;
/* Getting the first active channel index is easy on Gfx8: Just find
* the first bit set in the execution mask. The register exists on
* HSW already but it reads back as all ones when the current
* instruction has execution masking disabled, so it's kind of
* useless there.
*/
fs_reg exec_mask(retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD));
const fs_builder ubld = bld.at(block, inst).exec_all().group(1, 0);
/* ce0 doesn't consider the thread dispatch mask (DMask or VMask),
* so combine the execution and dispatch masks to obtain the true mask.
*
* If we're looking for the first live channel, and we have packed
* dispatch, we can skip this step, as we know all dispatched channels
* will appear at the front of the mask.
*/
if (!(first && packed_dispatch)) {
fs_reg mask = ubld.vgrf(BRW_REGISTER_TYPE_UD);
ubld.emit(SHADER_OPCODE_READ_SR_REG, mask, brw_imm_ud(vmask ? 3 : 2));
/* Quarter control has the effect of magically shifting the value of
* ce0 so you'll get the first/last active channel relative to the
* specified quarter control as result.
*/
if (inst->group > 0)
ubld.SHR(mask, mask, brw_imm_ud(ALIGN(inst->group, 8)));
ubld.AND(mask, exec_mask, mask);
exec_mask = mask;
}
if (first) {
ubld.FBL(inst->dst, exec_mask);
} else {
fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 1);
ubld.LZD(tmp, exec_mask);
ubld.ADD(inst->dst, negate(tmp), brw_imm_uw(31));
}
inst->remove(block);
progress = true;
}
if (progress)
invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
return progress;
}
void
fs_visitor::dump_instructions() const
{
@@ -6121,6 +6192,8 @@ fs_visitor::optimize()
lower_uniform_pull_constant_loads();
lower_find_live_channel();
validate();
}

View File

@@ -199,6 +199,7 @@ public:
bool lower_simd_width();
bool lower_barycentrics();
bool lower_derivatives();
bool lower_find_live_channel();
bool lower_scoreboard();
bool lower_sub_sat();
bool opt_combine_constants();