nir: intel/compiler: Move ufind_msb lowering to NIR

Fossil-db results:

All Intel platforms had similar results. (Ice Lake shown)
Cycles in all programs: 9098346105 -> 9098333765 (-0.0%)
Cycles helped: 6

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19042>
This commit is contained in:
Ian Romanick
2022-10-10 13:21:52 -07:00
committed by Marge Bot
parent a4052e70ea
commit 28311f9d02
5 changed files with 7 additions and 53 deletions

View File

@@ -3413,6 +3413,8 @@ typedef struct nir_shader_compiler_options {
bool lower_find_msb_to_reverse;
/** Lowers ifind_msb to uclz and logic ops */
bool lower_ifind_msb_to_uclz;
/** Lowers ufind_msb to 31-uclz */
bool lower_ufind_msb_to_uclz;
/** Lowers find_lsb to ufind_msb and logic ops */
bool lower_find_lsb;
bool lower_uadd_carry;

View File

@@ -2041,6 +2041,10 @@ optimizations.extend([
('ufind_msb_rev', 'value')),
'options->lower_find_msb_to_reverse'),
(('ufind_msb', 'value@32'),
('isub', 31, ('uclz', 'value')),
'options->lower_ufind_msb_to_uclz'),
(('uclz', a), ('umin', 32, ('ufind_msb_rev', a)), 'options->lower_uclz'),
(('find_lsb', 'value'),

View File

@@ -34,6 +34,7 @@
.lower_scmp = true, \
.lower_flrp16 = true, \
.lower_fmod = true, \
.lower_ufind_msb_to_uclz = true, \
.lower_uadd_carry = true, \
.lower_usub_borrow = true, \
.lower_flrp64 = true, \

View File

@@ -610,26 +610,6 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
return true;
}
static void
emit_find_msb_using_lzd(const fs_builder &bld,
const fs_reg &result,
const fs_reg &src)
{
fs_inst *inst;
fs_reg temp = src;
bld.LZD(retype(result, BRW_REGISTER_TYPE_UD),
retype(temp, BRW_REGISTER_TYPE_UD));
/* LZD counts from the MSB side, while GLSL's findMSB() wants the count
* from the LSB side. Subtract the result from 31 to convert the MSB
* count into an LSB count. If no bits are set, LZD will return 32.
* 31-32 = -1, which is exactly what findMSB() is supposed to return.
*/
inst = bld.ADD(result, retype(result, BRW_REGISTER_TYPE_D), brw_imm_d(31));
inst->src[0].negate = true;
}
static brw_rnd_mode
brw_rnd_mode_from_nir_op (const nir_op op) {
switch (op) {
@@ -1677,13 +1657,6 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
bld.CBIT(result, op[0]);
break;
case nir_op_ufind_msb: {
assert(nir_dest_bit_size(instr->dest.dest) == 32);
assert(nir_src_bit_size(instr->src[0].src) == 32);
emit_find_msb_using_lzd(bld, result, op[0]);
break;
}
case nir_op_uclz:
assert(nir_dest_bit_size(instr->dest.dest) == 32);
assert(nir_src_bit_size(instr->src[0].src) == 32);

View File

@@ -829,27 +829,6 @@ vec4_visitor::optimize_predicate(nir_alu_instr *instr,
return true;
}
static void
emit_find_msb_using_lzd(const vec4_builder &bld,
const dst_reg &dst,
const src_reg &src)
{
vec4_instruction *inst;
src_reg temp = src;
bld.LZD(retype(dst, BRW_REGISTER_TYPE_UD),
retype(temp, BRW_REGISTER_TYPE_UD));
/* LZD counts from the MSB side, while GLSL's findMSB() wants the count
* from the LSB side. Subtract the result from 31 to convert the MSB count
* into an LSB count. If no bits are set, LZD will return 32. 31-32 = -1,
* which is exactly what findMSB() is supposed to return.
*/
inst = bld.ADD(dst, retype(src_reg(dst), BRW_REGISTER_TYPE_D),
brw_imm_d(31));
inst->src[0].negate = true;
}
void
vec4_visitor::emit_conversion_from_double(dst_reg dst, src_reg src)
{
@@ -1634,11 +1613,6 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
emit(CBIT(dst, op[0]));
break;
case nir_op_ufind_msb:
assert(nir_dest_bit_size(instr->dest.dest) < 64);
emit_find_msb_using_lzd(vec4_builder(this).at_end(), dst, op[0]);
break;
case nir_op_ifind_msb: {
assert(nir_dest_bit_size(instr->dest.dest) == 32);
assert(nir_src_bit_size(instr->src[0].src) == 32);