nir: intel/compiler: Move ufind_msb lowering to NIR
Fossil-db results:

All Intel platforms had similar results. (Ice Lake shown)
Cycles in all programs: 9098346105 -> 9098333765 (-0.0%)
Cycles helped: 6

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19042>
This commit is contained in:
@@ -3413,6 +3413,8 @@ typedef struct nir_shader_compiler_options {
|
||||
bool lower_find_msb_to_reverse;
|
||||
/** Lowers ifind_msb to uclz and logic ops */
|
||||
bool lower_ifind_msb_to_uclz;
|
||||
/** Lowers ufind_msb to 31-uclz */
|
||||
bool lower_ufind_msb_to_uclz;
|
||||
/** Lowers find_lsb to ufind_msb and logic ops */
|
||||
bool lower_find_lsb;
|
||||
bool lower_uadd_carry;
|
||||
|
@@ -2041,6 +2041,10 @@ optimizations.extend([
|
||||
('ufind_msb_rev', 'value')),
|
||||
'options->lower_find_msb_to_reverse'),
|
||||
|
||||
(('ufind_msb', 'value@32'),
|
||||
('isub', 31, ('uclz', 'value')),
|
||||
'options->lower_ufind_msb_to_uclz'),
|
||||
|
||||
(('uclz', a), ('umin', 32, ('ufind_msb_rev', a)), 'options->lower_uclz'),
|
||||
|
||||
(('find_lsb', 'value'),
|
||||
|
@@ -34,6 +34,7 @@
|
||||
.lower_scmp = true, \
|
||||
.lower_flrp16 = true, \
|
||||
.lower_fmod = true, \
|
||||
.lower_ufind_msb_to_uclz = true, \
|
||||
.lower_uadd_carry = true, \
|
||||
.lower_usub_borrow = true, \
|
||||
.lower_flrp64 = true, \
|
||||
|
@@ -610,26 +610,6 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
emit_find_msb_using_lzd(const fs_builder &bld,
|
||||
const fs_reg &result,
|
||||
const fs_reg &src)
|
||||
{
|
||||
fs_inst *inst;
|
||||
fs_reg temp = src;
|
||||
|
||||
bld.LZD(retype(result, BRW_REGISTER_TYPE_UD),
|
||||
retype(temp, BRW_REGISTER_TYPE_UD));
|
||||
|
||||
/* LZD counts from the MSB side, while GLSL's findMSB() wants the count
|
||||
* from the LSB side. Subtract the result from 31 to convert the MSB
|
||||
* count into an LSB count. If no bits are set, LZD will return 32.
|
||||
* 31-32 = -1, which is exactly what findMSB() is supposed to return.
|
||||
*/
|
||||
inst = bld.ADD(result, retype(result, BRW_REGISTER_TYPE_D), brw_imm_d(31));
|
||||
inst->src[0].negate = true;
|
||||
}
|
||||
|
||||
static brw_rnd_mode
|
||||
brw_rnd_mode_from_nir_op (const nir_op op) {
|
||||
switch (op) {
|
||||
@@ -1677,13 +1657,6 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
|
||||
bld.CBIT(result, op[0]);
|
||||
break;
|
||||
|
||||
case nir_op_ufind_msb: {
|
||||
assert(nir_dest_bit_size(instr->dest.dest) == 32);
|
||||
assert(nir_src_bit_size(instr->src[0].src) == 32);
|
||||
emit_find_msb_using_lzd(bld, result, op[0]);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_op_uclz:
|
||||
assert(nir_dest_bit_size(instr->dest.dest) == 32);
|
||||
assert(nir_src_bit_size(instr->src[0].src) == 32);
|
||||
|
@@ -829,27 +829,6 @@ vec4_visitor::optimize_predicate(nir_alu_instr *instr,
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
emit_find_msb_using_lzd(const vec4_builder &bld,
|
||||
const dst_reg &dst,
|
||||
const src_reg &src)
|
||||
{
|
||||
vec4_instruction *inst;
|
||||
src_reg temp = src;
|
||||
|
||||
bld.LZD(retype(dst, BRW_REGISTER_TYPE_UD),
|
||||
retype(temp, BRW_REGISTER_TYPE_UD));
|
||||
|
||||
/* LZD counts from the MSB side, while GLSL's findMSB() wants the count
|
||||
* from the LSB side. Subtract the result from 31 to convert the MSB count
|
||||
* into an LSB count. If no bits are set, LZD will return 32. 31-32 = -1,
|
||||
* which is exactly what findMSB() is supposed to return.
|
||||
*/
|
||||
inst = bld.ADD(dst, retype(src_reg(dst), BRW_REGISTER_TYPE_D),
|
||||
brw_imm_d(31));
|
||||
inst->src[0].negate = true;
|
||||
}
|
||||
|
||||
void
|
||||
vec4_visitor::emit_conversion_from_double(dst_reg dst, src_reg src)
|
||||
{
|
||||
@@ -1634,11 +1613,6 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
|
||||
emit(CBIT(dst, op[0]));
|
||||
break;
|
||||
|
||||
case nir_op_ufind_msb:
|
||||
assert(nir_dest_bit_size(instr->dest.dest) < 64);
|
||||
emit_find_msb_using_lzd(vec4_builder(this).at_end(), dst, op[0]);
|
||||
break;
|
||||
|
||||
case nir_op_ifind_msb: {
|
||||
assert(nir_dest_bit_size(instr->dest.dest) == 32);
|
||||
assert(nir_src_bit_size(instr->src[0].src) == 32);
|
||||
|
Reference in New Issue
Block a user