nir: intel/compiler: Move ifind_msb lowering to NIR

Unlike ufind_msb, ifind_msb is only defined in NIR for 32-bit values, so
no @32 annotation is required.

No shader-db or fossil-db changes on any Intel platform.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19042>
This commit is contained in:
Ian Romanick
2022-10-10 13:35:01 -07:00
committed by Marge Bot
parent 15c6c859cf
commit 0cc7bf63b7
5 changed files with 52 additions and 86 deletions

View File

@@ -3411,6 +3411,8 @@ typedef struct nir_shader_compiler_options {
bool lower_ifind_msb;
/** Lowers ifind_msb and ufind_msb to reverse variants */
bool lower_find_msb_to_reverse;
/** Lowers ifind_msb to uclz and logic ops */
bool lower_ifind_msb_to_uclz;
/** Lowers find_lsb to ufind_msb and logic ops */
bool lower_find_lsb;
bool lower_uadd_carry;

View File

@@ -2013,6 +2013,28 @@ optimizations.extend([
('ifind_msb_rev', 'value')),
'options->lower_find_msb_to_reverse'),
# uclz of an absolute value source almost always does the right thing.
# There are three kinds of problem values:
#
# * 0x80000000. Since abs(0x80000000) == 0x80000000, uclz returns 0, so
# 31 - uclz would be 31. However, findMSB(int(0x80000000)) == 30.
#
# * 0xffffffff. Since abs(0xffffffff) == 1, uclz returns 31, so 31 - uclz
# would be 0. Section 8.8 (Integer Functions) of the GLSL 4.50 spec says:
#
# For a value of zero or negative one, -1 will be returned.
#
# * Negative powers of two. 31 - uclz(abs(-(1<<x))) returns x, but
# findMSB(-(1<<x)) should return x-1.
#
# For all negative number cases, including 0x80000000 and 0xffffffff, the
# correct value is obtained from uclz if instead of negating the (already
# negative) value the logical-not is used. A conditional logical-not can
# be achieved by (x ^ (x >> 31)).
(('ifind_msb', 'value'),
('isub', 31, ('uclz', ('ixor', 'value', ('ishr', 'value', 31)))),
'options->lower_ifind_msb_to_uclz'),
(('ufind_msb', 'value'),
('bcsel', ('ige', ('ufind_msb_rev', 'value'), 0),
('isub', 31, ('ufind_msb_rev', 'value')),

View File

@@ -189,6 +189,7 @@ brw_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo)
nir_options->lower_rotate = devinfo->ver < 11;
nir_options->lower_bitfield_reverse = devinfo->ver < 7;
nir_options->lower_find_lsb = devinfo->ver < 7;
nir_options->lower_ifind_msb_to_uclz = devinfo->ver < 7;
nir_options->has_iadd3 = devinfo->verx10 >= 125;
nir_options->has_sdot_4x8 = devinfo->ver >= 12;

View File

@@ -613,38 +613,11 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
static void
emit_find_msb_using_lzd(const fs_builder &bld,
const fs_reg &result,
const fs_reg &src,
bool is_signed)
const fs_reg &src)
{
fs_inst *inst;
fs_reg temp = src;
if (is_signed) {
/* LZD of an absolute value source almost always does the right
* thing. There are two problem values:
*
* * 0x80000000. Since abs(0x80000000) == 0x80000000, LZD returns
* 0. However, findMSB(int(0x80000000)) == 30.
*
* * 0xffffffff. Since abs(0xffffffff) == 1, LZD returns
* 31. Section 8.8 (Integer Functions) of the GLSL 4.50 spec says:
*
* For a value of zero or negative one, -1 will be returned.
*
* * Negative powers of two. LZD(abs(-(1<<x))) returns x, but
* findMSB(-(1<<x)) should return x-1.
*
* For all negative number cases, including 0x80000000 and
* 0xffffffff, the correct value is obtained from LZD if instead of
* negating the (already negative) value the logical-not is used. A
* conditional logical-not can be achieved in two instructions.
*/
temp = bld.vgrf(BRW_REGISTER_TYPE_D);
bld.ASR(temp, src, brw_imm_d(31));
bld.XOR(temp, temp, src);
}
bld.LZD(retype(result, BRW_REGISTER_TYPE_UD),
retype(temp, BRW_REGISTER_TYPE_UD));
@@ -1704,7 +1677,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
case nir_op_ufind_msb: {
assert(nir_dest_bit_size(instr->dest.dest) < 64);
emit_find_msb_using_lzd(bld, result, op[0], false);
emit_find_msb_using_lzd(bld, result, op[0]);
break;
}
@@ -1715,23 +1688,20 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
case nir_op_ifind_msb: {
assert(nir_dest_bit_size(instr->dest.dest) < 64);
assert(devinfo->ver >= 7);
if (devinfo->ver < 7) {
emit_find_msb_using_lzd(bld, result, op[0], true);
} else {
bld.FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]);
/* FBH counts from the MSB side, while GLSL's findMSB() wants the
* count from the LSB side. If FBH didn't return an error
* (0xFFFFFFFF), then subtract the result from 31 to convert the MSB
* count into an LSB count.
/* FBH counts from the MSB side, while GLSL's findMSB() wants the count
* from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
* subtract the result from 31 to convert the MSB count into an LSB
* count.
*/
bld.CMP(bld.null_reg_d(), result, brw_imm_d(-1), BRW_CONDITIONAL_NZ);
inst = bld.ADD(result, result, brw_imm_d(31));
inst->predicate = BRW_PREDICATE_NORMAL;
inst->src[0].negate = true;
}
break;
}

View File

@@ -832,38 +832,11 @@ vec4_visitor::optimize_predicate(nir_alu_instr *instr,
static void
emit_find_msb_using_lzd(const vec4_builder &bld,
const dst_reg &dst,
const src_reg &src,
bool is_signed)
const src_reg &src)
{
vec4_instruction *inst;
src_reg temp = src;
if (is_signed) {
/* LZD of an absolute value source almost always does the right
* thing. There are two problem values:
*
* * 0x80000000. Since abs(0x80000000) == 0x80000000, LZD returns
* 0. However, findMSB(int(0x80000000)) == 30.
*
* * 0xffffffff. Since abs(0xffffffff) == 1, LZD returns
* 31. Section 8.8 (Integer Functions) of the GLSL 4.50 spec says:
*
* For a value of zero or negative one, -1 will be returned.
*
* * Negative powers of two. LZD(abs(-(1<<x))) returns x, but
* findMSB(-(1<<x)) should return x-1.
*
* For all negative number cases, including 0x80000000 and
* 0xffffffff, the correct value is obtained from LZD if instead of
* negating the (already negative) value the logical-not is used. A
* conditional logical-not can be achieved in two instructions.
*/
temp = src_reg(bld.vgrf(BRW_REGISTER_TYPE_D));
bld.ASR(dst_reg(temp), src, brw_imm_d(31));
bld.XOR(dst_reg(temp), temp, src);
}
bld.LZD(retype(dst, BRW_REGISTER_TYPE_UD),
retype(temp, BRW_REGISTER_TYPE_UD));
@@ -1661,30 +1634,28 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
case nir_op_ufind_msb:
assert(nir_dest_bit_size(instr->dest.dest) < 64);
emit_find_msb_using_lzd(vec4_builder(this).at_end(), dst, op[0], false);
emit_find_msb_using_lzd(vec4_builder(this).at_end(), dst, op[0]);
break;
case nir_op_ifind_msb: {
assert(nir_dest_bit_size(instr->dest.dest) < 64);
assert(devinfo->ver >= 7);
vec4_builder bld = vec4_builder(this).at_end();
src_reg src(dst);
if (devinfo->ver < 7) {
emit_find_msb_using_lzd(bld, dst, op[0], true);
} else {
emit(FBH(retype(dst, BRW_REGISTER_TYPE_UD), op[0]));
/* FBH counts from the MSB side, while GLSL's findMSB() wants the
* count from the LSB side. If FBH didn't return an error
* (0xFFFFFFFF), then subtract the result from 31 to convert the MSB
* count into an LSB count.
/* FBH counts from the MSB side, while GLSL's findMSB() wants the count
* from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
* subtract the result from 31 to convert the MSB count into an LSB
* count.
*/
bld.CMP(dst_null_d(), src, brw_imm_d(-1), BRW_CONDITIONAL_NZ);
inst = bld.ADD(dst, src, brw_imm_d(31));
inst->predicate = BRW_PREDICATE_NORMAL;
inst->src[0].negate = true;
}
break;
}