zink: fix isNan mismatch between NIR and SPIR-V
SPIR-V and Vulkan allow implementations to optimize
float ops assuming inputs are not NaN unless the SignedZeroInfNanPreserve
execution mode is enabled.
This means isnan, which is lowered to nir_op_fneu(a, a) and then emitted
as SpvOpFUnordNotEqual, could potentially be optimised to a constant 0.
isnan is the special case here, so recognise this pattern and emit
SpvOpIsNan instead of enabling SignedZeroInfNanPreserve, which
would hurt the performance of some Vulkan drivers.
Fixes dEQP-GL45-ES31.functional.shaders.builtin_functions.common.isnan.*
Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18910>
(cherry picked from commit 69c22dd817)
This commit is contained in:
@@ -157,7 +157,7 @@
|
||||
"description": "zink: fix isNan mismatch between NIR and SPIR-V",
|
||||
"nominated": true,
|
||||
"nomination_type": 0,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": null
|
||||
},
|
||||
|
@@ -1811,29 +1811,29 @@ alu_instr_src_components(const nir_alu_instr *instr, unsigned src)
|
||||
}
|
||||
|
||||
static SpvId
|
||||
get_alu_src(struct ntv_context *ctx, nir_alu_instr *alu, unsigned src)
|
||||
get_alu_src(struct ntv_context *ctx, nir_alu_instr *alu, unsigned src, SpvId *raw_value)
|
||||
{
|
||||
SpvId raw_value = get_alu_src_raw(ctx, alu, src);
|
||||
*raw_value = get_alu_src_raw(ctx, alu, src);
|
||||
|
||||
unsigned num_components = alu_instr_src_components(alu, src);
|
||||
unsigned bit_size = nir_src_bit_size(alu->src[src].src);
|
||||
nir_alu_type type = nir_op_infos[alu->op].input_types[src];
|
||||
|
||||
if (bit_size == 1)
|
||||
return raw_value;
|
||||
return *raw_value;
|
||||
else {
|
||||
switch (nir_alu_type_get_base_type(type)) {
|
||||
case nir_type_bool:
|
||||
unreachable("bool should have bit-size 1");
|
||||
|
||||
case nir_type_int:
|
||||
return bitcast_to_ivec(ctx, raw_value, bit_size, num_components);
|
||||
return bitcast_to_ivec(ctx, *raw_value, bit_size, num_components);
|
||||
|
||||
case nir_type_uint:
|
||||
return raw_value;
|
||||
return *raw_value;
|
||||
|
||||
case nir_type_float:
|
||||
return bitcast_to_fvec(ctx, raw_value, bit_size, num_components);
|
||||
return bitcast_to_fvec(ctx, *raw_value, bit_size, num_components);
|
||||
|
||||
default:
|
||||
unreachable("unknown nir_alu_type");
|
||||
@@ -1908,8 +1908,9 @@ static void
|
||||
emit_alu(struct ntv_context *ctx, nir_alu_instr *alu)
|
||||
{
|
||||
SpvId src[NIR_MAX_VEC_COMPONENTS];
|
||||
SpvId raw_src[NIR_MAX_VEC_COMPONENTS];
|
||||
for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++)
|
||||
src[i] = get_alu_src(ctx, alu, i);
|
||||
src[i] = get_alu_src(ctx, alu, i, &raw_src[i]);
|
||||
|
||||
SpvId dest_type = get_dest_type(ctx, &alu->dest.dest,
|
||||
nir_op_infos[alu->op].output_type);
|
||||
@@ -2086,8 +2087,6 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu)
|
||||
BINOP(nir_op_uge, SpvOpUGreaterThanEqual)
|
||||
BINOP(nir_op_flt, SpvOpFOrdLessThan)
|
||||
BINOP(nir_op_fge, SpvOpFOrdGreaterThanEqual)
|
||||
BINOP(nir_op_feq, SpvOpFOrdEqual)
|
||||
BINOP(nir_op_fneu, SpvOpFUnordNotEqual)
|
||||
BINOP(nir_op_frem, SpvOpFRem)
|
||||
#undef BINOP
|
||||
|
||||
@@ -2152,6 +2151,23 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu)
|
||||
case nir_op_sge:
|
||||
unreachable("should already be lowered away");
|
||||
|
||||
case nir_op_fneu:
|
||||
assert(nir_op_infos[alu->op].num_inputs == 2);
|
||||
if (raw_src[0] == raw_src[1])
|
||||
result = emit_unop(ctx, SpvOpIsNan, dest_type, src[0]);
|
||||
else
|
||||
result = emit_binop(ctx, SpvOpFUnordNotEqual, dest_type, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case nir_op_feq:
|
||||
assert(nir_op_infos[alu->op].num_inputs == 2);
|
||||
if (raw_src[0] == raw_src[1])
|
||||
result = emit_unop(ctx, SpvOpLogicalNot, dest_type,
|
||||
emit_unop(ctx, SpvOpIsNan, dest_type, src[0]));
|
||||
else
|
||||
result = emit_binop(ctx, SpvOpFOrdEqual, dest_type, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case nir_op_flrp:
|
||||
assert(nir_op_infos[alu->op].num_inputs == 3);
|
||||
result = emit_builtin_triop(ctx, GLSLstd450FMix, dest_type,
|
||||
|
Reference in New Issue
Block a user