nir: make fdph lowering match fdot
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20812>
This commit is contained in:
@@ -294,15 +294,29 @@ lower_alu_instr_width(nir_builder *b, nir_instr *instr, void *_data)
|
|||||||
nir_ssa_def *src0_vec = nir_ssa_for_alu_src(b, alu, 0);
|
nir_ssa_def *src0_vec = nir_ssa_for_alu_src(b, alu, 0);
|
||||||
nir_ssa_def *src1_vec = nir_ssa_for_alu_src(b, alu, 1);
|
nir_ssa_def *src1_vec = nir_ssa_for_alu_src(b, alu, 1);
|
||||||
|
|
||||||
nir_ssa_def *sum[4];
|
/* Only use reverse order for imprecise fdph, see explanation in lower_fdot. */
|
||||||
for (unsigned i = 0; i < 3; i++) {
|
bool reverse_order = !b->exact;
|
||||||
sum[i] = nir_fmul(b, nir_channel(b, src0_vec, i),
|
if (will_lower_ffma(b->shader, alu->dest.dest.ssa.bit_size)) {
|
||||||
nir_channel(b, src1_vec, i));
|
nir_ssa_def *sum[4];
|
||||||
}
|
for (unsigned i = 0; i < 3; i++) {
|
||||||
sum[3] = nir_channel(b, src1_vec, 3);
|
int dest = reverse_order ? 3 - i : i;
|
||||||
|
sum[dest] = nir_fmul(b, nir_channel(b, src0_vec, i),
|
||||||
|
nir_channel(b, src1_vec, i));
|
||||||
|
}
|
||||||
|
sum[reverse_order ? 0 : 3] = nir_channel(b, src1_vec, 3);
|
||||||
|
|
||||||
return nir_fadd(b, nir_fadd(b, sum[0], sum[1]),
|
return nir_fadd(b, nir_fadd(b, nir_fadd(b, sum[0], sum[1]), sum[2]), sum[3]);
|
||||||
nir_fadd(b, sum[2], sum[3]));
|
} else if (reverse_order) {
|
||||||
|
nir_ssa_def *sum = nir_channel(b, src1_vec, 3);
|
||||||
|
for (int i = 2; i >= 0; i--)
|
||||||
|
sum = nir_ffma(b, nir_channel(b, src0_vec, i), nir_channel(b, src1_vec, i), sum);
|
||||||
|
return sum;
|
||||||
|
} else {
|
||||||
|
nir_ssa_def *sum = nir_fmul(b, nir_channel(b, src0_vec, 0), nir_channel(b, src1_vec, 0));
|
||||||
|
sum = nir_ffma(b, nir_channel(b, src0_vec, 1), nir_channel(b, src1_vec, 1), sum);
|
||||||
|
sum = nir_ffma(b, nir_channel(b, src0_vec, 2), nir_channel(b, src1_vec, 2), sum);
|
||||||
|
return nir_fadd(b, sum, nir_channel(b, src1_vec, 3));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
case nir_op_pack_64_2x32: {
|
case nir_op_pack_64_2x32: {
|
||||||
|
Reference in New Issue
Block a user