glsl/float64: handle signed zero with min/max
Ensure the following identities hold to match IEEE-754-2019 and upcoming NIR:

    min(-0, +0) = -0
    min(+0, -0) = -0
    max(-0, +0) = +0
    max(+0, -0) = +0

To implement, we specialize a version of flt64_nonnan. The regular flt64 has extra logic to handle signed zero, so this version is actually simpler. So in addition to the bug fix, this is an optimization.

Compute shaders from KHR-GL46.gpu_shader_fp64.builtin.max_dvec4 before and after:

    before: 136 inst, 122 alu, 122 fscib, 4 ic, 1006 bytes, 39 regs, 28 uniforms
    after:  104 inst, 90 alu, 90 fscib, 4 ic, 766 bytes, 39 regs, 28 uniforms

I will happily take a 24% reduction in instruction count as the cost of standards conformance ^_^

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Konstantin Seurer <konstantin.seurer@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30075>
This commit is contained in:

committed by
Marge Bot

parent
6f48fa4ebe
commit
26de3d5366
@@ -223,6 +223,31 @@ __flt64_nonnan(uint64_t __a, uint64_t __b)
|
||||
return !__feq64_nonnan(__a, __b) && (lt != both_negative);
|
||||
}
|
||||
|
||||
bool
__flt64_nonnan_minmax(uint64_t __a, uint64_t __b)
{
   uvec2 lhs = unpackUint2x32(__a);
   uvec2 rhs = unpackUint2x32(__b);

   /* Ordering predicate used by fmin/fmax on the raw bit patterns of two
    * doubles. Unlike __flt64_nonnan, there is no equality special case:
    * min/max want -0 to order below +0, and when the operands are equal it
    * does not matter which one gets picked.
    *
    * The sign bit is bit 31 of the .y word. When it is set in both operands
    * the integer ordering of the bit patterns has to be flipped:
    *
    *    both signs set ? lhs > rhs : lhs < rhs
    *
    * Since the lhs == rhs result is a don't-care, ">" may be relaxed to ">=",
    * i.e. !(lhs < rhs), which collapses the select into
    *
    *    both signs set XOR (lhs < rhs)
    *
    * Boolean XOR is spelled != below.
    *
    * NOTE(review): no NaN handling here, as the name says; the visible
    * callers test __is_nan on both operands themselves.
    */
   bool signs_both_set = (lhs.y & rhs.y & 0x80000000u) != 0;
   bool int_less = ilt64(lhs.y, lhs.x, rhs.y, rhs.x);

   return (int_less != signs_both_set);
}
|
||||
|
||||
/* Returns true if the double-precision floating-point value `a' is less than
|
||||
* the corresponding value `b', and false otherwise. The comparison is performed
|
||||
* according to the IEEE Standard for Floating-Point Arithmetic.
|
||||
@@ -1691,7 +1716,7 @@ __fmin64(uint64_t a, uint64_t b)
|
||||
* rules. Flow control is bad!
|
||||
*/
|
||||
bool b_nan = __is_nan(b);
|
||||
bool a_lt_b = __flt64_nonnan(a, b);
|
||||
bool a_lt_b = __flt64_nonnan_minmax(a, b);
|
||||
bool a_nan = __is_nan(a);
|
||||
|
||||
return (b_nan || a_lt_b) && !a_nan ? a : b;
|
||||
@@ -1705,7 +1730,7 @@ __fmax64(uint64_t a, uint64_t b)
|
||||
* rules. Flow control is bad!
|
||||
*/
|
||||
bool b_nan = __is_nan(b);
|
||||
bool a_lt_b = __flt64_nonnan(a, b);
|
||||
bool a_lt_b = __flt64_nonnan_minmax(a, b);
|
||||
bool a_nan = __is_nan(a);
|
||||
|
||||
return (b_nan || a_lt_b) && !a_nan ? b : a;
|
||||
|
Reference in New Issue
Block a user