soft-fp64: Optimize __fmin64 and __fmax64 by using different evaluation order [v2]

v2: Go to extra effort to avoid flow control inserted to implement
short-circuit evaluation rules.

Results on the 308 shaders extracted from the fp64 portion of the OpenGL
CTS:

Tiger Lake and Ice Lake had similar results. (Tiger Lake shown)
total instructions in shared programs: 797779 -> 796849 (-0.12%)
instructions in affected programs: 3499 -> 2569 (-26.58%)
helped: 21
HURT: 0
helped stats (abs) min: 8 max: 112 x̄: 44.29 x̃: 44
helped stats (rel) min: 16.09% max: 33.15% x̄: 25.72% x̃: 24.62%
95% mean confidence interval for instructions value: -55.94 -32.63
95% mean confidence interval for instructions %-change: -28.14% -23.30%
Instructions are helped.

total cycles in shared programs: 6601355 -> 6588351 (-0.20%)
cycles in affected programs: 25376 -> 12372 (-51.25%)
helped: 21
HURT: 0
helped stats (abs) min: 156 max: 1410 x̄: 619.24 x̃: 526
helped stats (rel) min: 42.39% max: 53.98% x̄: 50.12% x̃: 50.75%
95% mean confidence interval for cycles value: -776.58 -461.89
95% mean confidence interval for cycles %-change: -51.57% -48.67%
Cycles are helped.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> [v1]
Reviewed-by: Matt Turner <mattst88@gmail.com> [v1]
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4142>
This commit is contained in:
Ian Romanick
2020-03-03 10:21:18 -08:00
committed by Marge Bot
parent 617a69107e
commit 5b07f542e5

View File

@@ -1784,21 +1784,29 @@ __fround64(uint64_t __a)
uint64_t uint64_t
__fmin64(uint64_t a, uint64_t b) __fmin64(uint64_t a, uint64_t b)
{ {
if (__is_nan(a)) return b; /* This weird layout matters. Doing the "obvious" thing results in extra
if (__is_nan(b)) return a; * flow control being inserted to implement the short-circuit evaluation
* rules. Flow control is bad!
*/
bool b_nan = __is_nan(b);
bool a_lt_b = __flt64_nonnan(a, b);
bool a_nan = __is_nan(a);
if (__flt64_nonnan(a, b)) return a; return (b_nan || a_lt_b) && !a_nan ? a : b;
return b;
} }
uint64_t uint64_t
__fmax64(uint64_t a, uint64_t b) __fmax64(uint64_t a, uint64_t b)
{ {
if (__is_nan(a)) return b; /* This weird layout matters. Doing the "obvious" thing results in extra
if (__is_nan(b)) return a; * flow control being inserted to implement the short-circuit evaluation
* rules. Flow control is bad!
*/
bool b_nan = __is_nan(b);
bool a_lt_b = __flt64_nonnan(a, b);
bool a_nan = __is_nan(a);
if (__flt64_nonnan(a, b)) return b; return (b_nan || a_lt_b) && !a_nan ? b : a;
return a;
} }
uint64_t uint64_t