soft-fp64/fadd: Instead of tracking "b < a", track sign of the difference
Results on the 308 shaders extracted from the fp64 portion of the OpenGL CTS:

Tiger Lake and Ice Lake had similar results. (Tiger Lake shown)
total instructions in shared programs: 824403 -> 822766 (-0.20%)
instructions in affected programs: 756260 -> 754623 (-0.22%)
helped: 68
HURT: 1
helped stats (abs) min: 1 max: 118 x̄: 26.26 x̃: 18
helped stats (rel) min: 0.02% max: 0.97% x̄: 0.31% x̃: 0.23%
HURT stats (abs) min: 149 max: 149 x̄: 149.00 x̃: 149
HURT stats (rel) min: 0.17% max: 0.17% x̄: 0.17% x̃: 0.17%
95% mean confidence interval for instructions value: -31.94 -15.51
95% mean confidence interval for instructions %-change: -0.37% -0.23%
Instructions are helped.

total cycles in shared programs: 6828935 -> 6816791 (-0.18%)
cycles in affected programs: 6385191 -> 6373047 (-0.19%)
helped: 73
HURT: 0
helped stats (abs) min: 2 max: 852 x̄: 166.36 x̃: 120
helped stats (rel) min: <.01% max: 0.80% x̄: 0.22% x̃: 0.17%
95% mean confidence interval for cycles value: -210.80 -121.91
95% mean confidence interval for cycles %-change: -0.27% -0.17%
Cycles are helped.

total fills in shared programs: 1442 -> 1497 (3.81%)
fills in affected programs: 1442 -> 1497 (3.81%)
helped: 0
HURT: 1

Reviewed-by: Matt Turner <mattst88@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4142>
This commit is contained in:
@@ -771,14 +771,14 @@ __fadd64(uint64_t a, uint64_t b)
|
|||||||
bExp = mix(bExp, 1, aExp == 0);
|
bExp = mix(bExp, 1, aExp == 0);
|
||||||
aExp = mix(aExp, 1, aExp == 0);
|
aExp = mix(aExp, 1, aExp == 0);
|
||||||
bool zexp_normal = false;
|
bool zexp_normal = false;
|
||||||
bool blta = true;
|
uint sign_of_difference = 0;
|
||||||
if (bFracHi < aFracHi) {
|
if (bFracHi < aFracHi) {
|
||||||
__sub64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);
|
__sub64(aFracHi, aFracLo, bFracHi, bFracLo, zFrac0, zFrac1);
|
||||||
zexp_normal = true;
|
zexp_normal = true;
|
||||||
}
|
}
|
||||||
else if (aFracHi < bFracHi) {
|
else if (aFracHi < bFracHi) {
|
||||||
__sub64(bFracHi, bFracLo, aFracHi, aFracLo, zFrac0, zFrac1);
|
__sub64(bFracHi, bFracLo, aFracHi, aFracLo, zFrac0, zFrac1);
|
||||||
blta = false;
|
sign_of_difference = 0x80000000;
|
||||||
zexp_normal = true;
|
zexp_normal = true;
|
||||||
}
|
}
|
||||||
else if (bFracLo < aFracLo) {
|
else if (bFracLo < aFracLo) {
|
||||||
@@ -787,11 +787,11 @@ __fadd64(uint64_t a, uint64_t b)
|
|||||||
}
|
}
|
||||||
else if (aFracLo < bFracLo) {
|
else if (aFracLo < bFracLo) {
|
||||||
__sub64(bFracHi, bFracLo, aFracHi, aFracLo, zFrac0, zFrac1);
|
__sub64(bFracHi, bFracLo, aFracHi, aFracLo, zFrac0, zFrac1);
|
||||||
blta = false;
|
sign_of_difference = 0x80000000;
|
||||||
zexp_normal = true;
|
zexp_normal = true;
|
||||||
}
|
}
|
||||||
zExp = mix(bExp, aExp, blta);
|
zExp = mix(bExp, aExp, sign_of_difference == 0u);
|
||||||
aSign = mix(aSign ^ 0x80000000u, aSign, blta);
|
aSign ^= sign_of_difference;
|
||||||
uint64_t retval_0 = __packFloat64(uint(FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) << 31, 0, 0u, 0u);
|
uint64_t retval_0 = __packFloat64(uint(FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) << 31, 0, 0u, 0u);
|
||||||
uint64_t retval_1 = __normalizeRoundAndPackFloat64(aSign, zExp - 11, zFrac0, zFrac1);
|
uint64_t retval_1 = __normalizeRoundAndPackFloat64(aSign, zExp - 11, zFrac0, zFrac1);
|
||||||
return mix(retval_0, retval_1, zexp_normal);
|
return mix(retval_0, retval_1, zexp_normal);
|
||||||
|
Reference in New Issue
Block a user