soft-fp64/fadd: Common code optimization for differing sign case
These are basically the same ideas from the previous 4 commits applied to the aSign != bSign part... and all smashed into one commit.

The shader hurt for spills and/or fills is from KHR-GL46.gpu_shader_fp64.builtin.inverse_dmat4.

Results on the 308 shaders extracted from the fp64 portion of the OpenGL CTS:

Tiger Lake
total instructions in shared programs: 787258 -> 683638 (-13.16%)
instructions in affected programs: 725435 -> 621815 (-14.28%)
helped: 74
HURT: 0
helped stats (abs) min: 152 max: 10261 x̄: 1400.27 x̃: 975
helped stats (rel) min: 11.61% max: 20.92% x̄: 15.40% x̃: 14.86%
95% mean confidence interval for instructions value: -1740.11 -1060.43
95% mean confidence interval for instructions %-change: -16.01% -14.79%
Instructions are helped.

total cycles in shared programs: 6483227 -> 5458858 (-15.80%)
cycles in affected programs: 6051245 -> 5026876 (-16.93%)
helped: 74
HURT: 0
helped stats (abs) min: 1566 max: 95474 x̄: 13842.82 x̃: 9757
helped stats (rel) min: 13.94% max: 23.26% x̄: 17.98% x̃: 17.57%
95% mean confidence interval for cycles value: -17104.25 -10581.40
95% mean confidence interval for cycles %-change: -18.61% -17.35%
Cycles are helped.

total spills in shared programs: 553 -> 445 (-19.53%)
spills in affected programs: 553 -> 445 (-19.53%)
helped: 1
HURT: 0

total fills in shared programs: 1307 -> 1323 (1.22%)
fills in affected programs: 1307 -> 1323 (1.22%)
helped: 0
HURT: 1

Ice Lake
total instructions in shared programs: 781216 -> 678470 (-13.15%)
instructions in affected programs: 720088 -> 617342 (-14.27%)
helped: 74
HURT: 0
helped stats (abs) min: 153 max: 8863 x̄: 1388.46 x̃: 975
helped stats (rel) min: 11.24% max: 21.03% x̄: 15.47% x̃: 15.01%
95% mean confidence interval for instructions value: -1703.57 -1073.35
95% mean confidence interval for instructions %-change: -16.09% -14.85%
Instructions are helped.

total cycles in shared programs: 6464085 -> 5453997 (-15.63%)
cycles in affected programs: 6031771 -> 5021683 (-16.75%)
helped: 74
HURT: 0
helped stats (abs) min: 1552 max: 90317 x̄: 13649.84 x̃: 9650
helped stats (rel) min: 13.84% max: 23.11% x̄: 17.83% x̃: 17.41%
95% mean confidence interval for cycles value: -16802.89 -10496.79
95% mean confidence interval for cycles %-change: -18.46% -17.21%
Cycles are helped.

total spills in shared programs: 279 -> 368 (31.90%)
spills in affected programs: 279 -> 368 (31.90%)
helped: 0
HURT: 1

total fills in shared programs: 973 -> 1155 (18.71%)
fills in affected programs: 973 -> 1155 (18.71%)
helped: 0
HURT: 1

Reviewed-by: Matt Turner <mattst88@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4142>
This commit is contained in:
@@ -740,15 +740,23 @@ __fadd64(uint64_t a, uint64_t b)
|
||||
|
||||
__shortShift64Left(aFracHi, aFracLo, 10, aFracHi, aFracLo);
|
||||
__shortShift64Left(bFracHi, bFracLo, 10, bFracHi, bFracLo);
|
||||
if (0 < expDiff) {
|
||||
if (expDiff != 0) {
|
||||
uint zFrac0;
|
||||
uint zFrac1;
|
||||
|
||||
if (expDiff < 0) {
|
||||
EXCHANGE(aFracHi, bFracHi);
|
||||
EXCHANGE(aFracLo, bFracLo);
|
||||
EXCHANGE(aExp, bExp);
|
||||
aSign ^= 0x80000000u;
|
||||
}
|
||||
|
||||
if (aExp == 0x7FF) {
|
||||
bool propagate = (aFracHi | aFracLo) != 0u;
|
||||
return mix(a, __propagateFloat64NaN(a, b), propagate);
|
||||
return mix(__packFloat64(aSign, 0x7ff, 0u, 0u), __propagateFloat64NaN(a, b), propagate);
|
||||
}
|
||||
expDiff = mix(expDiff, expDiff - 1, bExp == 0);
|
||||
|
||||
expDiff = mix(abs(expDiff), abs(expDiff) - 1, bExp == 0);
|
||||
bFracHi = mix(bFracHi | 0x40000000u, bFracHi, bExp == 0);
|
||||
__shift64RightJamming(bFracHi, bFracLo, expDiff, bFracHi, bFracLo);
|
||||
aFracHi |= 0x40000000u;
|
||||
@@ -757,24 +765,6 @@ __fadd64(uint64_t a, uint64_t b)
|
||||
--zExp;
|
||||
return __normalizeRoundAndPackFloat64(aSign, zExp - 10, zFrac0, zFrac1);
|
||||
}
|
||||
if (expDiff < 0) {
|
||||
uint zFrac0;
|
||||
uint zFrac1;
|
||||
|
||||
if (bExp == 0x7FF) {
|
||||
bool propagate = (bFracHi | bFracLo) != 0u;
|
||||
return mix(__packFloat64(aSign ^ 0x80000000u, 0x7ff, 0u, 0u), __propagateFloat64NaN(a, b), propagate);
|
||||
}
|
||||
expDiff = mix(expDiff, expDiff + 1, aExp == 0);
|
||||
aFracHi = mix(aFracHi | 0x40000000u, aFracHi, aExp == 0);
|
||||
__shift64RightJamming(aFracHi, aFracLo, - expDiff, aFracHi, aFracLo);
|
||||
bFracHi |= 0x40000000u;
|
||||
__sub64(bFracHi, bFracLo, aFracHi, aFracLo, zFrac0, zFrac1);
|
||||
zExp = bExp;
|
||||
aSign ^= 0x80000000u;
|
||||
--zExp;
|
||||
return __normalizeRoundAndPackFloat64(aSign, zExp - 10, zFrac0, zFrac1);
|
||||
}
|
||||
if (aExp == 0x7FF) {
|
||||
bool propagate = ((aFracHi | bFracHi) | (aFracLo | bFracLo)) != 0u;
|
||||
return mix(0xFFFFFFFFFFFFFFFFUL, __propagateFloat64NaN(a, b), propagate);
|
||||
|
Reference in New Issue
Block a user