pan/bi: Fuse [US][8|16]_TO_F32 ops

This combines nicely with the previous isel change. Now GLSL like

   float(int_x >> 24)

will generate a single machine instruction

   S8_TO_F32 int_x.b3

Noticed when debugging

   KHR-GLES31.core.shader_bitfield_operation.unpackSnorm4x8.0

...but naturally no real workloads care. Helped shaders are from Android games
that appear to have run through a translator, naturally.

total instructions in shared programs: 2674831 -> 2674783 (<.01%)
instructions in affected programs: 11493 -> 11445 (-0.42%)
helped: 31
HURT: 0
helped stats (abs) min: 1.0 max: 3.0 x̄: 1.55 x̃: 1
helped stats (rel) min: 0.16% max: 2.90% x̄: 0.51% x̃: 0.41%
95% mean confidence interval for instructions value: -1.87 -1.22
95% mean confidence interval for instructions %-change: -0.69% -0.33%
Instructions are helped.

total cvt in shared programs: 14128.84 -> 14128.09 (<.01%)
cvt in affected programs: 78.17 -> 77.42 (-0.96%)
helped: 31
HURT: 0
helped stats (abs) min: 0.015625 max: 0.046875 x̄: 0.02 x̃: 0
helped stats (rel) min: 0.36% max: 4.26% x̄: 1.28% x̃: 1.20%
95% mean confidence interval for cvt value: -0.03 -0.02
95% mean confidence interval for cvt %-change: -1.62% -0.94%
Cvt are helped.

total quadwords in shared programs: 1449920 -> 1449840 (<.01%)
quadwords in affected programs: 2184 -> 2104 (-3.66%)
helped: 10
HURT: 0
helped stats (abs) min: 8.0 max: 8.0 x̄: 8.00 x̃: 8
helped stats (rel) min: 2.44% max: 5.88% x̄: 4.11% x̃: 4.76%
95% mean confidence interval for quadwords value: -8.00 -8.00
95% mean confidence interval for quadwords %-change: -5.11% -3.12%
Quadwords are helped.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17857>
This commit is contained in:
Alyssa Rosenzweig
2022-08-19 11:51:20 -04:00
committed by Marge Bot
parent eab1d36643
commit c88b8cbee3

View File

@@ -152,6 +152,39 @@ bi_fuse_discard_fcmp(bi_instr *I, bi_instr *mod, unsigned arch)
}
}
/*
 * S32_TO_F32(S8_TO_S32(x)) -> S8_TO_F32 and friends. Round modes don't matter
 * because all 8-bit and 16-bit integers may be represented exactly as fp32.
 *
 * Each row describes one fusable pair of conversions:
 *
 *    inner       - opcode of the widening conversion producing the 32-bit int
 *    outer       - opcode of the int-to-float conversion consuming that value
 *    replacement - single opcode that replaces the pair
 *
 * The unsigned widenings are listed with both outer conversions, presumably
 * because a zero-extended 8-bit or 16-bit value is non-negative and so
 * converts to the same float whether it is treated as signed or unsigned.
 * The signed widenings are only paired with the signed outer conversion.
 *
 * The table is read-only and private to this file, hence static const.
 */
static const struct {
        enum bi_opcode inner;
        enum bi_opcode outer;
        enum bi_opcode replacement;
} bi_small_int_patterns[] = {
        { BI_OPCODE_S8_TO_S32,  BI_OPCODE_S32_TO_F32, BI_OPCODE_S8_TO_F32 },
        { BI_OPCODE_U8_TO_U32,  BI_OPCODE_U32_TO_F32, BI_OPCODE_U8_TO_F32 },
        { BI_OPCODE_U8_TO_U32,  BI_OPCODE_S32_TO_F32, BI_OPCODE_U8_TO_F32 },

        { BI_OPCODE_S16_TO_S32, BI_OPCODE_S32_TO_F32, BI_OPCODE_S16_TO_F32 },
        { BI_OPCODE_U16_TO_U32, BI_OPCODE_U32_TO_F32, BI_OPCODE_U16_TO_F32 },
        { BI_OPCODE_U16_TO_U32, BI_OPCODE_S32_TO_F32, BI_OPCODE_U16_TO_F32 },
};
/*
 * Try to fold a small-integer widening conversion into the int-to-float
 * conversion that consumes it.
 *
 * I   - candidate outer instruction; rewritten in place on a match.
 * mod - candidate inner instruction whose source replaces I->src[0].
 *       NOTE(review): this assumes the caller has already established that
 *       mod produces the value read by I->src[0] -- confirm at the call site.
 *
 * When the (mod->op, I->op) pair appears in bi_small_int_patterns, I is
 * turned into the fused opcode reading mod's source directly. Nothing is
 * changed when no row matches. mod itself is never modified.
 */
static inline void
bi_fuse_small_int_to_f32(bi_instr *I, bi_instr *mod)
{
        const unsigned nr_patterns = ARRAY_SIZE(bi_small_int_patterns);

        for (unsigned idx = 0; idx < nr_patterns; idx++) {
                if (I->op == bi_small_int_patterns[idx].outer &&
                    mod->op == bi_small_int_patterns[idx].inner) {
                        /* The outer source is replaced wholesale by the inner
                         * one, so it must not carry a swizzle of its own.
                         */
                        assert(I->src[0].swizzle == BI_SWIZZLE_H01);

                        /* Small integers are exact in fp32, so no rounding
                         * mode is needed on the fused instruction.
                         */
                        I->round = BI_ROUND_NONE;
                        I->src[0] = mod->src[0];
                        I->op = bi_small_int_patterns[idx].replacement;
                }
        }
}
void
bi_opt_mod_prop_forward(bi_context *ctx)
{
@@ -173,6 +206,7 @@ bi_opt_mod_prop_forward(bi_context *ctx)
unsigned size = bi_opcode_props[I->op].size;
bi_fuse_discard_fcmp(I, mod, ctx->arch);
bi_fuse_small_int_to_f32(I, mod);
if (bi_is_fabsneg(mod->op, size)) {
if (mod->src[0].abs && !bi_takes_fabs(ctx->arch, I, mod->src[0], s))