pan/bi: Fuse [US][8|16]_TO_F32 ops
This combines nicely with the previous isel change. Now GLSL like

    float(int_x >> 24)

will generate a single machine instruction

    S8_TO_F32 int_x.b3

Noticed when debugging KHR-GLES31.core.shader_bitfield_operation.unpackSnorm4x8.0

...but naturally no real workloads care. Helped shaders are from Android games that appear to have run through a translator, naturally.

total instructions in shared programs: 2674831 -> 2674783 (<.01%)
instructions in affected programs: 11493 -> 11445 (-0.42%)
helped: 31
HURT: 0
helped stats (abs) min: 1.0 max: 3.0 x̄: 1.55 x̃: 1
helped stats (rel) min: 0.16% max: 2.90% x̄: 0.51% x̃: 0.41%
95% mean confidence interval for instructions value: -1.87 -1.22
95% mean confidence interval for instructions %-change: -0.69% -0.33%
Instructions are helped.

total cvt in shared programs: 14128.84 -> 14128.09 (<.01%)
cvt in affected programs: 78.17 -> 77.42 (-0.96%)
helped: 31
HURT: 0
helped stats (abs) min: 0.015625 max: 0.046875 x̄: 0.02 x̃: 0
helped stats (rel) min: 0.36% max: 4.26% x̄: 1.28% x̃: 1.20%
95% mean confidence interval for cvt value: -0.03 -0.02
95% mean confidence interval for cvt %-change: -1.62% -0.94%
Cvt are helped.

total quadwords in shared programs: 1449920 -> 1449840 (<.01%)
quadwords in affected programs: 2184 -> 2104 (-3.66%)
helped: 10
HURT: 0
helped stats (abs) min: 8.0 max: 8.0 x̄: 8.00 x̃: 8
helped stats (rel) min: 2.44% max: 5.88% x̄: 4.11% x̃: 4.76%
95% mean confidence interval for quadwords value: -8.00 -8.00
95% mean confidence interval for quadwords %-change: -5.11% -3.12%
Quadwords are helped.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17857>
This commit is contained in:

committed by
Marge Bot

parent
eab1d36643
commit
c88b8cbee3
@@ -152,6 +152,39 @@ bi_fuse_discard_fcmp(bi_instr *I, bi_instr *mod, unsigned arch)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* S32_TO_F32(S8_TO_S32(x)) -> S8_TO_F32 and friends. Round modes don't matter
|
||||
* because all 8-bit and 16-bit integers may be represented exactly as fp32.
|
||||
*/
|
||||
struct {
|
||||
enum bi_opcode inner;
|
||||
enum bi_opcode outer;
|
||||
enum bi_opcode replacement;
|
||||
} bi_small_int_patterns[] = {
|
||||
{ BI_OPCODE_S8_TO_S32, BI_OPCODE_S32_TO_F32, BI_OPCODE_S8_TO_F32 },
|
||||
{ BI_OPCODE_U8_TO_U32, BI_OPCODE_U32_TO_F32, BI_OPCODE_U8_TO_F32 },
|
||||
{ BI_OPCODE_U8_TO_U32, BI_OPCODE_S32_TO_F32, BI_OPCODE_U8_TO_F32 },
|
||||
{ BI_OPCODE_S16_TO_S32, BI_OPCODE_S32_TO_F32, BI_OPCODE_S16_TO_F32 },
|
||||
{ BI_OPCODE_U16_TO_U32, BI_OPCODE_U32_TO_F32, BI_OPCODE_U16_TO_F32 },
|
||||
{ BI_OPCODE_U16_TO_U32, BI_OPCODE_S32_TO_F32, BI_OPCODE_U16_TO_F32 },
|
||||
};
|
||||
|
||||
static inline void
|
||||
bi_fuse_small_int_to_f32(bi_instr *I, bi_instr *mod)
|
||||
{
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(bi_small_int_patterns); ++i) {
|
||||
if (I->op != bi_small_int_patterns[i].outer)
|
||||
continue;
|
||||
if (mod->op != bi_small_int_patterns[i].inner)
|
||||
continue;
|
||||
|
||||
assert(I->src[0].swizzle == BI_SWIZZLE_H01);
|
||||
I->src[0] = mod->src[0];
|
||||
I->round = BI_ROUND_NONE;
|
||||
I->op = bi_small_int_patterns[i].replacement;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
bi_opt_mod_prop_forward(bi_context *ctx)
|
||||
{
|
||||
@@ -173,6 +206,7 @@ bi_opt_mod_prop_forward(bi_context *ctx)
|
||||
unsigned size = bi_opcode_props[I->op].size;
|
||||
|
||||
bi_fuse_discard_fcmp(I, mod, ctx->arch);
|
||||
bi_fuse_small_int_to_f32(I, mod);
|
||||
|
||||
if (bi_is_fabsneg(mod->op, size)) {
|
||||
if (mod->src[0].abs && !bi_takes_fabs(ctx->arch, I, mod->src[0], s))
|
||||
|
Reference in New Issue
Block a user