pan/bi: Fuse [US][8|16]_TO_F32 ops

This combines nicely with the previous isel change. Now GLSL like

   float(int_x >> 24)

will generate a single machine instruction

   S8_TO_F32 int_x.b3

Noticed when debugging
KHR-GLES31.core.shader_bitfield_operation.unpackSnorm4x8.0

...but naturally no real workloads care. Helped shaders are from Android games
that appear to have run through a translator, naturally.

total instructions in shared programs: 2674831 -> 2674783 (<.01%)
instructions in affected programs: 11493 -> 11445 (-0.42%)
helped: 31
HURT: 0
helped stats (abs) min: 1.0 max: 3.0 x̄: 1.55 x̃: 1
helped stats (rel) min: 0.16% max: 2.90% x̄: 0.51% x̃: 0.41%
95% mean confidence interval for instructions value: -1.87 -1.22
95% mean confidence interval for instructions %-change: -0.69% -0.33%
Instructions are helped.

total cvt in shared programs: 14128.84 -> 14128.09 (<.01%)
cvt in affected programs: 78.17 -> 77.42 (-0.96%)
helped: 31
HURT: 0
helped stats (abs) min: 0.015625 max: 0.046875 x̄: 0.02 x̃: 0
helped stats (rel) min: 0.36% max: 4.26% x̄: 1.28% x̃: 1.20%
95% mean confidence interval for cvt value: -0.03 -0.02
95% mean confidence interval for cvt %-change: -1.62% -0.94%
Cvt are helped.

total quadwords in shared programs: 1449920 -> 1449840 (<.01%)
quadwords in affected programs: 2184 -> 2104 (-3.66%)
helped: 10
HURT: 0
helped stats (abs) min: 8.0 max: 8.0 x̄: 8.00 x̃: 8
helped stats (rel) min: 2.44% max: 5.88% x̄: 4.11% x̃: 4.76%
95% mean confidence interval for quadwords value: -8.00 -8.00
95% mean confidence interval for quadwords %-change: -5.11% -3.12%
Quadwords are helped.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17857>
2022-08-19 11:51:20 -04:00
parent eab1d36643
commit c88b8cbee3
1 changed files with 34 additions and 0 deletions
--- a/src/panfrost/bifrost/bi_opt_mod_props.c
+++ b/src/panfrost/bifrost/bi_opt_mod_props.c
@@ -152,6 +152,39 @@ bi_fuse_discard_fcmp(bi_instr *I, bi_instr *mod, unsigned arch)
        }
 }

+/*
+ * S32_TO_F32(S8_TO_S32(x)) -> S8_TO_F32 and friends. Round modes don't matter
+ * because all 8-bit and 16-bit integers may be represented exactly as fp32.
+ */
+static const struct {
+        enum bi_opcode inner;
+        enum bi_opcode outer;
+        enum bi_opcode replacement;
+} bi_small_int_patterns[] = {
+        { BI_OPCODE_S8_TO_S32, BI_OPCODE_S32_TO_F32, BI_OPCODE_S8_TO_F32 },
+        { BI_OPCODE_U8_TO_U32, BI_OPCODE_U32_TO_F32, BI_OPCODE_U8_TO_F32 },
+        { BI_OPCODE_U8_TO_U32, BI_OPCODE_S32_TO_F32, BI_OPCODE_U8_TO_F32 },
+        { BI_OPCODE_S16_TO_S32, BI_OPCODE_S32_TO_F32, BI_OPCODE_S16_TO_F32 },
+        { BI_OPCODE_U16_TO_U32, BI_OPCODE_U32_TO_F32, BI_OPCODE_U16_TO_F32 },
+        { BI_OPCODE_U16_TO_U32, BI_OPCODE_S32_TO_F32, BI_OPCODE_U16_TO_F32 },
+};
+
+/* Rewrite I in place if (mod->op, I->op) is a pair in bi_small_int_patterns. */
+static inline void
+bi_fuse_small_int_to_f32(bi_instr *I, bi_instr *mod)
+{
+        for (unsigned i = 0; i < ARRAY_SIZE(bi_small_int_patterns); ++i) {
+                if (I->op != bi_small_int_patterns[i].outer ||
+                    mod->op != bi_small_int_patterns[i].inner)
+                        continue;
+                assert(I->src[0].swizzle == BI_SWIZZLE_H01);
+                I->src[0] = mod->src[0]; /* take mod's source, swizzle included */
+                I->round = BI_ROUND_NONE;
+                I->op = bi_small_int_patterns[i].replacement;
+                return; /* I is rewritten; stop instead of rescanning the table */
+        }
+}
+
 void
 bi_opt_mod_prop_forward(bi_context *ctx)
 {
@@ -173,6 +206,7 @@ bi_opt_mod_prop_forward(bi_context *ctx)
                        unsigned size = bi_opcode_props[I->op].size;

                        bi_fuse_discard_fcmp(I, mod, ctx->arch);
+                        bi_fuse_small_int_to_f32(I, mod);

                        if (bi_is_fabsneg(mod->op, size)) {
                                if (mod->src[0].abs && !bi_takes_fabs(ctx->arch, I, mod->src[0], s))