nir/lower_mediump: Lower FS outputs to 16-bit when the value was upconverted.

Take this real-world (trimmed) shader:

    precision highp float;
    in lowp vec4 var_varVertexColor;
    layout(location = 0) out vec4 out_FragColor0;
    void main()
    {
        vec4 textureColor0 = vec4(1.000000e+00, 0.000000e+00, 0.000000e+00, 1.000000e+00);
        vec3 color = vec3(1.000000e+00, 1.000000e+00, 1.000000e+00);
        vec4 outColor = vec4(vec3((color).rgb), 1.000000e+00);
        (outColor *= vec4(var_varVertexColor));
        (out_FragColor0 = outColor);
    }

After optimizations, it's just a store from input to output. If we decide to lower the input to 16-bit, then as long as the driver can handle 16-bit outputs, it would be a good idea to demote the output and save the conversions.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18003>
2022-08-09 13:56:50 -07:00
parent dbb17f567a
commit 80b35fbefe
1 changed files with 27 additions and 3 deletions
--- a/src/compiler/nir/nir_lower_mediump.c
+++ b/src/compiler/nir/nir_lower_mediump.c
@@ -166,33 +166,57 @@ nir_lower_mediump_io(nir_shader *nir, nir_variable_mode modes,
                           !(nir->info.stage == MESA_SHADER_FRAGMENT &&
                             mode == nir_var_shader_out);

-         if (!sem.medium_precision ||
-             (is_varying && sem.location <= VARYING_SLOT_VAR31 &&
-              !(varying_mask & BITFIELD64_BIT(sem.location))))
+         if (is_varying && sem.location <= VARYING_SLOT_VAR31 &&
+            !(varying_mask & BITFIELD64_BIT(sem.location))) {
            continue; /* can't lower */
+         }

         if (nir_intrinsic_has_src_type(intr)) {
            /* Stores. */
            nir_alu_type type = nir_intrinsic_src_type(intr);

+            nir_op upconvert_op;
            switch (type) {
            case nir_type_float32:
               convert = nir_f2fmp;
+               upconvert_op = nir_op_f2f32;
               break;
            case nir_type_int32:
+               convert = nir_i2imp;
+               upconvert_op = nir_op_i2i32;
+               break;
            case nir_type_uint32:
               convert = nir_i2imp;
+               upconvert_op = nir_op_u2u32;
               break;
            default:
               continue; /* already lowered? */
            }

+            /* Check that the output is mediump, or (for fragment shader
+             * outputs) is a conversion from a mediump value, and lower it to
+             * mediump.  Note that we don't automatically apply it to
+             * gl_FragDepth, as GLSL ES declares it highp and so hardware such
+             * as Adreno a6xx doesn't expect a half-float output for it.
+             */
+            nir_ssa_def *val = intr->src[0].ssa;
+            bool is_fragdepth = (nir->info.stage == MESA_SHADER_FRAGMENT &&
+                                 sem.location == FRAG_RESULT_DEPTH);
+            if (!sem.medium_precision &&
+                (is_varying || is_fragdepth || val->parent_instr->type != nir_instr_type_alu ||
+                 nir_instr_as_alu(val->parent_instr)->op != upconvert_op)) {
+               continue;
+            }
+
            /* Convert the 32-bit store into a 16-bit store. */
            b.cursor = nir_before_instr(&intr->instr);
            nir_instr_rewrite_src_ssa(&intr->instr, &intr->src[0],
                                      convert(&b, intr->src[0].ssa));
            nir_intrinsic_set_src_type(intr, (type & ~32) | 16);
         } else {
+            if (!sem.medium_precision)
+               continue;
+
            /* Loads. */
            nir_alu_type type = nir_intrinsic_dest_type(intr);