From a4a34cd3234858dbfe990ede81d3a61edf95c9da Mon Sep 17 00:00:00 2001
From: Gert Wollny <gert.wollny@collabora.com>
Date: Sat, 9 Apr 2022 09:49:25 +0200
Subject: [PATCH] virgl: Propagate precice flag through moves

NIR doesn't propagate precise through moves, and with NTT the
last output is usually preceded by a move, so that we no longer
see that the evaluation of some value is supposed to be exact,
and, hence we can't decorate the outputs accordingly.

Fixes with NTT:
 dEQP-GLES31.functional.tessellation.common_edge.
     triangles_equal_spacing_precise
     triangles_fractional_odd_spacing_precise
     triangles_fractional_even_spacing_precise
     quads_equal_spacing_precise
     quads_fractional_odd_spacing_precise
     quads_fractional_even_spacing_precise

v2: Don't clear the precise flag when we hit a mov, because we may
    hit a if/else construct like below and we don't track branches

    IF X
       TEMP[0] = OP_PRECICE ...
    ELSE
       TEMP[0] = MOV CONST[]
    ENDIF

    Thanks Emma for pointing out the problem.

v2: allocate precise handling flags to transform_prolog (Emma)

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Acked-by: Emma Anholt <emma@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15836>
---
 src/gallium/drivers/virgl/virgl_tgsi.c | 48 +++++++++++++++++++++++++-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/virgl/virgl_tgsi.c b/src/gallium/drivers/virgl/virgl_tgsi.c
index af19401ebdb..f14583a3464 100644
--- a/src/gallium/drivers/virgl/virgl_tgsi.c
+++ b/src/gallium/drivers/virgl/virgl_tgsi.c
@@ -70,6 +70,8 @@ struct virgl_transform_context {
    unsigned num_writemask_fixups;
 
    struct virgl_input_temp input_temp[INPUT_TEMP_COUNT];
+
+   uint32_t *precise_flags;
 };
 
 static void
@@ -245,6 +247,8 @@ virgl_tgsi_transform_prolog(struct tgsi_transform_context * ctx)
    }
 
    virgl_mov_input_temp_uint(ctx, &vtctx->input_temp[INPUT_TEMP_HELPER_INVOCATION]);
+
+   vtctx->precise_flags = calloc((vtctx->next_temp + 7)/8, sizeof(uint32_t));
 }
 
 static void
@@ -271,6 +275,45 @@ virgl_tgsi_transform_instruction(struct tgsi_transform_context *ctx,
    if (!vtctx->has_precise && inst->Instruction.Precise)
       inst->Instruction.Precise = 0;
 
+   /* For outputs NTT adds a final mov op but NIR doesn't propagate precise with moves,
+    * so that we don't see whether the assignment is from a precise instruction, but
+    * we need to know this to set the output decoration correctly, so propagate the
+    * precise flag with TGSI */
+   for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) {
+      if (inst->Dst[i].Register.File == TGSI_FILE_TEMPORARY) {
+         uint32_t index = inst->Dst[i].Register.Index / 8;
+         uint32_t bits = inst->Dst[i].Register.WriteMask << (inst->Dst[i].Register.Index % 8);
+
+         /* Since we re-use temps set and clear the precise flag according to the last use
+          * for the register index and written components. Since moves are not marked
+          * as precise originally, and we may end up with an if/else clause that assignes
+          * a precise result in the if branche, but does a simple move from a constant
+          * on the else branche, we don't clear the flag when we hit a mov.
+          * We do the conservatiove approach here, because virglrenderer emits different temp
+          * ranges, and we don't want to mark all temps as precise only because we have
+          * one precise output */
+         if (inst->Instruction.Precise)
+            vtctx->precise_flags[index] |= bits;
+         else if (inst->Instruction.Opcode != TGSI_OPCODE_MOV)
+            vtctx->precise_flags[index] &= ~bits;
+      } else if (inst->Instruction.Opcode == TGSI_OPCODE_MOV) {
+         for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) {
+            if (inst->Src[i].Register.File == TGSI_FILE_TEMPORARY) {
+               uint32_t index = inst->Src[i].Register.Index / 8;
+               uint32_t read_mask = (1 << inst->Src[i].Register.SwizzleX) |
+                                    (1 << inst->Src[i].Register.SwizzleY) |
+                                    (1 << inst->Src[i].Register.SwizzleZ) |
+                                    (1 << inst->Src[i].Register.SwizzleW);
+               uint32_t bits = read_mask << (inst->Dst[i].Register.Index % 8);
+               if (vtctx->precise_flags[index] & bits) {
+                  inst->Instruction.Precise = 1;
+                  break;
+               }
+            }
+         }
+      }
+   }
+
    /* virglrenderer can run out of space in internal buffers for immediates as
     * tex operands.  Move the first immediate tex arg to a temp to save space in
     * the buffer.
@@ -377,5 +420,8 @@ struct tgsi_token *virgl_tgsi_transform(struct virgl_screen *vscreen, const stru
 
    tgsi_scan_shader(tokens_in, &transform.info);
 
-   return tgsi_transform_shader(tokens_in, newLen, &transform.base);
+   struct tgsi_token *new_tokens = tgsi_transform_shader(tokens_in, newLen, &transform.base);
+   free(transform.precise_flags);
+   return new_tokens;
+
 }