intel/fs: Make implied_mrf_writes() an fs_inst method.

This will be convenient in a later commit enabling SIMD32 fragment shaders, and happens to fix the calculation for MATH instructions which is currently inaccurate for SIMD-lowered instructions on Gen4-5 platforms (all of them on Gen4 in SIMD16 mode), since it was based on the shader's dispatch width rather than on the actual execution size of the instruction. This causes some shader-db noise on Gen4 due to the more compact register allocation interacting with the SEND dependency workarounds, but otherwise no major changes. Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2019-12-27 16:38:26 -08:00
parent 591f146fd2
commit c20dc9b836
7 changed files with 17 additions and 18 deletions
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -1164,16 +1164,16 @@ fs_inst::flags_written() const
 * Note that this is not the 0 or 1 implied writes in an actual gen
 * instruction -- the FS opcodes often generate MOVs in addition.
 */
-int
-fs_visitor::implied_mrf_writes(const fs_inst *inst) const
+unsigned
+fs_inst::implied_mrf_writes() const
 {
-   if (inst->mlen == 0)
+   if (mlen == 0)
      return 0;

-   if (inst->base_mrf == -1)
+   if (base_mrf == -1)
      return 0;

-   switch (inst->opcode) {
+   switch (opcode) {
   case SHADER_OPCODE_RCP:
   case SHADER_OPCODE_RSQ:
   case SHADER_OPCODE_SQRT:
@@ -1181,11 +1181,11 @@ fs_visitor::implied_mrf_writes(const fs_inst *inst) const
   case SHADER_OPCODE_LOG2:
   case SHADER_OPCODE_SIN:
   case SHADER_OPCODE_COS:
-      return 1 * dispatch_width / 8;
+      return 1 * exec_size / 8;
   case SHADER_OPCODE_POW:
   case SHADER_OPCODE_INT_QUOTIENT:
   case SHADER_OPCODE_INT_REMAINDER:
-      return 2 * dispatch_width / 8;
+      return 2 * exec_size / 8;
   case SHADER_OPCODE_TEX:
   case FS_OPCODE_TXB:
   case SHADER_OPCODE_TXD:
@@ -1201,14 +1201,14 @@ fs_visitor::implied_mrf_writes(const fs_inst *inst) const
      return 1;
   case FS_OPCODE_FB_WRITE:
   case FS_OPCODE_REP_FB_WRITE:
-      return inst->src[0].file == BAD_FILE ? 0 : 2;
+      return src[0].file == BAD_FILE ? 0 : 2;
   case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
   case SHADER_OPCODE_GEN4_SCRATCH_READ:
      return 1;
   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4:
-      return inst->mlen;
+      return mlen;
   case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
-      return inst->mlen;
+      return mlen;
   default:
      unreachable("not reached");
   }
@@ -3494,7 +3494,7 @@ fs_visitor::remove_duplicate_mrf_writes()
 	 /* Found a SEND instruction, which will include two or fewer
 	  * implied MRF writes.  We could do better here.
 	  */
-	 for (int i = 0; i < implied_mrf_writes(inst); i++) {
+	 for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
 	    last_mrf_move[inst->base_mrf + i] = NULL;
 	 }
      }
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -302,8 +302,6 @@ public:

   fs_reg interp_reg(int location, int channel);

-   int implied_mrf_writes(const fs_inst *inst) const;
-
   virtual void dump_instructions();
   virtual void dump_instructions(const char *name);
   void dump_instruction(backend_instruction *inst);
--- a/src/intel/compiler/brw_fs_bank_conflicts.cpp
+++ b/src/intel/compiler/brw_fs_bank_conflicts.cpp
@@ -573,7 +573,7 @@ namespace {
         if (v->devinfo->gen >= 7) {
            assert(inst->dst.file != MRF);

-            for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
+            for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
               const unsigned reg = GEN7_MRF_HACK_START + inst->base_mrf + i;
               constrained[p.atom_of_reg(reg)] = true;
            }
--- a/src/intel/compiler/brw_fs_reg_allocate.cpp
+++ b/src/intel/compiler/brw_fs_reg_allocate.cpp
@@ -494,7 +494,7 @@ get_used_mrfs(fs_visitor *v, bool *mrf_used)
      }

      if (inst->mlen > 0) {
-	 for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
+	 for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
            mrf_used[inst->base_mrf + i] = true;
         }
      }
--- a/src/intel/compiler/brw_fs_scoreboard.cpp
+++ b/src/intel/compiler/brw_fs_scoreboard.cpp
@@ -896,7 +896,7 @@ namespace {
            }

            if (is_send(inst) && inst->base_mrf != -1) {
-               for (int j = 0; j < shader->implied_mrf_writes(inst); j++)
+               for (unsigned j = 0; j < inst->implied_mrf_writes(); j++)
                  add_dependency(ids, deps[ip], dependency_for_write(inst,
                     sb.get(brw_uvec_mrf(8, inst->base_mrf + j, 0))));
            }
--- a/src/intel/compiler/brw_ir_fs.h
+++ b/src/intel/compiler/brw_ir_fs.h
@@ -357,6 +357,7 @@ public:
   bool can_do_cmod();
   bool can_change_types() const;
   bool has_source_and_destination_hazard() const;
+   unsigned implied_mrf_writes() const;

   /**
    * Return whether \p arg is a control source of a virtual instruction which
--- a/src/intel/compiler/brw_schedule_instructions.cpp
+++ b/src/intel/compiler/brw_schedule_instructions.cpp
@@ -1190,7 +1190,7 @@ fs_instruction_scheduler::calculate_deps()
      }

      if (inst->mlen > 0 && inst->base_mrf != -1) {
-         for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
+         for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
            add_dep(last_mrf_write[inst->base_mrf + i], n);
            last_mrf_write[inst->base_mrf + i] = n;
         }
@@ -1313,7 +1313,7 @@ fs_instruction_scheduler::calculate_deps()
      }

      if (inst->mlen > 0 && inst->base_mrf != -1) {
-         for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
+         for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) {
            last_mrf_write[inst->base_mrf + i] = n;
         }
      }