intel: Add helper to calculate GPGPU_WALKER::RightExecutionMask

Suggested by Jason.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5142>
2020-05-27 08:05:41 -07:00
parent 78e400d4a5
commit bccf2a25a8
4 changed files with 18 additions and 18 deletions
--- a/src/gallium/drivers/iris/iris_state.c
+++ b/src/gallium/drivers/iris/iris_state.c
@@ -6624,14 +6624,6 @@ iris_upload_compute_state(struct iris_context *ice,
      }
   }

-   uint32_t remainder = group_size & (simd_size - 1);
-   uint32_t right_mask;
-
-   if (remainder > 0)
-      right_mask = ~0u >> (32 - remainder);
-   else
-      right_mask = ~0u >> (32 - simd_size);
-
 #define GPGPU_DISPATCHDIMX 0x2500
 #define GPGPU_DISPATCHDIMY 0x2504
 #define GPGPU_DISPATCHDIMZ 0x2508
@@ -6653,6 +6645,8 @@ iris_upload_compute_state(struct iris_context *ice,
      }
   }

+   const uint32_t right_mask = brw_cs_right_mask(group_size, simd_size);
+
   iris_emit_cmd(batch, GENX(GPGPU_WALKER), ggw) {
      ggw.IndirectParameterEnable    = grid->indirect != NULL;
      ggw.SIMDSize                   = simd_size / 16;
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -1529,6 +1529,19 @@ brw_cs_simd_size_for_group_size(const struct gen_device_info *devinfo,
                                const struct brw_cs_prog_data *cs_prog_data,
                                unsigned group_size);

+/** Calculate the RightExecutionMask field used in GPGPU_WALKER: the low
+ * (group_size & (simd_size - 1)) bits set, or the low simd_size bits when
+ * that remainder is 0.  Presumably simd_size is a power of two <= 32. */
+static inline unsigned
+brw_cs_right_mask(unsigned group_size, unsigned simd_size)
+{
+   const uint32_t remainder = group_size & (simd_size - 1);
+   if (remainder > 0)
+      return ~0u >> (32 - remainder);
+   else
+      return ~0u >> (32 - simd_size);
+}
+
 /**
 * Return true if the given shader stage is dispatched contiguously by the
 * relevant fixed function starting from channel 0 of the SIMD thread, which
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -2326,12 +2326,8 @@ compute_pipeline_create(
   anv_pipeline_setup_l3_config(&pipeline->base, cs_prog_data->base.total_shared > 0);

   const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
-   uint32_t remainder = cs_params.group_size & (cs_params.simd_size - 1);

-   if (remainder > 0)
-      pipeline->cs_right_mask = ~0u >> (32 - remainder);
-   else
-      pipeline->cs_right_mask = ~0u >> (32 - cs_params.simd_size);
+   pipeline->cs_right_mask = brw_cs_right_mask(cs_params.group_size, cs_params.simd_size);

   const uint32_t vfe_curbe_allocation =
      ALIGN(cs_prog_data->push.per_thread.regs * cs_params.threads +
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -4489,11 +4489,8 @@ genX(emit_gpgpu_walker)(struct brw_context *brw)

   const struct brw_cs_parameters cs_params = brw_cs_get_parameters(brw);

-   uint32_t right_mask = 0xffffffffu >> (32 - cs_params.simd_size);
-   const unsigned right_non_aligned =
-      cs_params.group_size & (cs_params.simd_size - 1);
-   if (right_non_aligned != 0)
-      right_mask >>= (cs_params.simd_size - right_non_aligned);
+   const uint32_t right_mask =
+      brw_cs_right_mask(cs_params.group_size, cs_params.simd_size);

   brw_batch_emit(brw, GENX(GPGPU_WALKER), ggw) {
      ggw.IndirectParameterEnable      = indirect;