nir: Make the load_store_vectorizer provide align_mul + align_offset.

The vectorizer was passing an encoding of the two that wasn't good for ensuring "Don't combine loads that would make us straddle a vec4 boundary" for nir_lower_ubo_vec4.

Reviewed-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6612>
2020-09-08 10:58:49 -07:00
parent 9c5a793dc7
commit 5f757bb95c
5 changed files with 27 additions and 7 deletions
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -383,7 +383,8 @@ type_size(const struct glsl_type *type, bool bindless)
 }

 bool
-mem_vectorize_callback(unsigned align, unsigned bit_size,
+mem_vectorize_callback(unsigned align_mul, unsigned align_offset,
+                       unsigned bit_size,
                       unsigned num_components, unsigned high_offset,
                       nir_intrinsic_instr *low, nir_intrinsic_instr *high)
 {
@@ -394,6 +395,12 @@ mem_vectorize_callback(unsigned align, unsigned bit_size,
   if (bit_size * num_components > 128)
      return false;

+   uint32_t align;
+   if (align_offset)
+      align = 1 << (ffs(align_offset) - 1);
+   else
+      align = align_mul;
+
   switch (low->intrinsic) {
   case nir_intrinsic_load_global:
   case nir_intrinsic_store_global:
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -4851,7 +4851,9 @@ bool nir_opt_vectorize(nir_shader *shader, nir_opt_vectorize_cb filter,

 bool nir_opt_conditional_discard(nir_shader *shader);

-typedef bool (*nir_should_vectorize_mem_func)(unsigned align, unsigned bit_size,
+typedef bool (*nir_should_vectorize_mem_func)(unsigned align_mul,
+                                              unsigned align_offset,
+                                              unsigned bit_size,
                                              unsigned num_components, unsigned high_offset,
                                              nir_intrinsic_instr *low, nir_intrinsic_instr *high);

--- a/src/compiler/nir/nir_opt_load_store_vectorize.c
+++ b/src/compiler/nir/nir_opt_load_store_vectorize.c
@@ -667,8 +667,9 @@ new_bitsize_acceptable(struct vectorize_ctx *ctx, unsigned new_bit_size,
   if (new_bit_size / common_bit_size > NIR_MAX_VEC_COMPONENTS)
      return false;

-   uint32_t align = low->align_offset ? 1 << (ffs(low->align_offset) - 1) : low->align_mul;
-   if (!ctx->callback(align, new_bit_size, new_num_components,
+   if (!ctx->callback(low->align_mul,
+                      low->align_offset,
+                      new_bit_size, new_num_components,
                      high_offset, low->intrin, high->intrin))
      return false;

--- a/src/compiler/nir/tests/load_store_vectorizer_tests.cpp
+++ b/src/compiler/nir/tests/load_store_vectorizer_tests.cpp
@@ -70,7 +70,8 @@ protected:
   bool test_alu(nir_instr *instr, nir_op op);
   bool test_alu_def(nir_instr *instr, unsigned index, nir_ssa_def *def, unsigned swizzle=0);

-   static bool mem_vectorize_callback(unsigned align, unsigned bit_size,
+   static bool mem_vectorize_callback(unsigned align_mul, unsigned align_offset,
+                                      unsigned bit_size,
                                      unsigned num_components, unsigned high_offset,
                                      nir_intrinsic_instr *low, nir_intrinsic_instr *high);
   static void shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align);
@@ -363,7 +364,8 @@ bool nir_load_store_vectorize_test::test_alu_def(
 }

 bool nir_load_store_vectorize_test::mem_vectorize_callback(
-   unsigned align, unsigned bit_size, unsigned num_components, unsigned high_offset,
+   unsigned align_mul, unsigned align_offset, unsigned bit_size,
+   unsigned num_components, unsigned high_offset,
   nir_intrinsic_instr *low, nir_intrinsic_instr *high)
 {
   return bit_size / 8;
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -855,7 +855,8 @@ brw_nir_link_shaders(const struct brw_compiler *compiler,
 }

 static bool
-brw_nir_should_vectorize_mem(unsigned align, unsigned bit_size,
+brw_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset,
+                             unsigned bit_size,
                             unsigned num_components, unsigned high_offset,
                             nir_intrinsic_instr *low,
                             nir_intrinsic_instr *high)
@@ -873,6 +874,13 @@ brw_nir_should_vectorize_mem(unsigned align, unsigned bit_size,
   if (num_components > 4)
      return false;

+
+   uint32_t align;
+   if (align_offset)
+      align = 1 << (ffs(align_offset) - 1);
+   else
+      align = align_mul;
+
   if (align < bit_size / 8)
      return false;