nir: Make the load_store_vectorizer provide align_mul + align_offset.

Previously it passed a single value encoding the two together, which was
not sufficient for nir_lower_ubo_vec4 to enforce "Don't combine loads
that would make us straddle a vec4 boundary".

Reviewed-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6612>
This commit is contained in:
Author: Eric Anholt
Date: 2020-09-08 10:58:49 -07:00
Committed by: Marge Bot
parent 9c5a793dc7
commit 5f757bb95c
5 changed files with 27 additions and 7 deletions

View File

@@ -383,7 +383,8 @@ type_size(const struct glsl_type *type, bool bindless)
}
bool
mem_vectorize_callback(unsigned align, unsigned bit_size,
mem_vectorize_callback(unsigned align_mul, unsigned align_offset,
unsigned bit_size,
unsigned num_components, unsigned high_offset,
nir_intrinsic_instr *low, nir_intrinsic_instr *high)
{
@@ -394,6 +395,12 @@ mem_vectorize_callback(unsigned align, unsigned bit_size,
if (bit_size * num_components > 128)
return false;
uint32_t align;
if (align_offset)
align = 1 << (ffs(align_offset) - 1);
else
align = align_mul;
switch (low->intrinsic) {
case nir_intrinsic_load_global:
case nir_intrinsic_store_global:

View File

@@ -4851,7 +4851,9 @@ bool nir_opt_vectorize(nir_shader *shader, nir_opt_vectorize_cb filter,
bool nir_opt_conditional_discard(nir_shader *shader);
typedef bool (*nir_should_vectorize_mem_func)(unsigned align, unsigned bit_size,
typedef bool (*nir_should_vectorize_mem_func)(unsigned align_mul,
unsigned align_offset,
unsigned bit_size,
unsigned num_components, unsigned high_offset,
nir_intrinsic_instr *low, nir_intrinsic_instr *high);

View File

@@ -667,8 +667,9 @@ new_bitsize_acceptable(struct vectorize_ctx *ctx, unsigned new_bit_size,
if (new_bit_size / common_bit_size > NIR_MAX_VEC_COMPONENTS)
return false;
uint32_t align = low->align_offset ? 1 << (ffs(low->align_offset) - 1) : low->align_mul;
if (!ctx->callback(align, new_bit_size, new_num_components,
if (!ctx->callback(low->align_mul,
low->align_offset,
new_bit_size, new_num_components,
high_offset, low->intrin, high->intrin))
return false;

View File

@@ -70,7 +70,8 @@ protected:
bool test_alu(nir_instr *instr, nir_op op);
bool test_alu_def(nir_instr *instr, unsigned index, nir_ssa_def *def, unsigned swizzle=0);
static bool mem_vectorize_callback(unsigned align, unsigned bit_size,
static bool mem_vectorize_callback(unsigned align_mul, unsigned align_offset,
unsigned bit_size,
unsigned num_components, unsigned high_offset,
nir_intrinsic_instr *low, nir_intrinsic_instr *high);
static void shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align);
@@ -363,7 +364,8 @@ bool nir_load_store_vectorize_test::test_alu_def(
}
bool nir_load_store_vectorize_test::mem_vectorize_callback(
unsigned align, unsigned bit_size, unsigned num_components, unsigned high_offset,
unsigned align_mul, unsigned align_offset, unsigned bit_size,
unsigned num_components, unsigned high_offset,
nir_intrinsic_instr *low, nir_intrinsic_instr *high)
{
return bit_size / 8;

View File

@@ -855,7 +855,8 @@ brw_nir_link_shaders(const struct brw_compiler *compiler,
}
static bool
brw_nir_should_vectorize_mem(unsigned align, unsigned bit_size,
brw_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset,
unsigned bit_size,
unsigned num_components, unsigned high_offset,
nir_intrinsic_instr *low,
nir_intrinsic_instr *high)
@@ -873,6 +874,13 @@ brw_nir_should_vectorize_mem(unsigned align, unsigned bit_size,
if (num_components > 4)
return false;
uint32_t align;
if (align_offset)
align = 1 << (ffs(align_offset) - 1);
else
align = align_mul;
if (align < bit_size / 8)
return false;