nir: Make the load_store_vectorizer provide align_mul + align_offset.
It was passing an encoding of the two that wasn't good for ensuring "Don't combine loads that would make us straddle a vec4 boundary" for nir_lower_ubo_vec4.

Reviewed-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6612>
This commit is contained in:
@@ -383,7 +383,8 @@ type_size(const struct glsl_type *type, bool bindless)
|
||||
}
|
||||
|
||||
bool
|
||||
mem_vectorize_callback(unsigned align, unsigned bit_size,
|
||||
mem_vectorize_callback(unsigned align_mul, unsigned align_offset,
|
||||
unsigned bit_size,
|
||||
unsigned num_components, unsigned high_offset,
|
||||
nir_intrinsic_instr *low, nir_intrinsic_instr *high)
|
||||
{
|
||||
@@ -394,6 +395,12 @@ mem_vectorize_callback(unsigned align, unsigned bit_size,
|
||||
if (bit_size * num_components > 128)
|
||||
return false;
|
||||
|
||||
uint32_t align;
|
||||
if (align_offset)
|
||||
align = 1 << (ffs(align_offset) - 1);
|
||||
else
|
||||
align = align_mul;
|
||||
|
||||
switch (low->intrinsic) {
|
||||
case nir_intrinsic_load_global:
|
||||
case nir_intrinsic_store_global:
|
||||
|
@@ -4851,7 +4851,9 @@ bool nir_opt_vectorize(nir_shader *shader, nir_opt_vectorize_cb filter,
|
||||
|
||||
bool nir_opt_conditional_discard(nir_shader *shader);
|
||||
|
||||
typedef bool (*nir_should_vectorize_mem_func)(unsigned align, unsigned bit_size,
|
||||
typedef bool (*nir_should_vectorize_mem_func)(unsigned align_mul,
|
||||
unsigned align_offset,
|
||||
unsigned bit_size,
|
||||
unsigned num_components, unsigned high_offset,
|
||||
nir_intrinsic_instr *low, nir_intrinsic_instr *high);
|
||||
|
||||
|
@@ -667,8 +667,9 @@ new_bitsize_acceptable(struct vectorize_ctx *ctx, unsigned new_bit_size,
|
||||
if (new_bit_size / common_bit_size > NIR_MAX_VEC_COMPONENTS)
|
||||
return false;
|
||||
|
||||
uint32_t align = low->align_offset ? 1 << (ffs(low->align_offset) - 1) : low->align_mul;
|
||||
if (!ctx->callback(align, new_bit_size, new_num_components,
|
||||
if (!ctx->callback(low->align_mul,
|
||||
low->align_offset,
|
||||
new_bit_size, new_num_components,
|
||||
high_offset, low->intrin, high->intrin))
|
||||
return false;
|
||||
|
||||
|
@@ -70,7 +70,8 @@ protected:
|
||||
bool test_alu(nir_instr *instr, nir_op op);
|
||||
bool test_alu_def(nir_instr *instr, unsigned index, nir_ssa_def *def, unsigned swizzle=0);
|
||||
|
||||
static bool mem_vectorize_callback(unsigned align, unsigned bit_size,
|
||||
static bool mem_vectorize_callback(unsigned align_mul, unsigned align_offset,
|
||||
unsigned bit_size,
|
||||
unsigned num_components, unsigned high_offset,
|
||||
nir_intrinsic_instr *low, nir_intrinsic_instr *high);
|
||||
static void shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align);
|
||||
@@ -363,7 +364,8 @@ bool nir_load_store_vectorize_test::test_alu_def(
|
||||
}
|
||||
|
||||
bool nir_load_store_vectorize_test::mem_vectorize_callback(
|
||||
unsigned align, unsigned bit_size, unsigned num_components, unsigned high_offset,
|
||||
unsigned align_mul, unsigned align_offset, unsigned bit_size,
|
||||
unsigned num_components, unsigned high_offset,
|
||||
nir_intrinsic_instr *low, nir_intrinsic_instr *high)
|
||||
{
|
||||
return bit_size / 8;
|
||||
|
@@ -855,7 +855,8 @@ brw_nir_link_shaders(const struct brw_compiler *compiler,
|
||||
}
|
||||
|
||||
static bool
|
||||
brw_nir_should_vectorize_mem(unsigned align, unsigned bit_size,
|
||||
brw_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset,
|
||||
unsigned bit_size,
|
||||
unsigned num_components, unsigned high_offset,
|
||||
nir_intrinsic_instr *low,
|
||||
nir_intrinsic_instr *high)
|
||||
@@ -873,6 +874,13 @@ brw_nir_should_vectorize_mem(unsigned align, unsigned bit_size,
|
||||
if (num_components > 4)
|
||||
return false;
|
||||
|
||||
|
||||
uint32_t align;
|
||||
if (align_offset)
|
||||
align = 1 << (ffs(align_offset) - 1);
|
||||
else
|
||||
align = align_mul;
|
||||
|
||||
if (align < bit_size / 8)
|
||||
return false;
|
||||
|
||||
|
Reference in New Issue
Block a user