nir: Make the load_store_vectorizer provide align_mul + align_offset.
It was passing an encoding of the two that wasn't good for ensuring
"Don't combine loads that would make us straddle a vec4 boundary" for
nir_lower_ubo_vec4.

Reviewed-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6612>
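As context for the change (not part of the commit itself), here is a minimal sketch of the kind of callback the new signature enables. The callback name and the 16-byte check are illustrative assumptions; only the parameter list follows the updated nir_should_vectorize_mem_func typedef changed below.

#include "nir.h"

/* Hypothetical driver callback: with align_mul and align_offset passed
 * separately, a driver that will run nir_lower_ubo_vec4 can refuse to
 * merge loads whose combined access would straddle a 16-byte vec4
 * boundary, something a single collapsed "align" value cannot express.
 */
static bool
example_should_vectorize_mem(unsigned align_mul, unsigned align_offset,
                             unsigned bit_size, unsigned num_components,
                             unsigned high_offset,
                             nir_intrinsic_instr *low,
                             nir_intrinsic_instr *high)
{
   unsigned size = bit_size / 8 * num_components;

   /* The access address is align_mul * k + align_offset for some k, so
    * when align_mul is a multiple of 16, align_offset % 16 is the byte
    * position within a vec4 and the merge is safe only if the whole
    * access fits before the next 16-byte boundary.
    */
   if (align_mul % 16 == 0 && align_offset % 16 + size > 16)
      return false;

   return true;
}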
@@ -383,7 +383,8 @@ type_size(const struct glsl_type *type, bool bindless)
 }
 
 bool
-mem_vectorize_callback(unsigned align, unsigned bit_size,
+mem_vectorize_callback(unsigned align_mul, unsigned align_offset,
+                       unsigned bit_size,
                        unsigned num_components, unsigned high_offset,
                        nir_intrinsic_instr *low, nir_intrinsic_instr *high)
 {
@@ -394,6 +395,12 @@ mem_vectorize_callback(unsigned align, unsigned bit_size,
    if (bit_size * num_components > 128)
       return false;
 
+   uint32_t align;
+   if (align_offset)
+      align = 1 << (ffs(align_offset) - 1);
+   else
+      align = align_mul;
+
    switch (low->intrinsic) {
    case nir_intrinsic_load_global:
    case nir_intrinsic_store_global:
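A reading note on the fallback above (my addition, not in the patch): when align_offset is nonzero, the strongest alignment the pair guarantees is the largest power of two dividing align_offset, which is what 1 << (ffs(align_offset) - 1) computes; when align_offset is zero the address is an exact multiple of align_mul. A small standalone check of that identity:

#include <assert.h>
#include <stdint.h>
#include <strings.h>   /* ffs() */

/* Same collapse as in the hunk above: reduce (align_mul, align_offset)
 * to a single conservative power-of-two alignment. */
static uint32_t
collapse_align(uint32_t align_mul, uint32_t align_offset)
{
   return align_offset ? 1u << (ffs(align_offset) - 1) : align_mul;
}

int
main(void)
{
   assert(collapse_align(16, 4) == 4);   /* 16k + 4 is only 4-byte aligned */
   assert(collapse_align(16, 12) == 4);  /* lowest set bit of 12 is 4 */
   assert(collapse_align(16, 0) == 16);  /* exact multiple of 16 */
   return 0;
}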
@@ -4851,7 +4851,9 @@ bool nir_opt_vectorize(nir_shader *shader, nir_opt_vectorize_cb filter,
 
 bool nir_opt_conditional_discard(nir_shader *shader);
 
-typedef bool (*nir_should_vectorize_mem_func)(unsigned align, unsigned bit_size,
+typedef bool (*nir_should_vectorize_mem_func)(unsigned align_mul,
+                                              unsigned align_offset,
+                                              unsigned bit_size,
                                               unsigned num_components, unsigned high_offset,
                                               nir_intrinsic_instr *low, nir_intrinsic_instr *high);
 
@@ -667,8 +667,9 @@ new_bitsize_acceptable(struct vectorize_ctx *ctx, unsigned new_bit_size,
    if (new_bit_size / common_bit_size > NIR_MAX_VEC_COMPONENTS)
       return false;
 
-   uint32_t align = low->align_offset ? 1 << (ffs(low->align_offset) - 1) : low->align_mul;
-   if (!ctx->callback(align, new_bit_size, new_num_components,
+   if (!ctx->callback(low->align_mul,
+                      low->align_offset,
+                      new_bit_size, new_num_components,
                       high_offset, low->intrin, high->intrin))
       return false;
 
@@ -70,7 +70,8 @@ protected:
    bool test_alu(nir_instr *instr, nir_op op);
    bool test_alu_def(nir_instr *instr, unsigned index, nir_ssa_def *def, unsigned swizzle=0);
 
-   static bool mem_vectorize_callback(unsigned align, unsigned bit_size,
+   static bool mem_vectorize_callback(unsigned align_mul, unsigned align_offset,
+                                      unsigned bit_size,
                                       unsigned num_components, unsigned high_offset,
                                       nir_intrinsic_instr *low, nir_intrinsic_instr *high);
    static void shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align);
@@ -363,7 +364,8 @@ bool nir_load_store_vectorize_test::test_alu_def(
 }
 
 bool nir_load_store_vectorize_test::mem_vectorize_callback(
-   unsigned align, unsigned bit_size, unsigned num_components, unsigned high_offset,
+   unsigned align_mul, unsigned align_offset, unsigned bit_size,
+   unsigned num_components, unsigned high_offset,
    nir_intrinsic_instr *low, nir_intrinsic_instr *high)
 {
    return bit_size / 8;
@@ -855,7 +855,8 @@ brw_nir_link_shaders(const struct brw_compiler *compiler,
 }
 
 static bool
-brw_nir_should_vectorize_mem(unsigned align, unsigned bit_size,
+brw_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset,
+                             unsigned bit_size,
                              unsigned num_components, unsigned high_offset,
                              nir_intrinsic_instr *low,
                              nir_intrinsic_instr *high)
@@ -873,6 +874,13 @@ brw_nir_should_vectorize_mem(unsigned align, unsigned bit_size,
    if (num_components > 4)
       return false;
 
+
+   uint32_t align;
+   if (align_offset)
+      align = 1 << (ffs(align_offset) - 1);
+   else
+      align = align_mul;
+
    if (align < bit_size / 8)
       return false;
 