radv: improve vectorization callback for small bit sizes
More accurately reflect the hardware's capabilities for byte- and short-aligned VMEM operations.

fossil-db (GFX10.3):
Totals from 65 (0.05% of 139391) affected shaders:
SGPRs: 4296 -> 4200 (-2.23%)
CodeSize: 1000984 -> 1000368 (-0.06%); split: -0.13%, +0.07%
Instrs: 177504 -> 177380 (-0.07%); split: -0.17%, +0.10%
Cycles: 36820596 -> 36812792 (-0.02%); split: -0.15%, +0.13%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10384>
This commit is contained in:
@@ -3035,8 +3035,16 @@ mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_s
|
||||
case nir_intrinsic_store_ssbo:
|
||||
case nir_intrinsic_load_ssbo:
|
||||
case nir_intrinsic_load_ubo:
|
||||
case nir_intrinsic_load_push_constant:
|
||||
return align % (bit_size == 8 ? 2 : 4) == 0;
|
||||
case nir_intrinsic_load_push_constant: {
|
||||
unsigned max_components;
|
||||
if (align % 4 == 0)
|
||||
max_components = NIR_MAX_VEC_COMPONENTS;
|
||||
else if (align % 2 == 0)
|
||||
max_components = 16u / bit_size;
|
||||
else
|
||||
max_components = 8u / bit_size;
|
||||
return (align % (bit_size / 8u)) == 0 && num_components <= max_components;
|
||||
}
|
||||
case nir_intrinsic_load_deref:
|
||||
case nir_intrinsic_store_deref:
|
||||
assert(nir_deref_mode_is(nir_src_as_deref(low->src[0]), nir_var_mem_shared));
|
||||
|
Reference in New Issue
Block a user