radv: improve vectorization callback for small bit sizes

More accurately reflect the hardware's capabilities for byte- and
short-aligned VMEM operations.

fossil-db (GFX10.3):
Totals from 65 (0.05% of 139391) affected shaders:
SGPRs: 4296 -> 4200 (-2.23%)
CodeSize: 1000984 -> 1000368 (-0.06%); split: -0.13%, +0.07%
Instrs: 177504 -> 177380 (-0.07%); split: -0.17%, +0.10%
Cycles: 36820596 -> 36812792 (-0.02%); split: -0.15%, +0.13%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10384>
This commit is contained in:
Rhys Perry
2020-10-23 15:24:15 +01:00
committed by Marge Bot
parent 89b759c4f9
commit 8408d0312f

View File

@@ -3035,8 +3035,16 @@ mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_s
 case nir_intrinsic_store_ssbo:
 case nir_intrinsic_load_ssbo:
 case nir_intrinsic_load_ubo:
-case nir_intrinsic_load_push_constant:
-return align % (bit_size == 8 ? 2 : 4) == 0;
+case nir_intrinsic_load_push_constant: {
+unsigned max_components;
+if (align % 4 == 0)
+max_components = NIR_MAX_VEC_COMPONENTS;
+else if (align % 2 == 0)
+max_components = 16u / bit_size;
+else
+max_components = 8u / bit_size;
+return (align % (bit_size / 8u)) == 0 && num_components <= max_components;
+}
 case nir_intrinsic_load_deref:
 case nir_intrinsic_store_deref:
 assert(nir_deref_mode_is(nir_src_as_deref(low->src[0]), nir_var_mem_shared));