nir: rework and fix rotate lowering

No driver supports urol/uror on all bit sizes. Intel gen11+ only for 16
and 32 bit, Nvidia GV100+ only for 32 bit. Etnaviv can support it on 8,
16 and 32 bit.

Also turn the `lower` into a `has` option as only two drivers actually
support `uror` and `urol` at this moment.

Fixes crashes with CL integer_rotate on iris and nouveau since we emit
urol for `rotate`.

v2: always lower 64 bit

Fixes: fe0965afa6 ("spirv: Don't use libclc for rotate")
Signed-off-by: Karol Herbst <kherbst@redhat.com>
Reviewed-by (Intel and nir): Ian Romanick <ian.d.romanick@intel.com>

Reviewed-by: David Heidelberg <david.heidelberg@collabora.com>
Acked-by: Yonggang Luo <luoyonggang@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27090>
(cherry picked from commit f2b7c4ce29)
This commit is contained in:
Karol Herbst
2024-01-16 12:55:10 +01:00
committed by Eric Engestrom
parent 1f89910f6b
commit 08229beb4e
26 changed files with 30 additions and 46 deletions

View File

@@ -314,7 +314,7 @@
"description": "nir: rework and fix rotate lowering",
"nominated": true,
"nomination_type": 1,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": "fe0965afa6becfc9c9aa341babd34bc5920e421b",
"notes": null

View File

@@ -93,7 +93,6 @@ get_nir_options_for_stage(struct radv_physical_device *device, gl_shader_stage s
.lower_ffma64 = split_fma,
.lower_fpow = true,
.lower_mul_2x32_64 = true,
.lower_rotate = true,
.lower_iadd_sat = device->rad_info.gfx_level <= GFX8,
.lower_hadd = true,
.lower_mul_32x16 = true,

View File

@@ -275,7 +275,6 @@ static const nir_shader_compiler_options agx_nir_options = {
.lower_hadd = true,
.vectorize_io = true,
.use_interpolated_input_intrinsics = true,
.lower_rotate = true,
.has_isub = true,
.support_16bit_alu = true,
.max_unroll_iterations = 32,

View File

@@ -228,7 +228,6 @@ const nir_shader_compiler_options v3dv_nir_options = {
.lower_ldexp = true,
.lower_mul_high = true,
.lower_wpos_pntc = false,
.lower_rotate = true,
.lower_to_scalar = true,
.lower_device_index_to_zero = true,
.lower_fquantize2f16 = true,

View File

@@ -3789,8 +3789,10 @@ typedef struct nir_shader_compiler_options {
/* Lowers when 32x32->64 bit multiplication is not supported */
bool lower_mul_2x32_64;
/* Lowers when rotate instruction is not supported */
bool lower_rotate;
/* Indicates that urol and uror are supported */
bool has_rotate8;
bool has_rotate16;
bool has_rotate32;
/** Backend supports ternary addition */
bool has_iadd3;

View File

@@ -1380,22 +1380,22 @@ optimizations.extend([
(('ishr', a, 0), a),
(('ushr', 0, a), 0),
(('ushr', a, 0), a),
(('ior', ('ishl@16', a, b), ('ushr@16', a, ('iadd', 16, ('ineg', b)))), ('urol', a, b), '!options->lower_rotate'),
(('ior', ('ishl@16', a, b), ('ushr@16', a, ('isub', 16, b))), ('urol', a, b), '!options->lower_rotate'),
(('ior', ('ishl@32', a, b), ('ushr@32', a, ('iadd', 32, ('ineg', b)))), ('urol', a, b), '!options->lower_rotate'),
(('ior', ('ishl@32', a, b), ('ushr@32', a, ('isub', 32, b))), ('urol', a, b), '!options->lower_rotate'),
(('ior', ('ushr@16', a, b), ('ishl@16', a, ('iadd', 16, ('ineg', b)))), ('uror', a, b), '!options->lower_rotate'),
(('ior', ('ushr@16', a, b), ('ishl@16', a, ('isub', 16, b))), ('uror', a, b), '!options->lower_rotate'),
(('ior', ('ushr@32', a, b), ('ishl@32', a, ('iadd', 32, ('ineg', b)))), ('uror', a, b), '!options->lower_rotate'),
(('ior', ('ushr@32', a, b), ('ishl@32', a, ('isub', 32, b))), ('uror', a, b), '!options->lower_rotate'),
(('urol@8', a, b), ('ior', ('ishl', a, b), ('ushr', a, ('isub', 8, b))), 'options->lower_rotate'),
(('urol@16', a, b), ('ior', ('ishl', a, b), ('ushr', a, ('isub', 16, b))), 'options->lower_rotate'),
(('urol@32', a, b), ('ior', ('ishl', a, b), ('ushr', a, ('isub', 32, b))), 'options->lower_rotate'),
(('urol@64', a, b), ('ior', ('ishl', a, b), ('ushr', a, ('isub', 64, b))), 'options->lower_rotate'),
(('uror@8', a, b), ('ior', ('ushr', a, b), ('ishl', a, ('isub', 8, b))), 'options->lower_rotate'),
(('uror@16', a, b), ('ior', ('ushr', a, b), ('ishl', a, ('isub', 16, b))), 'options->lower_rotate'),
(('uror@32', a, b), ('ior', ('ushr', a, b), ('ishl', a, ('isub', 32, b))), 'options->lower_rotate'),
(('uror@64', a, b), ('ior', ('ushr', a, b), ('ishl', a, ('isub', 64, b))), 'options->lower_rotate'),
(('ior', ('ishl@16', a, b), ('ushr@16', a, ('iadd', 16, ('ineg', b)))), ('urol', a, b), 'options->has_rotate16'),
(('ior', ('ishl@16', a, b), ('ushr@16', a, ('isub', 16, b))), ('urol', a, b), 'options->has_rotate16'),
(('ior', ('ishl@32', a, b), ('ushr@32', a, ('iadd', 32, ('ineg', b)))), ('urol', a, b), 'options->has_rotate32'),
(('ior', ('ishl@32', a, b), ('ushr@32', a, ('isub', 32, b))), ('urol', a, b), 'options->has_rotate32'),
(('ior', ('ushr@16', a, b), ('ishl@16', a, ('iadd', 16, ('ineg', b)))), ('uror', a, b), 'options->has_rotate16'),
(('ior', ('ushr@16', a, b), ('ishl@16', a, ('isub', 16, b))), ('uror', a, b), 'options->has_rotate16'),
(('ior', ('ushr@32', a, b), ('ishl@32', a, ('iadd', 32, ('ineg', b)))), ('uror', a, b), 'options->has_rotate32'),
(('ior', ('ushr@32', a, b), ('ishl@32', a, ('isub', 32, b))), ('uror', a, b), 'options->has_rotate32'),
(('urol@8', a, b), ('ior', ('ishl', a, b), ('ushr', a, ('isub', 8, b))), '!options->has_rotate8'),
(('urol@16', a, b), ('ior', ('ishl', a, b), ('ushr', a, ('isub', 16, b))), '!options->has_rotate16'),
(('urol@32', a, b), ('ior', ('ishl', a, b), ('ushr', a, ('isub', 32, b))), '!options->has_rotate32'),
(('urol@64', a, b), ('ior', ('ishl', a, b), ('ushr', a, ('isub', 64, b)))),
(('uror@8', a, b), ('ior', ('ushr', a, b), ('ishl', a, ('isub', 8, b))), '!options->has_rotate8'),
(('uror@16', a, b), ('ior', ('ushr', a, b), ('ishl', a, ('isub', 16, b))), '!options->has_rotate16'),
(('uror@32', a, b), ('ior', ('ushr', a, b), ('ishl', a, ('isub', 32, b))), '!options->has_rotate32'),
(('uror@64', a, b), ('ior', ('ushr', a, b), ('ishl', a, ('isub', 64, b)))),
# bfi(X, a, b) = (b & ~X) | (a & X)
# If X = ~0: (b & 0) | (a & 0xffffffff) = a

View File

@@ -109,7 +109,6 @@ static const nir_shader_compiler_options ir3_base_options = {
.lower_unpack_unorm_2x16 = true,
.lower_pack_split = true,
.use_interpolated_input_intrinsics = true,
.lower_rotate = true,
.lower_to_scalar = true,
.has_imul24 = true,
.has_fsub = true,

View File

@@ -3689,13 +3689,15 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s,
!options->lower_fdph ||
!options->lower_flrp64 ||
!options->lower_fmod ||
!options->lower_rotate ||
!options->lower_uadd_carry ||
!options->lower_usub_borrow ||
!options->lower_uadd_sat ||
!options->lower_usub_sat ||
!options->lower_uniforms_to_ubo ||
!options->lower_vector_cmp ||
options->has_rotate8 ||
options->has_rotate16 ||
options->has_rotate32 ||
options->lower_fsqrt != lower_fsqrt ||
options->force_indirect_unrolling != no_indirects_mask ||
force_indirect_unrolling_sampler) {
@@ -3709,7 +3711,6 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s,
new_options->lower_fdph = true;
new_options->lower_flrp64 = true;
new_options->lower_fmod = true;
new_options->lower_rotate = true;
new_options->lower_uadd_carry = true;
new_options->lower_usub_borrow = true;
new_options->lower_uadd_sat = true;
@@ -3717,6 +3718,9 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s,
new_options->lower_uniforms_to_ubo = true;
new_options->lower_vector_cmp = true;
new_options->lower_fsqrt = lower_fsqrt;
new_options->has_rotate8 = false;
new_options->has_rotate16 = false;
new_options->has_rotate32 = false;
new_options->force_indirect_unrolling = no_indirects_mask;
new_options->force_indirect_unrolling_sampler = force_indirect_unrolling_sampler;
@@ -4062,7 +4066,6 @@ static const nir_shader_compiler_options nir_to_tgsi_compiler_options = {
.lower_fdph = true,
.lower_flrp64 = true,
.lower_fmod = true,
.lower_rotate = true,
.lower_uniforms_to_ubo = true,
.lower_uadd_carry = true,
.lower_usub_borrow = true,

View File

@@ -43,7 +43,6 @@ static const nir_shader_compiler_options options = {
.lower_all_io_to_temps = true,
.vertex_id_zero_based = true, /* its not implemented anyway */
.lower_bitops = true,
.lower_rotate = true,
.lower_vector_cmp = true,
.lower_fdph = true,
.has_fsub = true,

View File

@@ -117,7 +117,6 @@ static const nir_shader_compiler_options i915_compiler_options = {
.lower_fdph = true,
.lower_flrp32 = true,
.lower_fmod = true,
.lower_rotate = true,
.lower_sincos = true,
.lower_uniforms_to_ubo = true,
.lower_vector_cmp = true,
@@ -161,7 +160,6 @@ static const struct nir_shader_compiler_options gallivm_nir_options = {
.lower_unpack_half_2x16 = true,
.lower_extract_byte = true,
.lower_extract_word = true,
.lower_rotate = true,
.lower_uadd_carry = true,
.lower_usub_borrow = true,
.lower_mul_2x32_64 = true,

View File

@@ -57,7 +57,6 @@ static const nir_shader_compiler_options vs_nir_options = {
/* could be implemented by clamp */
.lower_fsat = true,
.lower_bitops = true,
.lower_rotate = true,
.lower_sincos = true,
.lower_fceil = true,
.lower_insert_byte = true,
@@ -78,7 +77,6 @@ static const nir_shader_compiler_options fs_nir_options = {
.lower_flrp32 = true,
.lower_flrp64 = true,
.lower_fsign = true,
.lower_rotate = true,
.lower_fdot = true,
.lower_fdph = true,
.lower_insert_byte = true,

View File

@@ -616,7 +616,6 @@ static const struct nir_shader_compiler_options gallivm_nir_options = {
.lower_extract_word = true,
.lower_insert_byte = true,
.lower_insert_word = true,
.lower_rotate = true,
.lower_uadd_carry = true,
.lower_usub_borrow = true,
.lower_mul_2x32_64 = true,

View File

@@ -477,7 +477,6 @@ static const nir_shader_compiler_options nv30_base_compiler_options = {
.lower_flrp64 = true,
.lower_fmod = true,
.lower_fpow = true, /* In hardware as of nv40 FS */
.lower_rotate = true,
.lower_uniforms_to_ubo = true,
.lower_vector_cmp = true,
.force_indirect_unrolling = nir_var_all,

View File

@@ -503,7 +503,6 @@ static int r300_get_video_param(struct pipe_screen *screen,
.lower_ftrunc = true, \
.lower_insert_byte = true, \
.lower_insert_word = true, \
.lower_rotate = true, \
.lower_uniforms_to_ubo = true, \
.lower_vector_cmp = true, \
.no_integers = true, \

View File

@@ -1394,7 +1394,6 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
.lower_insert_byte = true,
.lower_insert_word = true,
.lower_ldexp = true,
.lower_rotate = true,
/* due to a bug in the shader compiler, some loops hang
* if they are not unrolled, see:
* https://bugs.freedesktop.org/show_bug.cgi?id=86720

View File

@@ -1323,7 +1323,6 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
.lower_hadd = true,
.lower_hadd64 = true,
.lower_fisnormal = true,
.lower_rotate = true,
.lower_to_scalar = true,
.lower_to_scalar_filter = sscreen->info.has_packed_math_16bit ?
si_alu_to_scalar_packed_math_filter : NULL,

View File

@@ -85,7 +85,6 @@ static const nir_shader_compiler_options sp_compiler_options = {
.lower_fdph = true,
.lower_flrp64 = true,
.lower_fmod = true,
.lower_rotate = true,
.lower_uniforms_to_ubo = true,
.lower_vector_cmp = true,
.lower_int64_options = nir_lower_imul_2x32_64,

View File

@@ -737,7 +737,6 @@ vgpu10_get_shader_param(struct pipe_screen *screen,
.lower_fdph = true, \
.lower_flrp64 = true, \
.lower_ldexp = true, \
.lower_rotate = true, \
.lower_uniforms_to_ubo = true, \
.lower_vector_cmp = true, \
.lower_cs_local_index_to_id = true, \

View File

@@ -729,7 +729,6 @@ static const nir_shader_compiler_options v3d_nir_options = {
.lower_ldexp = true,
.lower_mul_high = true,
.lower_wpos_pntc = true,
.lower_rotate = true,
.lower_to_scalar = true,
.lower_int64_options = nir_lower_imul_2x32_64,
.lower_fquantize2f16 = true,

View File

@@ -2174,7 +2174,6 @@ static const nir_shader_compiler_options nir_options = {
.lower_ldexp = true,
.lower_fneg = true,
.lower_ineg = true,
.lower_rotate = true,
.lower_to_scalar = true,
.lower_umax = true,
.lower_umin = true,

View File

@@ -1227,7 +1227,6 @@ zink_screen_init_compiler(struct zink_screen *screen)
.lower_ldexp = true,
.lower_mul_high = true,
.lower_rotate = true,
.lower_uadd_carry = true,
.lower_usub_borrow = true,
.lower_uadd_sat = true,

View File

@@ -189,7 +189,8 @@ brw_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo)
nir_options->has_bfm = devinfo->ver >= 7;
nir_options->has_bfi = devinfo->ver >= 7;
nir_options->lower_rotate = devinfo->ver < 11;
nir_options->has_rotate16 = devinfo->ver >= 11;
nir_options->has_rotate32 = devinfo->ver >= 11;
nir_options->lower_bitfield_reverse = devinfo->ver < 7;
nir_options->lower_find_lsb = devinfo->ver < 7;
nir_options->lower_ifind_msb = devinfo->ver < 7;

View File

@@ -119,7 +119,6 @@ nir_options = {
.lower_uadd_carry = true,
.lower_usub_borrow = true,
.lower_mul_high = true,
.lower_rotate = true,
.lower_pack_half_2x16 = true,
.lower_pack_unorm_4x8 = true,
.lower_pack_snorm_4x8 = true,

View File

@@ -3465,7 +3465,7 @@ nvir_nir_shader_compiler_options(int chipset, uint8_t shader_type)
op.unify_interfaces = false;
op.use_interpolated_input_intrinsics = true;
op.lower_mul_2x32_64 = true; // TODO
op.lower_rotate = (chipset < NVISA_GV100_CHIPSET);
op.has_rotate32 = (chipset >= NVISA_GV100_CHIPSET);
op.has_imul24 = false;
op.has_fmulz = (chipset > NVISA_G80_CHIPSET);
op.intel_vec4 = false;

View File

@@ -55,7 +55,6 @@ void bifrost_compile_shader_nir(nir_shader *nir,
.lower_bitfield_insert = true, \
.lower_bitfield_extract = true, \
.lower_insert_byte = true, \
.lower_rotate = true, \
\
/* Vertex ID is zero based in the traditional geometry flows, but not in \
* the memory-allocated IDVS flow introduced and used exclusively in \

View File

@@ -73,7 +73,6 @@ static const nir_shader_compiler_options midgard_nir_options = {
.lower_insert_byte = true,
.lower_insert_word = true,
.lower_ldexp = true,
.lower_rotate = true,
.lower_pack_half_2x16 = true,
.lower_pack_unorm_2x16 = true,