nir, nir/algebraic: add byte/word insertion instructions

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3151>
This commit is contained in:
Rhys Perry
2020-03-25 15:38:06 +00:00
committed by Marge Bot
parent edae3e5623
commit 1cbcfb8b38
24 changed files with 69 additions and 0 deletions

View File

@@ -74,6 +74,8 @@ static const struct nir_shader_compiler_options nir_options = {
.lower_unpack_half_2x16 = true,
.lower_extract_byte = true,
.lower_extract_word = true,
.lower_insert_byte = true,
.lower_insert_word = true,
.lower_ffma16 = true,
.lower_ffma32 = true,
.lower_ffma64 = true,

View File

@@ -155,6 +155,8 @@ static const nir_shader_compiler_options agx_nir_options = {
.lower_fsign = true,
.lower_rotate = true,
.lower_pack_split = true,
.lower_insert_byte = true,
.lower_insert_word = true,
.lower_uniforms_to_ubo = true,
.lower_cs_local_index_from_id = true,

View File

@@ -188,6 +188,8 @@ const nir_shader_compiler_options v3dv_nir_options = {
.lower_all_io_to_temps = true,
.lower_extract_byte = true,
.lower_extract_word = true,
.lower_insert_byte = true,
.lower_insert_word = true,
.lower_bitfield_insert_to_shifts = true,
.lower_bitfield_extract_to_shifts = true,
.lower_bitfield_reverse = true,

View File

@@ -3259,6 +3259,8 @@ typedef struct nir_shader_compiler_options {
bool lower_extract_byte;
bool lower_extract_word;
bool lower_insert_byte;
bool lower_insert_word;
bool lower_all_io_to_temps;
bool lower_all_io_to_elements;

View File

@@ -972,6 +972,10 @@ binop("extract_i8", tint, "", "(int8_t)(src0 >> (src1 * 8))")
binop("extract_u16", tuint, "", "(uint16_t)(src0 >> (src1 * 16))")
binop("extract_i16", tint, "", "(int16_t)(src0 >> (src1 * 16))")
# Byte/word insertion
#
# insert_u8:  keep only the low 8 bits of src0 and shift them left by
#             src1 * 8 bits, i.e. place the low byte of src0 at byte index src1.
# insert_u16: keep only the low 16 bits of src0 and shift them left by
#             src1 * 16 bits, i.e. place the low word of src0 at word index src1.
#
# Because the source is masked before the shift, every bit of the result
# outside the selected byte/word is zero.
binop("insert_u8", tuint, "", "(src0 & 0xff) << (src1 * 8)")
binop("insert_u16", tuint, "", "(src0 & 0xffff) << (src1 * 16)")
# Convenience wrapper around opcode() for opcodes with three sources that all
# share the single type `ty`. `name`, `alg_props` and `const_expr` are
# forwarded unchanged.
# NOTE(review): the literal 0, [0, 0, 0] and False arguments, and the
# stand-alone `ty` argument (presumably the destination type), are defined by
# opcode()'s signature, which is not visible here -- confirm against opcode().
def triop(name, ty, alg_props, const_expr):
opcode(name, 0, ty, [0, 0, 0], [ty, ty, ty], False, alg_props, const_expr)

View File

@@ -2429,6 +2429,19 @@ for N in [16, 32]:
((x2xN, ('i2i16', aN)), (extract_x16, a, 0), '!options->lower_extract_word'),
])
# Byte insertion
#
# Each rule below is a (search pattern, replacement[, condition]) tuple. The
# rules that carry a condition string only apply when the corresponding
# options->lower_insert_byte / options->lower_insert_word flag is false.
#
# For byte index i in 1..3: extracting byte 0 of a 32-bit value and shifting
# it left by 8 * i is rewritten to insert_u8(a, i).
late_optimizations.extend([(('ishl', ('extract_u8', 'a@32', 0), 8 * i), ('insert_u8', a, i), '!options->lower_insert_byte') for i in range(1, 4)])
# Same result written as shift-then-mask: (a << 8*i) & (0xff << 8*i) keeps
# only the low byte of a, now located at byte index i.
late_optimizations.extend([(('iand', ('ishl', 'a@32', 8 * i), 0xff << (8 * i)), ('insert_u8', a, i), '!options->lower_insert_byte') for i in range(1, 4)])
# For a 32-bit value, shifting left by 24 already discards everything except
# the low byte, so no explicit mask or extract is needed to match byte 3.
late_optimizations.append((('ishl', 'a@32', 24), ('insert_u8', a, 3), '!options->lower_insert_byte'))
late_optimizations += [
# Word insertion
# For a 32-bit value, shifting left by 16 leaves only the low word, moved to
# word index 1.
(('ishl', 'a@32', 16), ('insert_u16', a, 1), '!options->lower_insert_word'),
# Extract and then insert
# insert_u8 / insert_u16 mask their first source to the low byte / word
# themselves (their constant expressions are (src0 & 0xff) << (src1 * 8) and
# (src0 & 0xffff) << (src1 * 16)), so a preceding extract of element 0 is
# redundant. These two rules have no condition string.
(('insert_u8', ('extract_u8', 'a', 0), b), ('insert_u8', a, b)),
(('insert_u16', ('extract_u16', 'a', 0), b), ('insert_u16', a, b)),
]
# Integer sizes
for s in [8, 16, 32, 64]:

View File

@@ -53,6 +53,8 @@ static const nir_shader_compiler_options options = {
.vertex_id_zero_based = true,
.lower_extract_byte = true,
.lower_extract_word = true,
.lower_insert_byte = true,
.lower_insert_word = true,
.lower_helper_invocation = true,
.lower_bitfield_insert_to_shifts = true,
.lower_bitfield_extract_to_shifts = true,
@@ -107,6 +109,8 @@ static const nir_shader_compiler_options options_a6xx = {
.vertex_id_zero_based = false,
.lower_extract_byte = true,
.lower_extract_word = true,
.lower_insert_byte = true,
.lower_insert_word = true,
.lower_helper_invocation = true,
.lower_bitfield_insert_to_shifts = true,
.lower_bitfield_extract_to_shifts = true,

View File

@@ -2659,6 +2659,8 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s)
if (!options->lower_extract_byte ||
!options->lower_extract_word ||
!options->lower_insert_byte ||
!options->lower_insert_word ||
!options->lower_fdph ||
!options->lower_flrp64 ||
!options->lower_fmod ||
@@ -2671,6 +2673,8 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s)
new_options->lower_extract_byte = true;
new_options->lower_extract_word = true;
new_options->lower_insert_byte = true;
new_options->lower_insert_word = true;
new_options->lower_fdph = true;
new_options->lower_flrp64 = true;
new_options->lower_fmod = true;
@@ -2835,6 +2839,8 @@ static const nir_shader_compiler_options nir_to_tgsi_compiler_options = {
.fuse_ffma64 = true,
.lower_extract_byte = true,
.lower_extract_word = true,
.lower_insert_byte = true,
.lower_insert_word = true,
.lower_fdph = true,
.lower_flrp64 = true,
.lower_fmod = true,

View File

@@ -1067,6 +1067,8 @@ etna_screen_create(struct etna_device *dev, struct etna_gpu *gpu,
.lower_fmod = true,
.lower_vector_cmp = true,
.lower_fdph = true,
.lower_insert_byte = true,
.lower_insert_word = true,
.lower_fdiv = true, /* !screen->specs.has_new_transcendentals */
.lower_fsign = !screen->specs.has_sign_floor_ceil,
.lower_ffloor = !screen->specs.has_sign_floor_ceil,

View File

@@ -47,6 +47,8 @@ static const nir_shader_compiler_options options = {
.lower_fdph = true,
.has_fsub = true,
.has_isub = true,
.lower_insert_byte = true,
.lower_insert_word = true,
};
const nir_shader_compiler_options *

View File

@@ -59,6 +59,8 @@ static const nir_shader_compiler_options vs_nir_options = {
.lower_rotate = true,
.lower_sincos = true,
.lower_fceil = true,
.lower_insert_byte = true,
.lower_insert_word = true,
};
static const nir_shader_compiler_options fs_nir_options = {
@@ -74,6 +76,8 @@ static const nir_shader_compiler_options fs_nir_options = {
.lower_rotate = true,
.lower_fdot = true,
.lower_fdph = true,
.lower_insert_byte = true,
.lower_insert_word = true,
.lower_bitops = true,
.lower_vector_cmp = true,
};

View File

@@ -577,6 +577,8 @@ static const struct nir_shader_compiler_options gallivm_nir_options = {
.lower_unpack_half_2x16 = true,
.lower_extract_byte = true,
.lower_extract_word = true,
.lower_insert_byte = true,
.lower_insert_word = true,
.lower_rotate = true,
.lower_uadd_carry = true,
.lower_usub_borrow = true,

View File

@@ -3265,6 +3265,8 @@ nvir_nir_shader_compiler_options(int chipset)
op.lower_pack_split = false;
op.lower_extract_byte = (chipset < NVISA_GM107_CHIPSET);
op.lower_extract_word = (chipset < NVISA_GM107_CHIPSET);
op.lower_insert_byte = true;
op.lower_insert_word = true;
op.lower_all_io_to_temps = false;
op.lower_all_io_to_elements = false;
op.vertex_id_zero_based = false;

View File

@@ -978,6 +978,8 @@ static const nir_shader_compiler_options nir_options = {
.lower_unpack_snorm_4x8 = true,
.lower_extract_byte = true,
.lower_extract_word = true,
.lower_insert_byte = true,
.lower_insert_word = true,
.lower_all_io_to_temps = false,
.lower_cs_local_index_from_id = true,
.lower_rotate = true,

View File

@@ -1334,6 +1334,8 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
.lower_int64_options = ~0,
.lower_extract_byte = true,
.lower_extract_word = true,
.lower_insert_byte = true,
.lower_insert_word = true,
.lower_rotate = true,
.max_unroll_iterations = 32,
.lower_interpolate_at = true,

View File

@@ -1004,6 +1004,8 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
.lower_unpack_unorm_4x8 = true,
.lower_extract_byte = true,
.lower_extract_word = true,
.lower_insert_byte = true,
.lower_insert_word = true,
.lower_rotate = true,
.lower_to_scalar = true,
.optimize_sample_mask_in = true,

View File

@@ -78,6 +78,8 @@ static const nir_shader_compiler_options sp_compiler_options = {
.fuse_ffma64 = true,
.lower_extract_byte = true,
.lower_extract_word = true,
.lower_insert_byte = true,
.lower_insert_word = true,
.lower_fdph = true,
.lower_flrp64 = true,
.lower_fmod = true,

View File

@@ -643,6 +643,8 @@ static const nir_shader_compiler_options v3d_nir_options = {
.lower_all_io_to_temps = true,
.lower_extract_byte = true,
.lower_extract_word = true,
.lower_insert_byte = true,
.lower_insert_word = true,
.lower_bitfield_insert_to_shifts = true,
.lower_bitfield_extract_to_shifts = true,
.lower_bitfield_reverse = true,

View File

@@ -2173,6 +2173,8 @@ static const nir_shader_compiler_options nir_options = {
.lower_all_io_to_temps = true,
.lower_extract_byte = true,
.lower_extract_word = true,
.lower_insert_byte = true,
.lower_insert_word = true,
.lower_fdiv = true,
.lower_ffma16 = true,
.lower_ffma32 = true,

View File

@@ -379,6 +379,8 @@ zink_screen_init_compiler(struct zink_screen *screen)
.lower_fsat = true,
.lower_extract_byte = true,
.lower_extract_word = true,
.lower_insert_byte = true,
.lower_insert_word = true,
.lower_mul_high = true,
.lower_rotate = true,
.lower_uadd_carry = true,

View File

@@ -45,6 +45,8 @@
.lower_device_index_to_zero = true, \
.vectorize_io = true, \
.use_interpolated_input_intrinsics = true, \
.lower_insert_byte = true, \
.lower_insert_word = true, \
.vertex_id_zero_based = true, \
.lower_base_vertex = true, \
.use_scoped_barrier = true, \

View File

@@ -90,6 +90,8 @@ nir_options = {
.lower_bitfield_extract_to_shifts = true,
.lower_extract_word = true,
.lower_extract_byte = true,
.lower_insert_word = true,
.lower_insert_byte = true,
.lower_all_io_to_elements = true,
.lower_all_io_to_temps = true,
.lower_hadd = true,

View File

@@ -56,6 +56,8 @@ static const nir_shader_compiler_options bifrost_nir_options = {
.lower_bitfield_extract_to_shifts = true,
.lower_extract_byte = true,
.lower_extract_word = true,
.lower_insert_byte = true,
.lower_insert_word = true,
.lower_rotate = true,
.lower_pack_half_2x16 = true,

View File

@@ -68,6 +68,8 @@ static const nir_shader_compiler_options midgard_nir_options = {
.lower_bitfield_extract_to_shifts = true,
.lower_extract_byte = true,
.lower_extract_word = true,
.lower_insert_byte = true,
.lower_insert_word = true,
.lower_rotate = true,
.lower_pack_half_2x16 = true,