nir, nir/algebraic: add byte/word insertion instructions
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3151>
This commit is contained in:
@@ -74,6 +74,8 @@ static const struct nir_shader_compiler_options nir_options = {
|
||||
.lower_unpack_half_2x16 = true,
|
||||
.lower_extract_byte = true,
|
||||
.lower_extract_word = true,
|
||||
.lower_insert_byte = true,
|
||||
.lower_insert_word = true,
|
||||
.lower_ffma16 = true,
|
||||
.lower_ffma32 = true,
|
||||
.lower_ffma64 = true,
|
||||
|
@@ -155,6 +155,8 @@ static const nir_shader_compiler_options agx_nir_options = {
|
||||
.lower_fsign = true,
|
||||
.lower_rotate = true,
|
||||
.lower_pack_split = true,
|
||||
.lower_insert_byte = true,
|
||||
.lower_insert_word = true,
|
||||
.lower_uniforms_to_ubo = true,
|
||||
.lower_cs_local_index_from_id = true,
|
||||
|
||||
|
@@ -188,6 +188,8 @@ const nir_shader_compiler_options v3dv_nir_options = {
|
||||
.lower_all_io_to_temps = true,
|
||||
.lower_extract_byte = true,
|
||||
.lower_extract_word = true,
|
||||
.lower_insert_byte = true,
|
||||
.lower_insert_word = true,
|
||||
.lower_bitfield_insert_to_shifts = true,
|
||||
.lower_bitfield_extract_to_shifts = true,
|
||||
.lower_bitfield_reverse = true,
|
||||
|
@@ -3259,6 +3259,8 @@ typedef struct nir_shader_compiler_options {
|
||||
|
||||
bool lower_extract_byte;
|
||||
bool lower_extract_word;
|
||||
bool lower_insert_byte;
|
||||
bool lower_insert_word;
|
||||
|
||||
bool lower_all_io_to_temps;
|
||||
bool lower_all_io_to_elements;
|
||||
|
@@ -972,6 +972,10 @@ binop("extract_i8", tint, "", "(int8_t)(src0 >> (src1 * 8))")
|
||||
binop("extract_u16", tuint, "", "(uint16_t)(src0 >> (src1 * 16))")
|
||||
binop("extract_i16", tint, "", "(int16_t)(src0 >> (src1 * 16))")
|
||||
|
||||
# Byte/word insertion
|
||||
binop("insert_u8", tuint, "", "(src0 & 0xff) << (src1 * 8)")
|
||||
binop("insert_u16", tuint, "", "(src0 & 0xffff) << (src1 * 16)")
|
||||
|
||||
|
||||
def triop(name, ty, alg_props, const_expr):
|
||||
opcode(name, 0, ty, [0, 0, 0], [ty, ty, ty], False, alg_props, const_expr)
|
||||
|
@@ -2429,6 +2429,19 @@ for N in [16, 32]:
|
||||
((x2xN, ('i2i16', aN)), (extract_x16, a, 0), '!options->lower_extract_word'),
|
||||
])
|
||||
|
||||
# Byte insertion
|
||||
late_optimizations.extend([(('ishl', ('extract_u8', 'a@32', 0), 8 * i), ('insert_u8', a, i), '!options->lower_insert_byte') for i in range(1, 4)])
|
||||
late_optimizations.extend([(('iand', ('ishl', 'a@32', 8 * i), 0xff << (8 * i)), ('insert_u8', a, i), '!options->lower_insert_byte') for i in range(1, 4)])
|
||||
late_optimizations.append((('ishl', 'a@32', 24), ('insert_u8', a, 3), '!options->lower_insert_byte'))
|
||||
|
||||
late_optimizations += [
|
||||
# Word insertion
|
||||
(('ishl', 'a@32', 16), ('insert_u16', a, 1), '!options->lower_insert_word'),
|
||||
|
||||
# Extract and then insert
|
||||
(('insert_u8', ('extract_u8', 'a', 0), b), ('insert_u8', a, b)),
|
||||
(('insert_u16', ('extract_u16', 'a', 0), b), ('insert_u16', a, b)),
|
||||
]
|
||||
|
||||
# Integer sizes
|
||||
for s in [8, 16, 32, 64]:
|
||||
|
@@ -53,6 +53,8 @@ static const nir_shader_compiler_options options = {
|
||||
.vertex_id_zero_based = true,
|
||||
.lower_extract_byte = true,
|
||||
.lower_extract_word = true,
|
||||
.lower_insert_byte = true,
|
||||
.lower_insert_word = true,
|
||||
.lower_helper_invocation = true,
|
||||
.lower_bitfield_insert_to_shifts = true,
|
||||
.lower_bitfield_extract_to_shifts = true,
|
||||
@@ -107,6 +109,8 @@ static const nir_shader_compiler_options options_a6xx = {
|
||||
.vertex_id_zero_based = false,
|
||||
.lower_extract_byte = true,
|
||||
.lower_extract_word = true,
|
||||
.lower_insert_byte = true,
|
||||
.lower_insert_word = true,
|
||||
.lower_helper_invocation = true,
|
||||
.lower_bitfield_insert_to_shifts = true,
|
||||
.lower_bitfield_extract_to_shifts = true,
|
||||
|
@@ -2659,6 +2659,8 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s)
|
||||
|
||||
if (!options->lower_extract_byte ||
|
||||
!options->lower_extract_word ||
|
||||
!options->lower_insert_byte ||
|
||||
!options->lower_insert_word ||
|
||||
!options->lower_fdph ||
|
||||
!options->lower_flrp64 ||
|
||||
!options->lower_fmod ||
|
||||
@@ -2671,6 +2673,8 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s)
|
||||
|
||||
new_options->lower_extract_byte = true;
|
||||
new_options->lower_extract_word = true;
|
||||
new_options->lower_insert_byte = true;
|
||||
new_options->lower_insert_word = true;
|
||||
new_options->lower_fdph = true;
|
||||
new_options->lower_flrp64 = true;
|
||||
new_options->lower_fmod = true;
|
||||
@@ -2835,6 +2839,8 @@ static const nir_shader_compiler_options nir_to_tgsi_compiler_options = {
|
||||
.fuse_ffma64 = true,
|
||||
.lower_extract_byte = true,
|
||||
.lower_extract_word = true,
|
||||
.lower_insert_byte = true,
|
||||
.lower_insert_word = true,
|
||||
.lower_fdph = true,
|
||||
.lower_flrp64 = true,
|
||||
.lower_fmod = true,
|
||||
|
@@ -1067,6 +1067,8 @@ etna_screen_create(struct etna_device *dev, struct etna_gpu *gpu,
|
||||
.lower_fmod = true,
|
||||
.lower_vector_cmp = true,
|
||||
.lower_fdph = true,
|
||||
.lower_insert_byte = true,
|
||||
.lower_insert_word = true,
|
||||
.lower_fdiv = true, /* !screen->specs.has_new_transcendentals */
|
||||
.lower_fsign = !screen->specs.has_sign_floor_ceil,
|
||||
.lower_ffloor = !screen->specs.has_sign_floor_ceil,
|
||||
|
@@ -47,6 +47,8 @@ static const nir_shader_compiler_options options = {
|
||||
.lower_fdph = true,
|
||||
.has_fsub = true,
|
||||
.has_isub = true,
|
||||
.lower_insert_byte = true,
|
||||
.lower_insert_word = true,
|
||||
};
|
||||
|
||||
const nir_shader_compiler_options *
|
||||
|
@@ -59,6 +59,8 @@ static const nir_shader_compiler_options vs_nir_options = {
|
||||
.lower_rotate = true,
|
||||
.lower_sincos = true,
|
||||
.lower_fceil = true,
|
||||
.lower_insert_byte = true,
|
||||
.lower_insert_word = true,
|
||||
};
|
||||
|
||||
static const nir_shader_compiler_options fs_nir_options = {
|
||||
@@ -74,6 +76,8 @@ static const nir_shader_compiler_options fs_nir_options = {
|
||||
.lower_rotate = true,
|
||||
.lower_fdot = true,
|
||||
.lower_fdph = true,
|
||||
.lower_insert_byte = true,
|
||||
.lower_insert_word = true,
|
||||
.lower_bitops = true,
|
||||
.lower_vector_cmp = true,
|
||||
};
|
||||
|
@@ -577,6 +577,8 @@ static const struct nir_shader_compiler_options gallivm_nir_options = {
|
||||
.lower_unpack_half_2x16 = true,
|
||||
.lower_extract_byte = true,
|
||||
.lower_extract_word = true,
|
||||
.lower_insert_byte = true,
|
||||
.lower_insert_word = true,
|
||||
.lower_rotate = true,
|
||||
.lower_uadd_carry = true,
|
||||
.lower_usub_borrow = true,
|
||||
|
@@ -3265,6 +3265,8 @@ nvir_nir_shader_compiler_options(int chipset)
|
||||
op.lower_pack_split = false;
|
||||
op.lower_extract_byte = (chipset < NVISA_GM107_CHIPSET);
|
||||
op.lower_extract_word = (chipset < NVISA_GM107_CHIPSET);
|
||||
op.lower_insert_byte = true;
|
||||
op.lower_insert_word = true;
|
||||
op.lower_all_io_to_temps = false;
|
||||
op.lower_all_io_to_elements = false;
|
||||
op.vertex_id_zero_based = false;
|
||||
|
@@ -978,6 +978,8 @@ static const nir_shader_compiler_options nir_options = {
|
||||
.lower_unpack_snorm_4x8 = true,
|
||||
.lower_extract_byte = true,
|
||||
.lower_extract_word = true,
|
||||
.lower_insert_byte = true,
|
||||
.lower_insert_word = true,
|
||||
.lower_all_io_to_temps = false,
|
||||
.lower_cs_local_index_from_id = true,
|
||||
.lower_rotate = true,
|
||||
|
@@ -1334,6 +1334,8 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
|
||||
.lower_int64_options = ~0,
|
||||
.lower_extract_byte = true,
|
||||
.lower_extract_word = true,
|
||||
.lower_insert_byte = true,
|
||||
.lower_insert_word = true,
|
||||
.lower_rotate = true,
|
||||
.max_unroll_iterations = 32,
|
||||
.lower_interpolate_at = true,
|
||||
|
@@ -1004,6 +1004,8 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
|
||||
.lower_unpack_unorm_4x8 = true,
|
||||
.lower_extract_byte = true,
|
||||
.lower_extract_word = true,
|
||||
.lower_insert_byte = true,
|
||||
.lower_insert_word = true,
|
||||
.lower_rotate = true,
|
||||
.lower_to_scalar = true,
|
||||
.optimize_sample_mask_in = true,
|
||||
|
@@ -78,6 +78,8 @@ static const nir_shader_compiler_options sp_compiler_options = {
|
||||
.fuse_ffma64 = true,
|
||||
.lower_extract_byte = true,
|
||||
.lower_extract_word = true,
|
||||
.lower_insert_byte = true,
|
||||
.lower_insert_word = true,
|
||||
.lower_fdph = true,
|
||||
.lower_flrp64 = true,
|
||||
.lower_fmod = true,
|
||||
|
@@ -643,6 +643,8 @@ static const nir_shader_compiler_options v3d_nir_options = {
|
||||
.lower_all_io_to_temps = true,
|
||||
.lower_extract_byte = true,
|
||||
.lower_extract_word = true,
|
||||
.lower_insert_byte = true,
|
||||
.lower_insert_word = true,
|
||||
.lower_bitfield_insert_to_shifts = true,
|
||||
.lower_bitfield_extract_to_shifts = true,
|
||||
.lower_bitfield_reverse = true,
|
||||
|
@@ -2173,6 +2173,8 @@ static const nir_shader_compiler_options nir_options = {
|
||||
.lower_all_io_to_temps = true,
|
||||
.lower_extract_byte = true,
|
||||
.lower_extract_word = true,
|
||||
.lower_insert_byte = true,
|
||||
.lower_insert_word = true,
|
||||
.lower_fdiv = true,
|
||||
.lower_ffma16 = true,
|
||||
.lower_ffma32 = true,
|
||||
|
@@ -379,6 +379,8 @@ zink_screen_init_compiler(struct zink_screen *screen)
|
||||
.lower_fsat = true,
|
||||
.lower_extract_byte = true,
|
||||
.lower_extract_word = true,
|
||||
.lower_insert_byte = true,
|
||||
.lower_insert_word = true,
|
||||
.lower_mul_high = true,
|
||||
.lower_rotate = true,
|
||||
.lower_uadd_carry = true,
|
||||
|
@@ -45,6 +45,8 @@
|
||||
.lower_device_index_to_zero = true, \
|
||||
.vectorize_io = true, \
|
||||
.use_interpolated_input_intrinsics = true, \
|
||||
.lower_insert_byte = true, \
|
||||
.lower_insert_word = true, \
|
||||
.vertex_id_zero_based = true, \
|
||||
.lower_base_vertex = true, \
|
||||
.use_scoped_barrier = true, \
|
||||
|
@@ -90,6 +90,8 @@ nir_options = {
|
||||
.lower_bitfield_extract_to_shifts = true,
|
||||
.lower_extract_word = true,
|
||||
.lower_extract_byte = true,
|
||||
.lower_insert_word = true,
|
||||
.lower_insert_byte = true,
|
||||
.lower_all_io_to_elements = true,
|
||||
.lower_all_io_to_temps = true,
|
||||
.lower_hadd = true,
|
||||
|
@@ -56,6 +56,8 @@ static const nir_shader_compiler_options bifrost_nir_options = {
|
||||
.lower_bitfield_extract_to_shifts = true,
|
||||
.lower_extract_byte = true,
|
||||
.lower_extract_word = true,
|
||||
.lower_insert_byte = true,
|
||||
.lower_insert_word = true,
|
||||
.lower_rotate = true,
|
||||
|
||||
.lower_pack_half_2x16 = true,
|
||||
|
@@ -68,6 +68,8 @@ static const nir_shader_compiler_options midgard_nir_options = {
|
||||
.lower_bitfield_extract_to_shifts = true,
|
||||
.lower_extract_byte = true,
|
||||
.lower_extract_word = true,
|
||||
.lower_insert_byte = true,
|
||||
.lower_insert_word = true,
|
||||
.lower_rotate = true,
|
||||
|
||||
.lower_pack_half_2x16 = true,
|
||||
|
Reference in New Issue
Block a user