diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 0a4133d49a3..66777c7213e 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -279,7 +279,7 @@ NIR_FILES = \
 	nir/nir_lower_io_to_scalar.c \
 	nir/nir_lower_io_to_vector.c \
 	nir/nir_lower_multiview.c \
-	nir/nir_lower_mediump_outputs.c \
+	nir/nir_lower_mediump.c \
 	nir/nir_lower_memcpy.c \
 	nir/nir_lower_memory_model.c \
 	nir/nir_lower_non_uniform_access.c \
diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index f45a9c17b43..d17bbe504a5 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -168,7 +168,7 @@ files_libnir = files(
   'nir_lower_io_to_scalar.c',
   'nir_lower_io_to_vector.c',
   'nir_lower_multiview.c',
-  'nir_lower_mediump_outputs.c',
+  'nir_lower_mediump.c',
   'nir_lower_memcpy.c',
   'nir_lower_memory_model.c',
   'nir_lower_non_uniform_access.c',
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 2054fd761d6..bc386b5435c 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -4991,7 +4991,23 @@ bool nir_lower_doubles(nir_shader *shader, const nir_shader *softfp64,
                        nir_lower_doubles_options options);
 bool nir_lower_pack(nir_shader *shader);
 
-void nir_lower_mediump_outputs(nir_shader *nir);
+bool nir_recompute_io_bases(nir_function_impl *impl, nir_variable_mode modes);
+bool nir_lower_mediump_io(nir_shader *nir, nir_variable_mode modes,
+                          uint64_t varying_mask, bool use_16bit_slots);
+bool nir_force_mediump_io(nir_shader *nir, nir_variable_mode modes,
+                          nir_alu_type types);
+bool nir_unpack_16bit_varying_slots(nir_shader *nir, nir_variable_mode modes);
+bool nir_fold_16bit_sampler_conversions(nir_shader *nir,
+                                        unsigned tex_src_types);
+
+typedef struct {
+   bool legalize_type;         /* whether this src should be legalized */
+   uint8_t bit_size;           /* bit_size to enforce */
+   nir_tex_src_type match_src; /* if bit_size is 0, match bit size of this */
+} nir_tex_src_type_constraint, nir_tex_src_type_constraints[nir_num_tex_src_types];
+
+bool nir_legalize_16bit_sampler_srcs(nir_shader *nir,
+                                     nir_tex_src_type_constraints constraints);
 
 bool nir_lower_point_size(nir_shader *shader, float min, float max);
diff --git a/src/compiler/nir/nir_lower_mediump.c b/src/compiler/nir/nir_lower_mediump.c
new file mode 100644
index 00000000000..0cc58c1e755
--- /dev/null
+++ b/src/compiler/nir/nir_lower_mediump.c
@@ -0,0 +1,611 @@
+/*
+ * Copyright (C) 2020 Google, Inc.
+ * Copyright (C) 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/**
+ * Return the intrinsic if it matches the mask in "modes", else return NULL.
+ */
+static nir_intrinsic_instr *
+get_io_intrinsic(nir_instr *instr, nir_variable_mode modes,
+                 nir_variable_mode *out_mode)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return NULL;
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+   switch (intr->intrinsic) {
+   case nir_intrinsic_load_input:
+   case nir_intrinsic_load_input_vertex:
+   case nir_intrinsic_load_interpolated_input:
+   case nir_intrinsic_load_per_vertex_input:
+      *out_mode = nir_var_shader_in;
+      return modes & nir_var_shader_in ? intr : NULL;
+   case nir_intrinsic_load_output:
+   case nir_intrinsic_load_per_vertex_output:
+   case nir_intrinsic_store_output:
+   case nir_intrinsic_store_per_vertex_output:
+      *out_mode = nir_var_shader_out;
+      return modes & nir_var_shader_out ? intr : NULL;
+   default:
+      return NULL;
+   }
+}
+
+/**
+ * Recompute the IO "base" indices from scratch, using IO locations to assign
+ * new bases. This removes holes and fixes base values made incorrect by
+ * changes to IO locations. The resulting mapping from locations to bases is
+ * monotonically increasing.
+ */
+bool
+nir_recompute_io_bases(nir_function_impl *impl, nir_variable_mode modes)
+{
+   BITSET_DECLARE(inputs, NUM_TOTAL_VARYING_SLOTS);
+   BITSET_DECLARE(outputs, NUM_TOTAL_VARYING_SLOTS);
+   BITSET_ZERO(inputs);
+   BITSET_ZERO(outputs);
+
+   /* Gather the bitmasks of used locations. */
+   nir_foreach_block_safe (block, impl) {
+      nir_foreach_instr_safe (instr, block) {
+         nir_variable_mode mode;
+         nir_intrinsic_instr *intr = get_io_intrinsic(instr, modes, &mode);
+         if (!intr)
+            continue;
+
+         nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
+         unsigned num_slots = sem.num_slots;
+         if (sem.medium_precision)
+            num_slots = (num_slots + sem.high_16bits + 1) / 2;
+
+         if (mode == nir_var_shader_in) {
+            for (unsigned i = 0; i < num_slots; i++)
+               BITSET_SET(inputs, sem.location + i);
+         } else if (!sem.dual_source_blend_index) {
+            for (unsigned i = 0; i < num_slots; i++)
+               BITSET_SET(outputs, sem.location + i);
+         }
+      }
+   }
+
+   /* Renumber bases. */
+   bool changed = false;
+
+   nir_foreach_block_safe (block, impl) {
+      nir_foreach_instr_safe (instr, block) {
+         nir_variable_mode mode;
+         nir_intrinsic_instr *intr = get_io_intrinsic(instr, modes, &mode);
+         if (!intr)
+            continue;
+
+         nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
+         unsigned num_slots = sem.num_slots;
+         if (sem.medium_precision)
+            num_slots = (num_slots + sem.high_16bits + 1) / 2;
+
+         if (mode == nir_var_shader_in) {
+            nir_intrinsic_set_base(intr,
+                                   BITSET_PREFIX_SUM(inputs, sem.location));
+         } else if (sem.dual_source_blend_index) {
+            nir_intrinsic_set_base(intr,
+                                   BITSET_PREFIX_SUM(outputs, NUM_TOTAL_VARYING_SLOTS));
+         } else {
+            nir_intrinsic_set_base(intr,
+                                   BITSET_PREFIX_SUM(outputs, sem.location));
+         }
+         changed = true;
+      }
+   }
+
+   nir_metadata_preserve(impl, nir_metadata_all);
+   return changed;
+}
+
+/**
+ * Lower mediump inputs and/or outputs to 16 bits.
+ *
+ * \param modes            Whether to lower inputs, outputs, or both.
+ * \param varying_mask     Determines which varyings to skip (VS inputs,
+ *    FS outputs, and patch varyings ignore this mask).
+ * \param use_16bit_slots  Remap lowered slots to VARYING_SLOT_VARn_16BIT.
+ */
+bool
+nir_lower_mediump_io(nir_shader *nir, nir_variable_mode modes,
+                     uint64_t varying_mask, bool use_16bit_slots)
+{
+   bool changed = false;
+   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
+   assert(impl);
+
+   nir_builder b;
+   nir_builder_init(&b, impl);
+
+   nir_foreach_block_safe (block, impl) {
+      nir_foreach_instr_safe (instr, block) {
+         nir_variable_mode mode;
+         nir_intrinsic_instr *intr = get_io_intrinsic(instr, modes, &mode);
+         if (!intr)
+            continue;
+
+         nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
+         nir_ssa_def *(*convert)(nir_builder *, nir_ssa_def *);
+         bool is_varying = !(nir->info.stage == MESA_SHADER_VERTEX &&
+                             mode == nir_var_shader_in) &&
+                           !(nir->info.stage == MESA_SHADER_FRAGMENT &&
+                             mode == nir_var_shader_out);
+
+         if (!sem.medium_precision ||
+             (is_varying && sem.location <= VARYING_SLOT_VAR31 &&
+              !(varying_mask & BITFIELD64_BIT(sem.location))))
+            continue; /* can't lower */
+
+         if (nir_intrinsic_has_src_type(intr)) {
+            /* Stores. */
+            nir_alu_type type = nir_intrinsic_src_type(intr);
+
+            switch (type) {
+            case nir_type_float32:
+               convert = nir_f2fmp;
+               break;
+            case nir_type_int32:
+            case nir_type_uint32:
+               convert = nir_i2imp;
+               break;
+            default:
+               continue; /* already lowered? */
+            }
+
+            /* Convert the 32-bit store into a 16-bit store. */
+            b.cursor = nir_before_instr(&intr->instr);
+            nir_instr_rewrite_src_ssa(&intr->instr, &intr->src[0],
+                                      convert(&b, intr->src[0].ssa));
+            nir_intrinsic_set_src_type(intr, (type & ~32) | 16);
+         } else {
+            /* Loads. */
+            nir_alu_type type = nir_intrinsic_dest_type(intr);
+
+            switch (type) {
+            case nir_type_float32:
+               convert = nir_f2f32;
+               break;
+            case nir_type_int32:
+               convert = nir_i2i32;
+               break;
+            case nir_type_uint32:
+               convert = nir_u2u32;
+               break;
+            default:
+               continue; /* already lowered? */
+            }
+
+            /* Convert the 32-bit load into a 16-bit load. */
+            b.cursor = nir_after_instr(&intr->instr);
+            intr->dest.ssa.bit_size = 16;
+            nir_intrinsic_set_dest_type(intr, (type & ~32) | 16);
+            nir_ssa_def *dst = convert(&b, &intr->dest.ssa);
+            nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, dst,
+                                           dst->parent_instr);
+         }
+
+         if (use_16bit_slots && is_varying &&
+             sem.location >= VARYING_SLOT_VAR0 &&
+             sem.location <= VARYING_SLOT_VAR31) {
+            unsigned index = sem.location - VARYING_SLOT_VAR0;
+
+            sem.location = VARYING_SLOT_VAR0_16BIT + index / 2;
+            sem.high_16bits = index % 2;
+            nir_intrinsic_set_io_semantics(intr, sem);
+         }
+         changed = true;
+      }
+   }
+
+   if (changed)
+      nir_recompute_io_bases(impl, modes);
+
+   nir_metadata_preserve(impl, nir_metadata_all);
+   return changed;
+}
+
+/**
+ * Set the mediump precision bit for those shader inputs and outputs that are
+ * set in the "modes" mask. Non-generic varyings (that GLES3 doesn't have)
+ * are ignored. The "types" mask can be (nir_type_float | nir_type_int), etc.
+ */ +bool +nir_force_mediump_io(nir_shader *nir, nir_variable_mode modes, + nir_alu_type types) +{ + bool changed = false; + nir_function_impl *impl = nir_shader_get_entrypoint(nir); + assert(impl); + + nir_builder b; + nir_builder_init(&b, impl); + + nir_foreach_block_safe (block, impl) { + nir_foreach_instr_safe (instr, block) { + nir_variable_mode mode; + nir_intrinsic_instr *intr = get_io_intrinsic(instr, modes, &mode); + if (!intr) + continue; + + nir_alu_type type; + if (nir_intrinsic_has_src_type(intr)) + type = nir_intrinsic_src_type(intr); + else + type = nir_intrinsic_dest_type(intr); + if (!(type & types)) + continue; + + nir_io_semantics sem = nir_intrinsic_io_semantics(intr); + + if (nir->info.stage == MESA_SHADER_FRAGMENT && + mode == nir_var_shader_out) { + /* Only accept FS outputs. */ + if (sem.location < FRAG_RESULT_DATA0 && + sem.location != FRAG_RESULT_COLOR) + continue; + } else if (nir->info.stage == MESA_SHADER_VERTEX && + mode == nir_var_shader_in) { + /* Accept all VS inputs. */ + } else { + /* Only accept generic varyings. */ + if (sem.location < VARYING_SLOT_VAR0 || + sem.location > VARYING_SLOT_VAR31) + continue; + } + + sem.medium_precision = 1; + nir_intrinsic_set_io_semantics(intr, sem); + changed = true; + } + } + + nir_metadata_preserve(impl, nir_metadata_all); + return changed; +} + +/** + * Remap 16-bit varying slots to the original 32-bit varying slots. + * This only changes IO semantics and bases. + */ +bool +nir_unpack_16bit_varying_slots(nir_shader *nir, nir_variable_mode modes) +{ + bool changed = false; + nir_function_impl *impl = nir_shader_get_entrypoint(nir); + assert(impl); + + nir_foreach_block_safe (block, impl) { + nir_foreach_instr_safe (instr, block) { + nir_variable_mode mode; + nir_intrinsic_instr *intr = get_io_intrinsic(instr, modes, &mode); + if (!intr) + continue; + + nir_io_semantics sem = nir_intrinsic_io_semantics(intr); + + if (sem.location < VARYING_SLOT_VAR0_16BIT || + sem.location > VARYING_SLOT_VAR15_16BIT) + continue; + + sem.location = VARYING_SLOT_VAR0 + + (sem.location - VARYING_SLOT_VAR0_16BIT) * 2 + + sem.high_16bits; + sem.high_16bits = 0; + nir_intrinsic_set_io_semantics(intr, sem); + changed = true; + } + } + + if (changed) + nir_recompute_io_bases(impl, modes); + + nir_metadata_preserve(impl, nir_metadata_all); + return changed; +} + +static bool +is_n_to_m_conversion(nir_instr *instr, unsigned n, nir_op m) +{ + if (instr->type != nir_instr_type_alu) + return false; + + nir_alu_instr *alu = nir_instr_as_alu(instr); + return alu->op == m && alu->src[0].src.ssa->bit_size == n; +} + +static bool +is_f16_to_f32_conversion(nir_instr *instr) +{ + return is_n_to_m_conversion(instr, 16, nir_op_f2f32); +} + +static bool +is_f32_to_f16_conversion(nir_instr *instr) +{ + return is_n_to_m_conversion(instr, 32, nir_op_f2f16) || + is_n_to_m_conversion(instr, 32, nir_op_f2f16_rtne) || + is_n_to_m_conversion(instr, 32, nir_op_f2fmp); +} + +static bool +is_i16_to_i32_conversion(nir_instr *instr) +{ + return is_n_to_m_conversion(instr, 16, nir_op_i2i32); +} + +static bool +is_u16_to_u32_conversion(nir_instr *instr) +{ + return is_n_to_m_conversion(instr, 16, nir_op_u2u32); +} + +static bool +is_i32_to_i16_conversion(nir_instr *instr) +{ + return is_n_to_m_conversion(instr, 32, nir_op_i2i16); +} + +static void +replace_with_mov(nir_builder *b, nir_instr *instr, nir_src *src, + nir_alu_instr *alu) +{ + nir_ssa_def *mov = nir_mov_alu(b, alu->src[0], + nir_dest_num_components(alu->dest.dest)); + assert(!alu->dest.saturate); + 
+   nir_instr_rewrite_src_ssa(instr, src, mov);
+}
+
+/**
+ * If texture source operands use f16->f32 conversions, or if return values
+ * are only followed by f16->f32 or f32->f16 conversions, remove those
+ * conversions. This benefits drivers whose texture opcodes can accept and
+ * return 16-bit types.
+ *
+ * "tex_src_types" is a mask of nir_tex_src_* operands that should be
+ * handled. The destination is always handled.
+ *
+ * This should be run after late algebraic optimizations.
+ * Copy propagation and DCE should be run after this.
+ */
+bool
+nir_fold_16bit_sampler_conversions(nir_shader *nir,
+                                   unsigned tex_src_types)
+{
+   bool changed = false;
+   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
+   assert(impl);
+
+   nir_builder b;
+   nir_builder_init(&b, impl);
+
+   nir_foreach_block_safe (block, impl) {
+      nir_foreach_instr_safe (instr, block) {
+         if (instr->type != nir_instr_type_tex)
+            continue;
+
+         nir_tex_instr *tex = nir_instr_as_tex(instr);
+         nir_instr *src;
+         nir_alu_instr *src_alu;
+
+         /* Skip because AMD doesn't support 16-bit types with these. */
+         if ((tex->op == nir_texop_txs ||
+              tex->op == nir_texop_query_levels) ||
+             tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
+            continue;
+
+         /* Optimize source operands. */
+         for (unsigned i = 0; i < tex->num_srcs; i++) {
+            /* Filter out sources that should be ignored. */
+            if (!(BITFIELD_BIT(tex->src[i].src_type) & tex_src_types))
+               continue;
+
+            src = tex->src[i].src.ssa->parent_instr;
+            if (src->type != nir_instr_type_alu)
+               continue;
+
+            src_alu = nir_instr_as_alu(src);
+            b.cursor = nir_before_instr(src);
+
+            if (src_alu->op == nir_op_mov) {
+               assert(!"The IR shouldn't contain any movs to make this pass"
+                       " effective.");
+               continue;
+            }
+
+            /* Handle vector sources that are made of scalar instructions. */
+            if (nir_op_is_vec(src_alu->op)) {
+               /* See if the vector is made of f16->f32 opcodes. */
+               unsigned num = nir_dest_num_components(src_alu->dest.dest);
+               bool is_f16_to_f32 = true;
+               bool is_u16_to_u32 = true;
+
+               for (unsigned comp = 0; comp < num; comp++) {
+                  nir_instr *instr = src_alu->src[comp].src.ssa->parent_instr;
+                  is_f16_to_f32 &= is_f16_to_f32_conversion(instr);
+                  /* Zero-extension (u16) and sign-extension (i16) have
+                   * the same behavior here - txf returns 0 if bit 15 is set
+                   * because it's out of bounds and the higher bits don't
+                   * matter.
+                   */
+                  is_u16_to_u32 &= is_u16_to_u32_conversion(instr) ||
+                                   is_i16_to_i32_conversion(instr);
+               }
+
+               if (!is_f16_to_f32 && !is_u16_to_u32)
+                  continue;
+
+               nir_alu_instr *new_vec = nir_alu_instr_clone(nir, src_alu);
+               nir_instr_insert_after(&src_alu->instr, &new_vec->instr);
+
+               /* Replace conversions with mov. */
+               for (unsigned comp = 0; comp < num; comp++) {
+                  nir_instr *instr = new_vec->src[comp].src.ssa->parent_instr;
+                  replace_with_mov(&b, &new_vec->instr,
+                                   &new_vec->src[comp].src,
+                                   nir_instr_as_alu(instr));
+               }
+
+               new_vec->dest.dest.ssa.bit_size =
+                  new_vec->src[0].src.ssa->bit_size;
+               nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[i].src,
+                                         &new_vec->dest.dest.ssa);
+               changed = true;
+            } else if (is_f16_to_f32_conversion(&src_alu->instr) ||
+                       is_u16_to_u32_conversion(&src_alu->instr) ||
+                       is_i16_to_i32_conversion(&src_alu->instr)) {
+               /* Handle scalar sources. */
+               replace_with_mov(&b, &tex->instr, &tex->src[i].src, src_alu);
+               changed = true;
+            }
+         }
+
+         /* Optimize the destination. */
+         bool is_f16_to_f32 = true;
+         bool is_f32_to_f16 = true;
+         bool is_i16_to_i32 = true;
+         bool is_i32_to_i16 = true; /* same behavior for int and uint */
+         bool is_u16_to_u32 = true;
+
+         nir_foreach_use(use, &tex->dest.ssa) {
+            is_f16_to_f32 &= is_f16_to_f32_conversion(use->parent_instr);
+            is_f32_to_f16 &= is_f32_to_f16_conversion(use->parent_instr);
+            is_i16_to_i32 &= is_i16_to_i32_conversion(use->parent_instr);
+            is_i32_to_i16 &= is_i32_to_i16_conversion(use->parent_instr);
+            is_u16_to_u32 &= is_u16_to_u32_conversion(use->parent_instr);
+         }
+
+         if (is_f16_to_f32 || is_f32_to_f16 || is_i16_to_i32 ||
+             is_i32_to_i16 || is_u16_to_u32) {
+            /* All uses are the same conversions. Replace them with mov. */
+            nir_foreach_use(use, &tex->dest.ssa) {
+               nir_alu_instr *conv = nir_instr_as_alu(use->parent_instr);
+               conv->op = nir_op_mov;
+               tex->dest.ssa.bit_size = conv->dest.dest.ssa.bit_size;
+               tex->dest_type = (tex->dest_type & (~16 & ~32 & ~64)) |
+                                conv->dest.dest.ssa.bit_size;
+            }
+            changed = true;
+         }
+      }
+   }
+
+   nir_metadata_preserve(impl, nir_metadata_all);
+   return changed;
+}
+
+/**
+ * Fix types of source operands of texture opcodes according to
+ * the constraints by inserting the appropriate conversion opcodes.
+ *
+ * For example, if the type of derivatives must be equal to texture
+ * coordinates and the type of the texture bias must be 32-bit, there
+ * will be 2 constraints describing that.
+ */
+bool
+nir_legalize_16bit_sampler_srcs(nir_shader *nir,
+                                nir_tex_src_type_constraints constraints)
+{
+   bool changed = false;
+   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
+   assert(impl);
+
+   nir_builder b;
+   nir_builder_init(&b, impl);
+
+   nir_foreach_block_safe (block, impl) {
+      nir_foreach_instr_safe (instr, block) {
+         if (instr->type != nir_instr_type_tex)
+            continue;
+
+         nir_tex_instr *tex = nir_instr_as_tex(instr);
+         int8_t map[nir_num_tex_src_types];
+         memset(map, -1, sizeof(map));
+
+         /* Create a mapping from src_type to src[i]. */
+         for (unsigned i = 0; i < tex->num_srcs; i++)
+            map[tex->src[i].src_type] = i;
+
+         /* Legalize src types. */
+         for (unsigned i = 0; i < tex->num_srcs; i++) {
+            nir_tex_src_type_constraint c = constraints[tex->src[i].src_type];
+
+            if (!c.legalize_type)
+               continue;
+
+            /* Determine the required bit size for the src. */
+            unsigned bit_size;
+            if (c.bit_size) {
+               bit_size = c.bit_size;
+            } else {
+               if (map[c.match_src] == -1)
+                  continue; /* e.g. txs */
+
+               bit_size = tex->src[map[c.match_src]].src.ssa->bit_size;
+            }
+
+            /* Check if the type is legal. */
+            if (bit_size == tex->src[i].src.ssa->bit_size)
+               continue;
+
+            /* Fix the bit size. */
+            bool is_sint = tex->src[i].src_type == nir_tex_src_offset;
+            bool is_uint = !is_sint &&
+                           (tex->op == nir_texop_txf ||
+                            tex->op == nir_texop_txf_ms ||
+                            tex->op == nir_texop_txs ||
+                            tex->op == nir_texop_samples_identical);
+            nir_ssa_def *(*convert)(nir_builder *, nir_ssa_def *);
+
+            switch (bit_size) {
+            case 16:
+               convert = is_sint ? nir_i2i16 :
+                         is_uint ? nir_u2u16 : nir_f2f16;
+               break;
+            case 32:
+               convert = is_sint ? nir_i2i32 :
+                         is_uint ? nir_u2u32 : nir_f2f32;
+               break;
+            default:
+               assert(!"unexpected bit size");
+               continue;
+            }
+
+            b.cursor = nir_before_instr(&tex->instr);
+            nir_ssa_def *conv =
+               convert(&b, nir_ssa_for_src(&b, tex->src[i].src,
+                                           tex->src[i].src.ssa->num_components));
+            nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[i].src, conv);
+            changed = true;
+         }
+      }
+   }
+
+   nir_metadata_preserve(impl, nir_metadata_all);
+   return changed;
+}
diff --git a/src/compiler/nir/nir_lower_mediump_outputs.c b/src/compiler/nir/nir_lower_mediump_outputs.c
deleted file mode 100644
index 5176cea99e7..00000000000
--- a/src/compiler/nir/nir_lower_mediump_outputs.c
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (C) 2020 Google, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "nir.h"
-#include "nir_builder.h"
-
-/* Lower mediump outputs to float16, int16, or uint16. */
-
-void
-nir_lower_mediump_outputs(nir_shader *nir)
-{
-   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
-   assert(impl);
-
-   /* Get rid of old derefs before we change the types of the variables */
-   nir_opt_dce(nir);
-
-   nir_builder b;
-   nir_builder_init(&b, impl);
-
-   nir_foreach_block_safe (block, impl) {
-      nir_foreach_instr_safe (instr, block) {
-         if (instr->type != nir_instr_type_intrinsic)
-            continue;
-
-         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
-         if (intr->intrinsic != nir_intrinsic_store_output)
-            continue;
-
-         if (!nir_intrinsic_io_semantics(intr).medium_precision)
-            break; /* can't lower */
-
-         switch (nir_intrinsic_src_type(intr)) {
-         case nir_type_float32:
-            b.cursor = nir_before_instr(&intr->instr);
-            nir_instr_rewrite_src(&intr->instr, &intr->src[0],
-                                  nir_src_for_ssa(nir_f2f16(&b, intr->src[0].ssa)));
-            nir_intrinsic_set_src_type(intr, nir_type_float16);
-            break;
-
-         case nir_type_int32:
-            b.cursor = nir_before_instr(&intr->instr);
-            nir_instr_rewrite_src(&intr->instr, &intr->src[0],
-                                  nir_src_for_ssa(nir_i2i16(&b, intr->src[0].ssa)));
-            nir_intrinsic_set_src_type(intr, nir_type_int16);
-            break;
-
-         case nir_type_uint32:
-            b.cursor = nir_before_instr(&intr->instr);
-            nir_instr_rewrite_src(&intr->instr, &intr->src[0],
-                                  nir_src_for_ssa(nir_u2u16(&b, intr->src[0].ssa)));
-            nir_intrinsic_set_src_type(intr, nir_type_uint16);
-            break;
-
-         default:;
-         }
-      }
-   }
-}
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index 101fa004335..759b2ccc18f 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -367,7 +367,7 @@ ir3_nir_post_finalize(struct ir3_compiler *compiler, nir_shader *s)
 
    if (compiler->gpu_id >= 600 && s->info.stage == MESA_SHADER_FRAGMENT &&
        !(ir3_shader_debug & IR3_DBG_NOFP16)) {
-      NIR_PASS_V(s, nir_lower_mediump_outputs);
+      NIR_PASS_V(s, nir_lower_mediump_io, nir_var_shader_out, 0, false);
    }
 
    /* we cannot ensure that ir3_finalize_nir() is only called once, so
diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c
index f5a61de3c3c..d48c8483c4e 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -799,7 +799,7 @@ static void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir)
    if (nir->info.stage == MESA_SHADER_FRAGMENT && sscreen->info.has_packed_math_16bit &&
        sscreen->b.get_shader_param(&sscreen->b, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_FP16))
-      NIR_PASS_V(nir, nir_lower_mediump_outputs);
+      NIR_PASS_V(nir, nir_lower_mediump_io, nir_var_shader_out, 0, false);
 
    si_nir_opts(sscreen, nir, true);
diff --git a/src/util/bitset.h b/src/util/bitset.h
index 29de65e839c..b9e968293b1 100644
--- a/src/util/bitset.h
+++ b/src/util/bitset.h
@@ -80,6 +80,25 @@
    ((x)[BITSET_BITWORD(b)] &= ~BITSET_RANGE(b, e)) : \
    (assert (!"BITSET_CLEAR_RANGE: bit range crosses word boundary"), 0))
 
+static inline unsigned
+__bitset_prefix_sum(const BITSET_WORD *x, unsigned b, unsigned n)
+{
+   unsigned prefix = 0;
+
+   for (unsigned i = 0; i < n; i++) {
+      if ((i + 1) * BITSET_WORDBITS <= b) {
+         prefix += util_bitcount(x[i]);
+      } else {
+         prefix += util_bitcount(x[i] & BITFIELD_MASK(b - i * BITSET_WORDBITS));
+         break;
+      }
+   }
+   return prefix;
+}
+
+#define BITSET_PREFIX_SUM(x, b) \
+   __bitset_prefix_sum(x, b, ARRAY_SIZE(x))
+
 /* Get first bit set in a bitset.
  */
 static inline int
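
Usage sketch (illustrative, not part of the patch): a driver adopting these
passes might wire them up roughly as below. The pass invocations mirror the
ir3 and radeonsi hunks above; the constraint table is a hypothetical example
built from the nir_legalize_16bit_sampler_srcs comment (32-bit bias,
derivatives matching the bit size of the coordinates), and the exact entries
a real driver needs are hardware-specific.

   /* Hypothetical driver code, shown only to demonstrate the new API. */
   static nir_tex_src_type_constraints tex_constraints = {
      [nir_tex_src_bias] = {true, 32},                   /* force 32-bit bias */
      [nir_tex_src_ddx]  = {true, 0, nir_tex_src_coord}, /* match coord size */
      [nir_tex_src_ddy]  = {true, 0, nir_tex_src_coord},
   };

   /* Lower mediump FS outputs to 16 bits (same call as ir3/radeonsi above). */
   NIR_PASS_V(nir, nir_lower_mediump_io, nir_var_shader_out, 0, false);

   /* Then enforce the hardware's sampler-source type rules. */
   NIR_PASS_V(nir, nir_legalize_16bit_sampler_srcs, tex_constraints);

For context, BITSET_PREFIX_SUM(used, loc) returns the number of bits set
below "loc", which is how nir_recompute_io_bases packs the renumbered bases
densely in location order.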