
Otherwise drivers that don't use 16-bit slots for varyings will get
confused and have their driver_locations scribbled over. This has caused
multiple problems for both Panfrost and Asahi this week. Given the only
other user of the pass for varyings is radeonsi, which needs both
together, I think this is the least controversial fix.
Fixes: fb29cef8dd ("nir: add many passes that lower and optimize 16-bit input/outputs and samplers")
Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11732>
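
As a minimal sketch of the call-site contract this change pins down (the mode
and mask arguments here are illustrative, not taken from the patch):

/* Drivers that keep varyings in 32-bit slots pass use_16bit_slots = false;
 * with this fix, nir_recompute_io_bases is skipped and their bases (and
 * thus driver_locations) are left alone. */
nir_lower_mediump_io(nir, nir_var_shader_out, ~0ull, false);

/* radeonsi lowers varyings to packed 16-bit slots, so it passes true and
 * gets the bases recomputed to match the remapped VARn_16BIT locations. */
nir_lower_mediump_io(nir, nir_var_shader_in | nir_var_shader_out, ~0ull, true);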
/*
 * Copyright (C) 2020 Google, Inc.
 * Copyright (C) 2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"

/**
 * Return the intrinsic if it matches the mask in "modes", else return NULL.
 */
static nir_intrinsic_instr *
get_io_intrinsic(nir_instr *instr, nir_variable_mode modes,
                 nir_variable_mode *out_mode)
{
   if (instr->type != nir_instr_type_intrinsic)
      return NULL;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

   switch (intr->intrinsic) {
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_input_vertex:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_per_vertex_input:
      *out_mode = nir_var_shader_in;
      return modes & nir_var_shader_in ? intr : NULL;
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output:
      *out_mode = nir_var_shader_out;
      return modes & nir_var_shader_out ? intr : NULL;
   default:
      return NULL;
   }
}

/**
 * Recompute the IO "base" indices from scratch to remove holes or to fix
 * incorrect base values due to changes in IO locations by using IO locations
 * to assign new bases. The mapping from locations to bases becomes
 * monotonically increasing.
 */
bool
nir_recompute_io_bases(nir_function_impl *impl, nir_variable_mode modes)
{
   BITSET_DECLARE(inputs, NUM_TOTAL_VARYING_SLOTS);
   BITSET_DECLARE(outputs, NUM_TOTAL_VARYING_SLOTS);
   BITSET_ZERO(inputs);
   BITSET_ZERO(outputs);

   /* Gather the bitmasks of used locations. */
   nir_foreach_block_safe (block, impl) {
      nir_foreach_instr_safe (instr, block) {
         nir_variable_mode mode;
         nir_intrinsic_instr *intr = get_io_intrinsic(instr, modes, &mode);
         if (!intr)
            continue;

         nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
         unsigned num_slots = sem.num_slots;
         if (sem.medium_precision)
            num_slots = (num_slots + sem.high_16bits + 1) / 2;

         if (mode == nir_var_shader_in) {
            for (unsigned i = 0; i < num_slots; i++)
               BITSET_SET(inputs, sem.location + i);
         } else if (!sem.dual_source_blend_index) {
            for (unsigned i = 0; i < num_slots; i++)
               BITSET_SET(outputs, sem.location + i);
         }
      }
   }

   /* Renumber bases. */
   bool changed = false;

   nir_foreach_block_safe (block, impl) {
      nir_foreach_instr_safe (instr, block) {
         nir_variable_mode mode;
         nir_intrinsic_instr *intr = get_io_intrinsic(instr, modes, &mode);
         if (!intr)
            continue;

         nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
         unsigned num_slots = sem.num_slots;
         if (sem.medium_precision)
            num_slots = (num_slots + sem.high_16bits + 1) / 2;

         if (mode == nir_var_shader_in) {
            nir_intrinsic_set_base(intr,
                                   BITSET_PREFIX_SUM(inputs, sem.location));
         } else if (sem.dual_source_blend_index) {
            nir_intrinsic_set_base(intr,
                                   BITSET_PREFIX_SUM(outputs, NUM_TOTAL_VARYING_SLOTS));
         } else {
            nir_intrinsic_set_base(intr,
                                   BITSET_PREFIX_SUM(outputs, sem.location));
         }
         changed = true;
      }
   }

   nir_metadata_preserve(impl, nir_metadata_all);
   return changed;
}

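/* Worked example (illustrative note, not part of the original file): if a
 * shader reads inputs only at VARYING_SLOT_VAR0 and VARYING_SLOT_VAR5,
 * BITSET_PREFIX_SUM(inputs, loc) counts the used locations below "loc", so
 * the two loads are renumbered to base 0 and base 1 and the hole between
 * them disappears.
 */
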
/**
 * Lower mediump inputs and/or outputs to 16 bits.
 *
 * \param modes            Whether to lower inputs, outputs, or both.
 * \param varying_mask     Determines which varyings to skip (VS inputs,
 *                         FS outputs, and patch varyings ignore this mask).
 * \param use_16bit_slots  Remap lowered slots to VARYING_SLOT_VARn_16BIT.
 */
bool
nir_lower_mediump_io(nir_shader *nir, nir_variable_mode modes,
                     uint64_t varying_mask, bool use_16bit_slots)
{
   bool changed = false;
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
   assert(impl);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block_safe (block, impl) {
      nir_foreach_instr_safe (instr, block) {
         nir_variable_mode mode;
         nir_intrinsic_instr *intr = get_io_intrinsic(instr, modes, &mode);
         if (!intr)
            continue;

         nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
         nir_ssa_def *(*convert)(nir_builder *, nir_ssa_def *);
         bool is_varying = !(nir->info.stage == MESA_SHADER_VERTEX &&
                             mode == nir_var_shader_in) &&
                           !(nir->info.stage == MESA_SHADER_FRAGMENT &&
                             mode == nir_var_shader_out);

         if (!sem.medium_precision ||
             (is_varying && sem.location <= VARYING_SLOT_VAR31 &&
              !(varying_mask & BITFIELD64_BIT(sem.location))))
            continue; /* can't lower */

         if (nir_intrinsic_has_src_type(intr)) {
            /* Stores. */
            nir_alu_type type = nir_intrinsic_src_type(intr);

            switch (type) {
            case nir_type_float32:
               convert = nir_f2fmp;
               break;
            case nir_type_int32:
            case nir_type_uint32:
               convert = nir_i2imp;
               break;
            default:
               continue; /* already lowered? */
            }

            /* Convert the 32-bit store into a 16-bit store. */
            b.cursor = nir_before_instr(&intr->instr);
            nir_instr_rewrite_src_ssa(&intr->instr, &intr->src[0],
                                      convert(&b, intr->src[0].ssa));
            nir_intrinsic_set_src_type(intr, (type & ~32) | 16);
         } else {
            /* Loads. */
            nir_alu_type type = nir_intrinsic_dest_type(intr);

            switch (type) {
            case nir_type_float32:
               convert = nir_f2f32;
               break;
            case nir_type_int32:
               convert = nir_i2i32;
               break;
            case nir_type_uint32:
               convert = nir_u2u32;
               break;
            default:
               continue; /* already lowered? */
            }

            /* Convert the 32-bit load into a 16-bit load. */
            b.cursor = nir_after_instr(&intr->instr);
            intr->dest.ssa.bit_size = 16;
            nir_intrinsic_set_dest_type(intr, (type & ~32) | 16);
            nir_ssa_def *dst = convert(&b, &intr->dest.ssa);
            nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, dst,
                                           dst->parent_instr);
         }

         if (use_16bit_slots && is_varying &&
             sem.location >= VARYING_SLOT_VAR0 &&
             sem.location <= VARYING_SLOT_VAR31) {
            unsigned index = sem.location - VARYING_SLOT_VAR0;

            sem.location = VARYING_SLOT_VAR0_16BIT + index / 2;
            sem.high_16bits = index % 2;
            nir_intrinsic_set_io_semantics(intr, sem);
         }
         changed = true;
      }
   }

   if (changed && use_16bit_slots)
      nir_recompute_io_bases(impl, modes);

   nir_metadata_preserve(impl, nir_metadata_all);
   return changed;
}

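/* Illustrative note (not part of the original file): with
 * use_16bit_slots = true, mediump VAR0 and VAR1 both land in
 * VARYING_SLOT_VAR0_16BIT with high_16bits = 0 and 1 respectively, so two
 * 16-bit varyings share one 32-bit slot.
 */
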
/**
 * Set the mediump precision bit for those shader inputs and outputs that are
 * set in the "modes" mask. Non-generic varyings (that GLES3 doesn't have)
 * are ignored. The "types" mask can be (nir_type_float | nir_type_int), etc.
 */
bool
nir_force_mediump_io(nir_shader *nir, nir_variable_mode modes,
                     nir_alu_type types)
{
   bool changed = false;
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
   assert(impl);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block_safe (block, impl) {
      nir_foreach_instr_safe (instr, block) {
         nir_variable_mode mode;
         nir_intrinsic_instr *intr = get_io_intrinsic(instr, modes, &mode);
         if (!intr)
            continue;

         nir_alu_type type;
         if (nir_intrinsic_has_src_type(intr))
            type = nir_intrinsic_src_type(intr);
         else
            type = nir_intrinsic_dest_type(intr);
         if (!(type & types))
            continue;

         nir_io_semantics sem = nir_intrinsic_io_semantics(intr);

         if (nir->info.stage == MESA_SHADER_FRAGMENT &&
             mode == nir_var_shader_out) {
            /* Only accept FS outputs. */
            if (sem.location < FRAG_RESULT_DATA0 &&
                sem.location != FRAG_RESULT_COLOR)
               continue;
         } else if (nir->info.stage == MESA_SHADER_VERTEX &&
                    mode == nir_var_shader_in) {
            /* Accept all VS inputs. */
         } else {
            /* Only accept generic varyings. */
            if (sem.location < VARYING_SLOT_VAR0 ||
                sem.location > VARYING_SLOT_VAR31)
               continue;
         }

         sem.medium_precision = 1;
         nir_intrinsic_set_io_semantics(intr, sem);
         changed = true;
      }
   }

   nir_metadata_preserve(impl, nir_metadata_all);
   return changed;
}

/**
 * Remap 16-bit varying slots to the original 32-bit varying slots.
 * This only changes IO semantics and bases.
 */
bool
nir_unpack_16bit_varying_slots(nir_shader *nir, nir_variable_mode modes)
{
   bool changed = false;
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
   assert(impl);

   nir_foreach_block_safe (block, impl) {
      nir_foreach_instr_safe (instr, block) {
         nir_variable_mode mode;
         nir_intrinsic_instr *intr = get_io_intrinsic(instr, modes, &mode);
         if (!intr)
            continue;

         nir_io_semantics sem = nir_intrinsic_io_semantics(intr);

         if (sem.location < VARYING_SLOT_VAR0_16BIT ||
             sem.location > VARYING_SLOT_VAR15_16BIT)
            continue;

         sem.location = VARYING_SLOT_VAR0 +
                        (sem.location - VARYING_SLOT_VAR0_16BIT) * 2 +
                        sem.high_16bits;
         sem.high_16bits = 0;
         nir_intrinsic_set_io_semantics(intr, sem);
         changed = true;
      }
   }

   if (changed)
      nir_recompute_io_bases(impl, modes);

   nir_metadata_preserve(impl, nir_metadata_all);
   return changed;
}

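/* Illustrative note (not part of the original file): this is the inverse of
 * the packing done by nir_lower_mediump_io, e.g. VARYING_SLOT_VAR3_16BIT
 * with high_16bits = 1 maps back to VARYING_SLOT_VAR0 + 3 * 2 + 1 =
 * VARYING_SLOT_VAR7.
 */
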
static bool
is_n_to_m_conversion(nir_instr *instr, unsigned n, nir_op m)
{
   if (instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *alu = nir_instr_as_alu(instr);
   return alu->op == m && alu->src[0].src.ssa->bit_size == n;
}

static bool
is_f16_to_f32_conversion(nir_instr *instr)
{
   return is_n_to_m_conversion(instr, 16, nir_op_f2f32);
}

static bool
is_f32_to_f16_conversion(nir_instr *instr)
{
   return is_n_to_m_conversion(instr, 32, nir_op_f2f16) ||
          is_n_to_m_conversion(instr, 32, nir_op_f2f16_rtne) ||
          is_n_to_m_conversion(instr, 32, nir_op_f2fmp);
}

static bool
is_i16_to_i32_conversion(nir_instr *instr)
{
   return is_n_to_m_conversion(instr, 16, nir_op_i2i32);
}

static bool
is_u16_to_u32_conversion(nir_instr *instr)
{
   return is_n_to_m_conversion(instr, 16, nir_op_u2u32);
}

static bool
is_i32_to_i16_conversion(nir_instr *instr)
{
   return is_n_to_m_conversion(instr, 32, nir_op_i2i16);
}

static void
replace_with_mov(nir_builder *b, nir_instr *instr, nir_src *src,
                 nir_alu_instr *alu)
{
   nir_ssa_def *mov = nir_mov_alu(b, alu->src[0],
                                  nir_dest_num_components(alu->dest.dest));
   assert(!alu->dest.saturate);
   nir_instr_rewrite_src_ssa(instr, src, mov);
}

/**
 * If texture source operands use f16->f32 conversions or return values are
 * followed by f16->f32 or f32->f16, remove those conversions. This benefits
 * drivers that have texture opcodes that can accept and return 16-bit types.
 *
 * "tex_src_types" is a mask of nir_tex_src_* operands that should be handled.
 * It's always done for the destination.
 *
 * This should be run after late algebraic optimizations.
 * Copy propagation and DCE should be run after this.
 */
bool
nir_fold_16bit_sampler_conversions(nir_shader *nir,
                                   unsigned tex_src_types)
{
   bool changed = false;
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
   assert(impl);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block_safe (block, impl) {
      nir_foreach_instr_safe (instr, block) {
         if (instr->type != nir_instr_type_tex)
            continue;

         nir_tex_instr *tex = nir_instr_as_tex(instr);
         nir_instr *src;
         nir_alu_instr *src_alu;

         /* Skip because AMD doesn't support 16-bit types with these. */
         if ((tex->op == nir_texop_txs ||
              tex->op == nir_texop_query_levels) ||
             tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
            continue;

         /* Optimize source operands. */
         for (unsigned i = 0; i < tex->num_srcs; i++) {
            /* Filter out sources that should be ignored. */
            if (!(BITFIELD_BIT(tex->src[i].src_type) & tex_src_types))
               continue;

            src = tex->src[i].src.ssa->parent_instr;
            if (src->type != nir_instr_type_alu)
               continue;

            src_alu = nir_instr_as_alu(src);
            b.cursor = nir_before_instr(src);

            if (src_alu->op == nir_op_mov) {
               assert(!"The IR shouldn't contain any movs to make this pass"
                       " effective.");
               continue;
            }

            /* Handle vector sources that are made of scalar instructions. */
            if (nir_op_is_vec(src_alu->op)) {
               /* See if the vector is made of f16->f32 opcodes. */
               unsigned num = nir_dest_num_components(src_alu->dest.dest);
               bool is_f16_to_f32 = true;
               bool is_u16_to_u32 = true;

               for (unsigned comp = 0; comp < num; comp++) {
                  nir_instr *instr = src_alu->src[comp].src.ssa->parent_instr;
                  is_f16_to_f32 &= is_f16_to_f32_conversion(instr);
                  /* Zero-extension (u16) and sign-extension (i16) have
                   * the same behavior here - txf returns 0 if bit 15 is set
                   * because it's out of bounds and the higher bits don't
                   * matter.
                   */
                  is_u16_to_u32 &= is_u16_to_u32_conversion(instr) ||
                                   is_i16_to_i32_conversion(instr);
               }

               if (!is_f16_to_f32 && !is_u16_to_u32)
                  continue;

               nir_alu_instr *new_vec = nir_alu_instr_clone(nir, src_alu);
               nir_instr_insert_after(&src_alu->instr, &new_vec->instr);

               /* Replace conversions with mov. */
               for (unsigned comp = 0; comp < num; comp++) {
                  nir_instr *instr = new_vec->src[comp].src.ssa->parent_instr;
                  replace_with_mov(&b, &new_vec->instr,
                                   &new_vec->src[comp].src,
                                   nir_instr_as_alu(instr));
               }

               new_vec->dest.dest.ssa.bit_size =
                  new_vec->src[0].src.ssa->bit_size;
               nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[i].src,
                                         &new_vec->dest.dest.ssa);
               changed = true;
            } else if (is_f16_to_f32_conversion(&src_alu->instr) ||
                       is_u16_to_u32_conversion(&src_alu->instr) ||
                       is_i16_to_i32_conversion(&src_alu->instr)) {
               /* Handle scalar sources. */
               replace_with_mov(&b, &tex->instr, &tex->src[i].src, src_alu);
               changed = true;
            }
         }

         /* Optimize the destination. */
         bool is_f16_to_f32 = true;
         bool is_f32_to_f16 = true;
         bool is_i16_to_i32 = true;
         bool is_i32_to_i16 = true; /* same behavior for int and uint */
         bool is_u16_to_u32 = true;

         nir_foreach_use(use, &tex->dest.ssa) {
            is_f16_to_f32 &= is_f16_to_f32_conversion(use->parent_instr);
            is_f32_to_f16 &= is_f32_to_f16_conversion(use->parent_instr);
            is_i16_to_i32 &= is_i16_to_i32_conversion(use->parent_instr);
            is_i32_to_i16 &= is_i32_to_i16_conversion(use->parent_instr);
            is_u16_to_u32 &= is_u16_to_u32_conversion(use->parent_instr);
         }

         if (is_f16_to_f32 || is_f32_to_f16 || is_i16_to_i32 ||
             is_i32_to_i16 || is_u16_to_u32) {
            /* All uses are the same conversions. Replace them with mov. */
            nir_foreach_use(use, &tex->dest.ssa) {
               nir_alu_instr *conv = nir_instr_as_alu(use->parent_instr);
               conv->op = nir_op_mov;
               tex->dest.ssa.bit_size = conv->dest.dest.ssa.bit_size;
               tex->dest_type = (tex->dest_type & (~16 & ~32 & ~64)) |
                                conv->dest.dest.ssa.bit_size;
            }
            changed = true;
         }
      }
   }

   nir_metadata_preserve(impl, nir_metadata_all);
   return changed;
}

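/* Illustrative note (not part of the original file): in the destination fold
 * above, if a tex instruction returns f32 and every use of the result is an
 * f2f16, each conversion becomes a mov and the tex destination is retyped to
 * 16 bits, so the sampler returns 16-bit values directly; DCE then removes
 * the leftover movs.
 */
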
/**
 * Fix types of source operands of texture opcodes according to
 * the constraints by inserting the appropriate conversion opcodes.
 *
 * For example, if the type of derivatives must be equal to texture
 * coordinates and the type of the texture bias must be 32-bit, there
 * will be 2 constraints describing that.
 */
bool
nir_legalize_16bit_sampler_srcs(nir_shader *nir,
                                nir_tex_src_type_constraints constraints)
{
   bool changed = false;
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
   assert(impl);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block_safe (block, impl) {
      nir_foreach_instr_safe (instr, block) {
         if (instr->type != nir_instr_type_tex)
            continue;

         nir_tex_instr *tex = nir_instr_as_tex(instr);
         int8_t map[nir_num_tex_src_types];
         memset(map, -1, sizeof(map));

         /* Create a mapping from src_type to src[i]. */
         for (unsigned i = 0; i < tex->num_srcs; i++)
            map[tex->src[i].src_type] = i;

         /* Legalize src types. */
         for (unsigned i = 0; i < tex->num_srcs; i++) {
            nir_tex_src_type_constraint c = constraints[tex->src[i].src_type];

            if (!c.legalize_type)
               continue;

            /* Determine the required bit size for the src. */
            unsigned bit_size;
            if (c.bit_size) {
               bit_size = c.bit_size;
            } else {
               if (map[c.match_src] == -1)
                  continue; /* e.g. txs */

               bit_size = tex->src[map[c.match_src]].src.ssa->bit_size;
            }

            /* Check if the type is legal. */
            if (bit_size == tex->src[i].src.ssa->bit_size)
               continue;

            /* Fix the bit size. */
            bool is_sint = tex->src[i].src_type == nir_tex_src_offset;
            bool is_uint = !is_sint &&
                           (tex->op == nir_texop_txf ||
                            tex->op == nir_texop_txf_ms ||
                            tex->op == nir_texop_txs ||
                            tex->op == nir_texop_samples_identical);
            nir_ssa_def *(*convert)(nir_builder *, nir_ssa_def *);

            switch (bit_size) {
            case 16:
               convert = is_sint ? nir_i2i16 :
                         is_uint ? nir_u2u16 : nir_f2f16;
               break;
            case 32:
               convert = is_sint ? nir_i2i32 :
                         is_uint ? nir_u2u32 : nir_f2f32;
               break;
            default:
               assert(!"unexpected bit size");
               continue;
            }

            b.cursor = nir_before_instr(&tex->instr);
            nir_ssa_def *conv =
               convert(&b, nir_ssa_for_src(&b, tex->src[i].src,
                                           tex->src[i].src.ssa->num_components));
            nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[i].src, conv);
            changed = true;
         }
      }
   }

   nir_metadata_preserve(impl, nir_metadata_all);
   return changed;
}
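
/* Usage sketch (illustrative, not part of the original file): the field
 * order follows the constraint struct consulted above (legalize_type,
 * bit_size, match_src); the specific rules below are hypothetical, not a
 * particular driver's table. They say: derivatives must match the bit size
 * of the coordinates, and the bias must always be 32-bit.
 */
static const nir_tex_src_type_constraints example_constraints = {
   [nir_tex_src_ddx]  = {true, 0, nir_tex_src_coord},
   [nir_tex_src_ddy]  = {true, 0, nir_tex_src_coord},
   [nir_tex_src_bias] = {true, 32},
};

/* A driver's NIR pipeline would then run:
 *    nir_legalize_16bit_sampler_srcs(nir, example_constraints);
 */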