nir: rename to nir_opt_16bit_tex_image
Not sure what I was thinking when I wrote this pass (probably not much), but opt makes more sense and matches other nir passes. Fold is usually used for constants, and this pass handles more than those.

Acked-By: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28662>
This commit is contained in:
@@ -680,7 +680,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
|
||||
}
|
||||
if (((stage->nir->info.bit_sizes_int | stage->nir->info.bit_sizes_float) & 16) && gfx_level >= GFX9) {
|
||||
bool separate_g16 = gfx_level >= GFX10;
|
||||
struct nir_fold_tex_srcs_options fold_srcs_options[] = {
|
||||
struct nir_opt_tex_srcs_options opt_srcs_options[] = {
|
||||
{
|
||||
.sampler_dims = ~(BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) | BITFIELD_BIT(GLSL_SAMPLER_DIM_BUF)),
|
||||
.src_types = (1 << nir_tex_src_coord) | (1 << nir_tex_src_lod) | (1 << nir_tex_src_bias) |
|
||||
@@ -692,16 +692,16 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
|
||||
.src_types = (1 << nir_tex_src_ddx) | (1 << nir_tex_src_ddy),
|
||||
},
|
||||
};
|
||||
struct nir_fold_16bit_tex_image_options fold_16bit_options = {
|
||||
struct nir_opt_16bit_tex_image_options opt_16bit_options = {
|
||||
.rounding_mode = nir_rounding_mode_rtz,
|
||||
.fold_tex_dest_types = nir_type_float,
|
||||
.fold_image_dest_types = nir_type_float,
|
||||
.fold_image_store_data = true,
|
||||
.fold_image_srcs = !radv_use_llvm_for_stage(pdev, stage->stage),
|
||||
.fold_srcs_options_count = separate_g16 ? 2 : 1,
|
||||
.fold_srcs_options = fold_srcs_options,
|
||||
.opt_tex_dest_types = nir_type_float,
|
||||
.opt_image_dest_types = nir_type_float,
|
||||
.opt_image_store_data = true,
|
||||
.opt_image_srcs = !radv_use_llvm_for_stage(pdev, stage->stage),
|
||||
.opt_srcs_options_count = separate_g16 ? 2 : 1,
|
||||
.opt_srcs_options = opt_srcs_options,
|
||||
};
|
||||
NIR_PASS(_, stage->nir, nir_fold_16bit_tex_image, &fold_16bit_options);
|
||||
NIR_PASS(_, stage->nir, nir_opt_16bit_tex_image, &opt_16bit_options);
|
||||
|
||||
if (!stage->key.optimisations_disabled) {
|
||||
NIR_PASS(_, stage->nir, nir_opt_vectorize, opt_vectorize_callback, device);
|
||||
|
@@ -6280,23 +6280,23 @@ bool nir_force_mediump_io(nir_shader *nir, nir_variable_mode modes,
|
||||
nir_alu_type types);
|
||||
bool nir_unpack_16bit_varying_slots(nir_shader *nir, nir_variable_mode modes);
|
||||
|
||||
struct nir_fold_tex_srcs_options {
|
||||
struct nir_opt_tex_srcs_options {
|
||||
unsigned sampler_dims;
|
||||
unsigned src_types;
|
||||
};
|
||||
|
||||
struct nir_fold_16bit_tex_image_options {
|
||||
struct nir_opt_16bit_tex_image_options {
|
||||
nir_rounding_mode rounding_mode;
|
||||
nir_alu_type fold_tex_dest_types;
|
||||
nir_alu_type fold_image_dest_types;
|
||||
bool fold_image_store_data;
|
||||
bool fold_image_srcs;
|
||||
unsigned fold_srcs_options_count;
|
||||
struct nir_fold_tex_srcs_options *fold_srcs_options;
|
||||
nir_alu_type opt_tex_dest_types;
|
||||
nir_alu_type opt_image_dest_types;
|
||||
bool opt_image_store_data;
|
||||
bool opt_image_srcs;
|
||||
unsigned opt_srcs_options_count;
|
||||
struct nir_opt_tex_srcs_options *opt_srcs_options;
|
||||
};
|
||||
|
||||
bool nir_fold_16bit_tex_image(nir_shader *nir,
|
||||
struct nir_fold_16bit_tex_image_options *options);
|
||||
bool nir_opt_16bit_tex_image(nir_shader *nir,
|
||||
struct nir_opt_16bit_tex_image_options *options);
|
||||
|
||||
typedef struct {
|
||||
bool legalize_type; /* whether this src should be legalized */
|
||||
|
@@ -767,45 +767,45 @@ const_is_i16(nir_scalar scalar)
|
||||
}
|
||||
|
||||
static bool
|
||||
can_fold_16bit_src(nir_def *ssa, nir_alu_type src_type, bool sext_matters)
|
||||
can_opt_16bit_src(nir_def *ssa, nir_alu_type src_type, bool sext_matters)
|
||||
{
|
||||
bool fold_f16 = src_type == nir_type_float32;
|
||||
bool fold_u16 = src_type == nir_type_uint32 && sext_matters;
|
||||
bool fold_i16 = src_type == nir_type_int32 && sext_matters;
|
||||
bool fold_i16_u16 = (src_type == nir_type_uint32 || src_type == nir_type_int32) && !sext_matters;
|
||||
bool opt_f16 = src_type == nir_type_float32;
|
||||
bool opt_u16 = src_type == nir_type_uint32 && sext_matters;
|
||||
bool opt_i16 = src_type == nir_type_int32 && sext_matters;
|
||||
bool opt_i16_u16 = (src_type == nir_type_uint32 || src_type == nir_type_int32) && !sext_matters;
|
||||
|
||||
bool can_fold = fold_f16 || fold_u16 || fold_i16 || fold_i16_u16;
|
||||
for (unsigned i = 0; can_fold && i < ssa->num_components; i++) {
|
||||
bool can_opt = opt_f16 || opt_u16 || opt_i16 || opt_i16_u16;
|
||||
for (unsigned i = 0; can_opt && i < ssa->num_components; i++) {
|
||||
nir_scalar comp = nir_scalar_resolved(ssa, i);
|
||||
if (nir_scalar_is_undef(comp))
|
||||
continue;
|
||||
else if (nir_scalar_is_const(comp)) {
|
||||
if (fold_f16)
|
||||
can_fold &= const_is_f16(comp);
|
||||
else if (fold_u16)
|
||||
can_fold &= const_is_u16(comp);
|
||||
else if (fold_i16)
|
||||
can_fold &= const_is_i16(comp);
|
||||
else if (fold_i16_u16)
|
||||
can_fold &= (const_is_u16(comp) || const_is_i16(comp));
|
||||
if (opt_f16)
|
||||
can_opt &= const_is_f16(comp);
|
||||
else if (opt_u16)
|
||||
can_opt &= const_is_u16(comp);
|
||||
else if (opt_i16)
|
||||
can_opt &= const_is_i16(comp);
|
||||
else if (opt_i16_u16)
|
||||
can_opt &= (const_is_u16(comp) || const_is_i16(comp));
|
||||
} else {
|
||||
if (fold_f16)
|
||||
can_fold &= is_f16_to_f32_conversion(comp.def->parent_instr);
|
||||
else if (fold_u16)
|
||||
can_fold &= is_u16_to_u32_conversion(comp.def->parent_instr);
|
||||
else if (fold_i16)
|
||||
can_fold &= is_i16_to_i32_conversion(comp.def->parent_instr);
|
||||
else if (fold_i16_u16)
|
||||
can_fold &= (is_i16_to_i32_conversion(comp.def->parent_instr) ||
|
||||
is_u16_to_u32_conversion(comp.def->parent_instr));
|
||||
if (opt_f16)
|
||||
can_opt &= is_f16_to_f32_conversion(comp.def->parent_instr);
|
||||
else if (opt_u16)
|
||||
can_opt &= is_u16_to_u32_conversion(comp.def->parent_instr);
|
||||
else if (opt_i16)
|
||||
can_opt &= is_i16_to_i32_conversion(comp.def->parent_instr);
|
||||
else if (opt_i16_u16)
|
||||
can_opt &= (is_i16_to_i32_conversion(comp.def->parent_instr) ||
|
||||
is_u16_to_u32_conversion(comp.def->parent_instr));
|
||||
}
|
||||
}
|
||||
|
||||
return can_fold;
|
||||
return can_opt;
|
||||
}
|
||||
|
||||
static void
|
||||
fold_16bit_src(nir_builder *b, nir_instr *instr, nir_src *src, nir_alu_type src_type)
|
||||
opt_16bit_src(nir_builder *b, nir_instr *instr, nir_src *src, nir_alu_type src_type)
|
||||
{
|
||||
b->cursor = nir_before_instr(instr);
|
||||
|
||||
@@ -834,17 +834,17 @@ fold_16bit_src(nir_builder *b, nir_instr *instr, nir_src *src, nir_alu_type src_
|
||||
}
|
||||
|
||||
static bool
|
||||
fold_16bit_store_data(nir_builder *b, nir_intrinsic_instr *instr)
|
||||
opt_16bit_store_data(nir_builder *b, nir_intrinsic_instr *instr)
|
||||
{
|
||||
nir_alu_type src_type = nir_intrinsic_src_type(instr);
|
||||
nir_src *data_src = &instr->src[3];
|
||||
|
||||
b->cursor = nir_before_instr(&instr->instr);
|
||||
|
||||
if (!can_fold_16bit_src(data_src->ssa, src_type, true))
|
||||
if (!can_opt_16bit_src(data_src->ssa, src_type, true))
|
||||
return false;
|
||||
|
||||
fold_16bit_src(b, &instr->instr, data_src, src_type);
|
||||
opt_16bit_src(b, &instr->instr, data_src, src_type);
|
||||
|
||||
nir_intrinsic_set_src_type(instr, (src_type & ~32) | 16);
|
||||
|
||||
@@ -852,8 +852,8 @@ fold_16bit_store_data(nir_builder *b, nir_intrinsic_instr *instr)
|
||||
}
|
||||
|
||||
static bool
|
||||
fold_16bit_destination(nir_def *ssa, nir_alu_type dest_type,
|
||||
unsigned exec_mode, nir_rounding_mode rdm)
|
||||
opt_16bit_destination(nir_def *ssa, nir_alu_type dest_type,
|
||||
unsigned exec_mode, nir_rounding_mode rdm)
|
||||
{
|
||||
bool is_f32_to_f16 = dest_type == nir_type_float32;
|
||||
bool is_i32_to_i16 = dest_type == nir_type_int32 || dest_type == nir_type_uint32;
|
||||
@@ -886,15 +886,15 @@ fold_16bit_destination(nir_def *ssa, nir_alu_type dest_type,
|
||||
}
|
||||
|
||||
static bool
|
||||
fold_16bit_image_dest(nir_intrinsic_instr *instr, unsigned exec_mode,
|
||||
nir_alu_type allowed_types, nir_rounding_mode rdm)
|
||||
opt_16bit_image_dest(nir_intrinsic_instr *instr, unsigned exec_mode,
|
||||
nir_alu_type allowed_types, nir_rounding_mode rdm)
|
||||
{
|
||||
nir_alu_type dest_type = nir_intrinsic_dest_type(instr);
|
||||
|
||||
if (!(nir_alu_type_get_base_type(dest_type) & allowed_types))
|
||||
return false;
|
||||
|
||||
if (!fold_16bit_destination(&instr->def, dest_type, exec_mode, rdm))
|
||||
if (!opt_16bit_destination(&instr->def, dest_type, exec_mode, rdm))
|
||||
return false;
|
||||
|
||||
nir_intrinsic_set_dest_type(instr, (dest_type & ~32) | 16);
|
||||
@@ -903,8 +903,8 @@ fold_16bit_image_dest(nir_intrinsic_instr *instr, unsigned exec_mode,
|
||||
}
|
||||
|
||||
static bool
|
||||
fold_16bit_tex_dest(nir_tex_instr *tex, unsigned exec_mode,
|
||||
nir_alu_type allowed_types, nir_rounding_mode rdm)
|
||||
opt_16bit_tex_dest(nir_tex_instr *tex, unsigned exec_mode,
|
||||
nir_alu_type allowed_types, nir_rounding_mode rdm)
|
||||
{
|
||||
/* Skip sparse residency */
|
||||
if (tex->is_sparse)
|
||||
@@ -924,7 +924,7 @@ fold_16bit_tex_dest(nir_tex_instr *tex, unsigned exec_mode,
|
||||
if (!(nir_alu_type_get_base_type(tex->dest_type) & allowed_types))
|
||||
return false;
|
||||
|
||||
if (!fold_16bit_destination(&tex->def, tex->dest_type, exec_mode, rdm))
|
||||
if (!opt_16bit_destination(&tex->def, tex->dest_type, exec_mode, rdm))
|
||||
return false;
|
||||
|
||||
tex->dest_type = (tex->dest_type & ~32) | 16;
|
||||
@@ -932,8 +932,8 @@ fold_16bit_tex_dest(nir_tex_instr *tex, unsigned exec_mode,
|
||||
}
|
||||
|
||||
static bool
|
||||
fold_16bit_tex_srcs(nir_builder *b, nir_tex_instr *tex,
|
||||
struct nir_fold_tex_srcs_options *options)
|
||||
opt_16bit_tex_srcs(nir_builder *b, nir_tex_instr *tex,
|
||||
struct nir_opt_tex_srcs_options *options)
|
||||
{
|
||||
if (tex->op != nir_texop_tex &&
|
||||
tex->op != nir_texop_txb &&
|
||||
@@ -953,7 +953,7 @@ fold_16bit_tex_srcs(nir_builder *b, nir_tex_instr *tex,
|
||||
if (nir_tex_instr_src_index(tex, nir_tex_src_backend1) >= 0)
|
||||
return false;
|
||||
|
||||
unsigned fold_srcs = 0;
|
||||
unsigned opt_srcs = 0;
|
||||
for (unsigned i = 0; i < tex->num_srcs; i++) {
|
||||
/* Filter out sources that should be ignored. */
|
||||
if (!(BITFIELD_BIT(tex->src[i].src_type) & options->src_types))
|
||||
@@ -968,23 +968,23 @@ fold_16bit_tex_srcs(nir_builder *b, nir_tex_instr *tex,
|
||||
* because it's out of bounds and the higher bits don't
|
||||
* matter.
|
||||
*/
|
||||
if (!can_fold_16bit_src(src->ssa, src_type, false))
|
||||
if (!can_opt_16bit_src(src->ssa, src_type, false))
|
||||
return false;
|
||||
|
||||
fold_srcs |= (1 << i);
|
||||
opt_srcs |= (1 << i);
|
||||
}
|
||||
|
||||
u_foreach_bit(i, fold_srcs) {
|
||||
u_foreach_bit(i, opt_srcs) {
|
||||
nir_src *src = &tex->src[i].src;
|
||||
nir_alu_type src_type = nir_tex_instr_src_type(tex, i) | src->ssa->bit_size;
|
||||
fold_16bit_src(b, &tex->instr, src, src_type);
|
||||
opt_16bit_src(b, &tex->instr, src, src_type);
|
||||
}
|
||||
|
||||
return !!fold_srcs;
|
||||
return !!opt_srcs;
|
||||
}
|
||||
|
||||
static bool
|
||||
fold_16bit_image_srcs(nir_builder *b, nir_intrinsic_instr *instr, int lod_idx)
|
||||
opt_16bit_image_srcs(nir_builder *b, nir_intrinsic_instr *instr, int lod_idx)
|
||||
{
|
||||
enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
|
||||
bool is_ms = (dim == GLSL_SAMPLER_DIM_MS || dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
|
||||
@@ -993,24 +993,24 @@ fold_16bit_image_srcs(nir_builder *b, nir_intrinsic_instr *instr, int lod_idx)
|
||||
nir_src *lod = lod_idx >= 0 ? &instr->src[lod_idx] : NULL;
|
||||
|
||||
if (dim == GLSL_SAMPLER_DIM_BUF ||
|
||||
!can_fold_16bit_src(coords->ssa, nir_type_int32, false) ||
|
||||
(sample && !can_fold_16bit_src(sample->ssa, nir_type_int32, false)) ||
|
||||
(lod && !can_fold_16bit_src(lod->ssa, nir_type_int32, false)))
|
||||
!can_opt_16bit_src(coords->ssa, nir_type_int32, false) ||
|
||||
(sample && !can_opt_16bit_src(sample->ssa, nir_type_int32, false)) ||
|
||||
(lod && !can_opt_16bit_src(lod->ssa, nir_type_int32, false)))
|
||||
return false;
|
||||
|
||||
fold_16bit_src(b, &instr->instr, coords, nir_type_int32);
|
||||
opt_16bit_src(b, &instr->instr, coords, nir_type_int32);
|
||||
if (sample)
|
||||
fold_16bit_src(b, &instr->instr, sample, nir_type_int32);
|
||||
opt_16bit_src(b, &instr->instr, sample, nir_type_int32);
|
||||
if (lod)
|
||||
fold_16bit_src(b, &instr->instr, lod, nir_type_int32);
|
||||
opt_16bit_src(b, &instr->instr, lod, nir_type_int32);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
fold_16bit_tex_image(nir_builder *b, nir_instr *instr, void *params)
|
||||
opt_16bit_tex_image(nir_builder *b, nir_instr *instr, void *params)
|
||||
{
|
||||
struct nir_fold_16bit_tex_image_options *options = params;
|
||||
struct nir_opt_16bit_tex_image_options *options = params;
|
||||
unsigned exec_mode = b->shader->info.float_controls_execution_mode;
|
||||
bool progress = false;
|
||||
|
||||
@@ -1021,26 +1021,26 @@ fold_16bit_tex_image(nir_builder *b, nir_instr *instr, void *params)
|
||||
case nir_intrinsic_bindless_image_store:
|
||||
case nir_intrinsic_image_deref_store:
|
||||
case nir_intrinsic_image_store:
|
||||
if (options->fold_image_store_data)
|
||||
progress |= fold_16bit_store_data(b, intrinsic);
|
||||
if (options->fold_image_srcs)
|
||||
progress |= fold_16bit_image_srcs(b, intrinsic, 4);
|
||||
if (options->opt_image_store_data)
|
||||
progress |= opt_16bit_store_data(b, intrinsic);
|
||||
if (options->opt_image_srcs)
|
||||
progress |= opt_16bit_image_srcs(b, intrinsic, 4);
|
||||
break;
|
||||
case nir_intrinsic_bindless_image_load:
|
||||
case nir_intrinsic_image_deref_load:
|
||||
case nir_intrinsic_image_load:
|
||||
if (options->fold_image_dest_types)
|
||||
progress |= fold_16bit_image_dest(intrinsic, exec_mode,
|
||||
options->fold_image_dest_types,
|
||||
options->rounding_mode);
|
||||
if (options->fold_image_srcs)
|
||||
progress |= fold_16bit_image_srcs(b, intrinsic, 3);
|
||||
if (options->opt_image_dest_types)
|
||||
progress |= opt_16bit_image_dest(intrinsic, exec_mode,
|
||||
options->opt_image_dest_types,
|
||||
options->rounding_mode);
|
||||
if (options->opt_image_srcs)
|
||||
progress |= opt_16bit_image_srcs(b, intrinsic, 3);
|
||||
break;
|
||||
case nir_intrinsic_bindless_image_sparse_load:
|
||||
case nir_intrinsic_image_deref_sparse_load:
|
||||
case nir_intrinsic_image_sparse_load:
|
||||
if (options->fold_image_srcs)
|
||||
progress |= fold_16bit_image_srcs(b, intrinsic, 3);
|
||||
if (options->opt_image_srcs)
|
||||
progress |= opt_16bit_image_srcs(b, intrinsic, 3);
|
||||
break;
|
||||
case nir_intrinsic_bindless_image_atomic:
|
||||
case nir_intrinsic_bindless_image_atomic_swap:
|
||||
@@ -1048,8 +1048,8 @@ fold_16bit_tex_image(nir_builder *b, nir_instr *instr, void *params)
|
||||
case nir_intrinsic_image_deref_atomic_swap:
|
||||
case nir_intrinsic_image_atomic:
|
||||
case nir_intrinsic_image_atomic_swap:
|
||||
if (options->fold_image_srcs)
|
||||
progress |= fold_16bit_image_srcs(b, intrinsic, -1);
|
||||
if (options->opt_image_srcs)
|
||||
progress |= opt_16bit_image_srcs(b, intrinsic, -1);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
@@ -1057,12 +1057,12 @@ fold_16bit_tex_image(nir_builder *b, nir_instr *instr, void *params)
|
||||
} else if (instr->type == nir_instr_type_tex) {
|
||||
nir_tex_instr *tex = nir_instr_as_tex(instr);
|
||||
|
||||
if (options->fold_tex_dest_types)
|
||||
progress |= fold_16bit_tex_dest(tex, exec_mode, options->fold_tex_dest_types,
|
||||
options->rounding_mode);
|
||||
if (options->opt_tex_dest_types)
|
||||
progress |= opt_16bit_tex_dest(tex, exec_mode, options->opt_tex_dest_types,
|
||||
options->rounding_mode);
|
||||
|
||||
for (unsigned i = 0; i < options->fold_srcs_options_count; i++) {
|
||||
progress |= fold_16bit_tex_srcs(b, tex, &options->fold_srcs_options[i]);
|
||||
for (unsigned i = 0; i < options->opt_srcs_options_count; i++) {
|
||||
progress |= opt_16bit_tex_srcs(b, tex, &options->opt_srcs_options[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1070,11 +1070,11 @@ fold_16bit_tex_image(nir_builder *b, nir_instr *instr, void *params)
|
||||
}
|
||||
|
||||
bool
|
||||
nir_fold_16bit_tex_image(nir_shader *nir,
|
||||
struct nir_fold_16bit_tex_image_options *options)
|
||||
nir_opt_16bit_tex_image(nir_shader *nir,
|
||||
struct nir_opt_16bit_tex_image_options *options)
|
||||
{
|
||||
return nir_shader_instructions_pass(nir,
|
||||
fold_16bit_tex_image,
|
||||
opt_16bit_tex_image,
|
||||
nir_metadata_block_index | nir_metadata_dominance,
|
||||
options);
|
||||
}
|
||||
|
@@ -859,7 +859,7 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
|
||||
* coordinates that had been upconverted to 32-bits just for the
|
||||
* sampler to just be 16-bit texture sources.
|
||||
*/
|
||||
struct nir_fold_tex_srcs_options fold_srcs_options = {
|
||||
struct nir_opt_tex_srcs_options opt_srcs_options = {
|
||||
.sampler_dims = ~0,
|
||||
.src_types = (1 << nir_tex_src_coord) |
|
||||
(1 << nir_tex_src_lod) |
|
||||
@@ -871,17 +871,17 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
|
||||
(1 << nir_tex_src_ddx) |
|
||||
(1 << nir_tex_src_ddy),
|
||||
};
|
||||
struct nir_fold_16bit_tex_image_options fold_16bit_options = {
|
||||
struct nir_opt_16bit_tex_image_options opt_16bit_options = {
|
||||
.rounding_mode = nir_rounding_mode_rtz,
|
||||
.fold_tex_dest_types = nir_type_float,
|
||||
.opt_tex_dest_types = nir_type_float,
|
||||
/* blob dumps have no half regs on pixel 2's ldib or stib, so only enable for a6xx+. */
|
||||
.fold_image_dest_types = so->compiler->gen >= 6 ?
|
||||
.opt_image_dest_types = so->compiler->gen >= 6 ?
|
||||
nir_type_float | nir_type_uint | nir_type_int : 0,
|
||||
.fold_image_store_data = so->compiler->gen >= 6,
|
||||
.fold_srcs_options_count = 1,
|
||||
.fold_srcs_options = &fold_srcs_options,
|
||||
.opt_image_store_data = so->compiler->gen >= 6,
|
||||
.opt_srcs_options_count = 1,
|
||||
.opt_srcs_options = &opt_srcs_options,
|
||||
};
|
||||
OPT(s, nir_fold_16bit_tex_image, &fold_16bit_options);
|
||||
OPT(s, nir_opt_16bit_tex_image, &opt_16bit_options);
|
||||
}
|
||||
OPT_V(s, nir_opt_constant_folding);
|
||||
OPT_V(s, nir_copy_prop);
|
||||
|
@@ -193,7 +193,7 @@ static void si_late_optimize_16bit_samplers(struct si_screen *sscreen, nir_shade
|
||||
* We only use a16/g16 if all of the affected sources are 16bit.
|
||||
*/
|
||||
bool has_g16 = sscreen->info.gfx_level >= GFX10;
|
||||
struct nir_fold_tex_srcs_options fold_srcs_options[] = {
|
||||
struct nir_opt_tex_srcs_options opt_srcs_options[] = {
|
||||
{
|
||||
.sampler_dims =
|
||||
~(BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) | BITFIELD_BIT(GLSL_SAMPLER_DIM_BUF)),
|
||||
@@ -207,16 +207,16 @@ static void si_late_optimize_16bit_samplers(struct si_screen *sscreen, nir_shade
|
||||
.src_types = (1 << nir_tex_src_ddx) | (1 << nir_tex_src_ddy),
|
||||
},
|
||||
};
|
||||
struct nir_fold_16bit_tex_image_options fold_16bit_options = {
|
||||
struct nir_opt_16bit_tex_image_options opt_16bit_options = {
|
||||
.rounding_mode = nir_rounding_mode_rtz,
|
||||
.fold_tex_dest_types = nir_type_float,
|
||||
.fold_image_dest_types = nir_type_float,
|
||||
.fold_image_store_data = true,
|
||||
.fold_srcs_options_count = has_g16 ? 2 : 1,
|
||||
.fold_srcs_options = fold_srcs_options,
|
||||
.opt_tex_dest_types = nir_type_float,
|
||||
.opt_image_dest_types = nir_type_float,
|
||||
.opt_image_store_data = true,
|
||||
.opt_srcs_options_count = has_g16 ? 2 : 1,
|
||||
.opt_srcs_options = opt_srcs_options,
|
||||
};
|
||||
bool changed = false;
|
||||
NIR_PASS(changed, nir, nir_fold_16bit_tex_image, &fold_16bit_options);
|
||||
NIR_PASS(changed, nir, nir_opt_16bit_tex_image, &opt_16bit_options);
|
||||
|
||||
if (changed) {
|
||||
si_nir_opts(sscreen, nir, false);
|
||||
|
@@ -483,11 +483,11 @@ lvp_shader_lower(struct lvp_device *pdevice, struct lvp_pipeline *pipeline, nir_
|
||||
|
||||
// TODO: also optimize the tex srcs. see radeonSI for reference */
|
||||
/* Skip if there are potentially conflicting rounding modes */
|
||||
struct nir_fold_16bit_tex_image_options fold_16bit_options = {
|
||||
struct nir_opt_16bit_tex_image_options opt_16bit_options = {
|
||||
.rounding_mode = nir_rounding_mode_undef,
|
||||
.fold_tex_dest_types = nir_type_float | nir_type_uint | nir_type_int,
|
||||
.opt_tex_dest_types = nir_type_float | nir_type_uint | nir_type_int,
|
||||
};
|
||||
NIR_PASS_V(nir, nir_fold_16bit_tex_image, &fold_16bit_options);
|
||||
NIR_PASS_V(nir, nir_opt_16bit_tex_image, &opt_16bit_options);
|
||||
|
||||
/* Lower texture OPs llvmpipe supports to reduce the amount of sample
|
||||
* functions that need to be pre-compiled.
|
||||
|
Reference in New Issue
Block a user