nir: Switch to using nir_vec_scalars() for things that used nir_channel().
This should reduce follow-on optimization work to copy-propagate and
dead-code away the movs generated in construction of vectors.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14865>
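The change is mechanical at each site: instead of materializing every channel
as its own nir_ssa_def with nir_channel() (which emits a scalar mov per
channel) and then re-vectorizing with nir_vec(), the channels are collected as
nir_ssa_scalar references and handed to nir_vec_scalars(), which reads the
(def, component) pairs directly. A minimal sketch of the before/after shape of
the pattern (names abbreviated; see the hunks below for the real call sites):

    /* Before: each nir_channel() emits a mov that later passes must
     * copy-propagate and dead-code eliminate. */
    nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS];
    for (unsigned i = 0; i < n; i++)
       comps[i] = nir_channel(b, src, i);
    nir_ssa_def *vec = nir_vec(b, comps, n);

    /* After: nir_ssa_scalar is just a (def, component index) pair, so
     * nothing is emitted until the final vec is built. */
    nir_ssa_scalar comps[NIR_MAX_VEC_COMPONENTS];
    for (unsigned i = 0; i < n; i++)
       comps[i] = nir_get_ssa_scalar(src, i);
    nir_ssa_def *vec = nir_vec_scalars(b, comps, n);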
@@ -1014,15 +1014,15 @@ nir_pad_vector(nir_builder *b, nir_ssa_def *src, unsigned num_components)
    if (src->num_components == num_components)
       return src;
 
-   nir_ssa_def *components[NIR_MAX_VEC_COMPONENTS];
-   nir_ssa_def *undef = nir_ssa_undef(b, 1, src->bit_size);
+   nir_ssa_scalar components[NIR_MAX_VEC_COMPONENTS];
+   nir_ssa_scalar undef = nir_get_ssa_scalar(nir_ssa_undef(b, 1, src->bit_size), 0);
    unsigned i = 0;
    for (; i < src->num_components; i++)
-      components[i] = nir_channel(b, src, i);
+      components[i] = nir_get_ssa_scalar(src, i);
    for (; i < num_components; i++)
       components[i] = undef;
 
-   return nir_vec(b, components, num_components);
+   return nir_vec_scalars(b, components, num_components);
 }
 
 /**
@@ -1038,15 +1038,15 @@ nir_pad_vector_imm_int(nir_builder *b, nir_ssa_def *src, uint64_t imm_val,
    if (src->num_components == num_components)
       return src;
 
-   nir_ssa_def *components[NIR_MAX_VEC_COMPONENTS];
-   nir_ssa_def *imm = nir_imm_intN_t(b, imm_val, src->bit_size);
+   nir_ssa_scalar components[NIR_MAX_VEC_COMPONENTS];
+   nir_ssa_scalar imm = nir_get_ssa_scalar(nir_imm_intN_t(b, imm_val, src->bit_size), 0);
    unsigned i = 0;
    for (; i < src->num_components; i++)
-      components[i] = nir_channel(b, src, i);
+      components[i] = nir_get_ssa_scalar(src, i);
    for (; i < num_components; i++)
       components[i] = imm;
 
-   return nir_vec(b, components, num_components);
+   return nir_vec_scalars(b, components, num_components);
 }
 
 /**
@@ -45,17 +45,17 @@ lower_cube_size(nir_builder *b, nir_intrinsic_instr *intrin)
    nir_builder_instr_insert(b, &_2darray_size->instr);
 
    nir_ssa_def *size = nir_instr_ssa_def(&_2darray_size->instr);
-   nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS] = { NULL, };
+   nir_ssa_scalar comps[NIR_MAX_VEC_COMPONENTS] = { 0 };
    unsigned coord_comps = intrin->dest.ssa.num_components;
    for (unsigned c = 0; c < coord_comps; c++) {
       if (c == 2) {
-         comps[2] = nir_idiv(b, nir_channel(b, size, 2), nir_imm_int(b, 6));
+         comps[2] = nir_get_ssa_scalar(nir_idiv(b, nir_channel(b, size, 2), nir_imm_int(b, 6)), 0);
       } else {
-         comps[c] = nir_channel(b, size, c);
+         comps[c] = nir_get_ssa_scalar(size, c);
       }
    }
 
-   nir_ssa_def *vec = nir_vec(b, comps, intrin->dest.ssa.num_components);
+   nir_ssa_def *vec = nir_vec_scalars(b, comps, intrin->dest.ssa.num_components);
    nir_ssa_def_rewrite_uses(&intrin->dest.ssa, vec);
    nir_instr_remove(&intrin->instr);
    nir_instr_free(&intrin->instr);
@@ -556,18 +556,18 @@ nir_lower_io_to_vector_impl(nir_function_impl *impl, nir_variable_mode modes)
 
          assert(intrin->src[1].is_ssa);
          nir_ssa_def *old_value = intrin->src[1].ssa;
-         nir_ssa_def *comps[4];
+         nir_ssa_scalar comps[4];
          for (unsigned c = 0; c < intrin->num_components; c++) {
            if (new_frac + c >= old_frac &&
                (old_wrmask & 1 << (new_frac + c - old_frac))) {
-               comps[c] = nir_channel(&b, old_value,
-                                      new_frac + c - old_frac);
+               comps[c] = nir_get_ssa_scalar(old_value,
+                                             new_frac + c - old_frac);
            } else {
-               comps[c] = nir_ssa_undef(&b, old_value->num_components,
-                                        old_value->bit_size);
+               comps[c] = nir_get_ssa_scalar(nir_ssa_undef(&b, old_value->num_components,
+                                                           old_value->bit_size), 0);
            }
          }
-         nir_ssa_def *new_value = nir_vec(&b, comps, intrin->num_components);
+         nir_ssa_def *new_value = nir_vec_scalars(&b, comps, intrin->num_components);
          nir_instr_rewrite_src(&intrin->instr, &intrin->src[1],
                                nir_src_for_ssa(new_value));
@@ -411,12 +411,12 @@ lower_compute_system_value_instr(nir_builder *b,
       is_zero |= b->shader->info.workgroup_size[1] == 1 ? 0x2 : 0x0;
       is_zero |= b->shader->info.workgroup_size[2] == 1 ? 0x4 : 0x0;
       if (!b->shader->info.workgroup_size_variable && is_zero) {
-         nir_ssa_def *defs[3];
+         nir_ssa_scalar defs[3];
          for (unsigned i = 0; i < 3; i++) {
-            defs[i] = is_zero & (1 << i) ? nir_imm_zero(b, 1, 32) :
-                                           nir_channel(b, &intrin->dest.ssa, i);
+            defs[i] = is_zero & (1 << i) ? nir_get_ssa_scalar(nir_imm_zero(b, 1, 32), 0) :
+                                           nir_get_ssa_scalar(&intrin->dest.ssa, i);
          }
-         return nir_vec(b, defs, 3);
+         return nir_vec_scalars(b, defs, 3);
       }
 
       return NULL;
@@ -939,15 +939,15 @@ swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4])
          /* We have no 0s or 1s, just emit a swizzling MOV */
          swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4);
       } else {
-         nir_ssa_def *srcs[4];
+         nir_ssa_scalar srcs[4];
          for (unsigned i = 0; i < 4; i++) {
            if (swizzle[i] < 4) {
-               srcs[i] = nir_channel(b, &tex->dest.ssa, swizzle[i]);
+               srcs[i] = nir_get_ssa_scalar(&tex->dest.ssa, swizzle[i]);
            } else {
-               srcs[i] = get_zero_or_one(b, tex->dest_type, swizzle[i]);
+               srcs[i] = nir_get_ssa_scalar(get_zero_or_one(b, tex->dest_type, swizzle[i]), 0);
            }
          }
-         swizzled = nir_vec(b, srcs, 4);
+         swizzled = nir_vec_scalars(b, srcs, 4);
       }
    }
@@ -1083,7 +1083,8 @@ lower_tg4_offsets(nir_builder *b, nir_tex_instr *tex)
 
    b->cursor = nir_after_instr(&tex->instr);
 
-   nir_ssa_def *dest[5] = {NULL};
+   nir_ssa_scalar dest[5] = { 0 };
+   nir_ssa_def *residency = NULL;
    for (unsigned i = 0; i < 4; ++i) {
       nir_tex_instr *tex_copy = nir_tex_instr_create(b->shader, tex->num_srcs + 1);
       tex_copy->op = tex->op;
@@ -1112,14 +1113,18 @@ lower_tg4_offsets(nir_builder *b, nir_tex_instr *tex)
 
       nir_builder_instr_insert(b, &tex_copy->instr);
 
-      dest[i] = nir_channel(b, &tex_copy->dest.ssa, 3);
+      dest[i] = nir_get_ssa_scalar(&tex_copy->dest.ssa, 3);
       if (tex->is_sparse) {
          nir_ssa_def *code = nir_channel(b, &tex_copy->dest.ssa, 4);
-         dest[4] = dest[4] ? nir_sparse_residency_code_and(b, dest[4], code) : code;
+         if (residency)
+            residency = nir_sparse_residency_code_and(b, residency, code);
+         else
+            residency = code;
       }
    }
+   dest[4] = nir_get_ssa_scalar(residency, 0);
 
-   nir_ssa_def *res = nir_vec(b, dest, tex->dest.ssa.num_components);
+   nir_ssa_def *res = nir_vec_scalars(b, dest, tex->dest.ssa.num_components);
    nir_ssa_def_rewrite_uses(&tex->dest.ssa, res);
    nir_instr_remove(&tex->instr);
 
|
@@ -653,15 +653,15 @@ rename_variables(struct lower_variables_state *state)
|
||||
* written values with the existing contents of unwritten
|
||||
* channels, creating a new SSA value for the whole vector.
|
||||
*/
|
||||
nir_ssa_def *srcs[NIR_MAX_VEC_COMPONENTS];
|
||||
nir_ssa_scalar srcs[NIR_MAX_VEC_COMPONENTS];
|
||||
for (unsigned i = 0; i < intrin->num_components; i++) {
|
||||
if (wrmask & (1 << i)) {
|
||||
srcs[i] = nir_channel(&b, value, i);
|
||||
srcs[i] = nir_get_ssa_scalar(value, i);
|
||||
} else {
|
||||
srcs[i] = nir_channel(&b, old_def, i);
|
||||
srcs[i] = nir_get_ssa_scalar(old_def, i);
|
||||
}
|
||||
}
|
||||
new_def = nir_vec(&b, srcs, intrin->num_components);
|
||||
new_def = nir_vec_scalars(&b, srcs, intrin->num_components);
|
||||
}
|
||||
|
||||
assert(new_def->num_components == intrin->num_components);
|
||||
|
@@ -124,7 +124,7 @@ combine_stores(struct combine_stores_state *state,
    /* Build a new vec, to be used as source for the combined store. As it
     * gets build, remove previous stores that are not needed anymore.
     */
-   nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS] = {0};
+   nir_ssa_scalar comps[NIR_MAX_VEC_COMPONENTS] = {0};
    unsigned num_components = glsl_get_vector_elements(combo->dst->type);
    unsigned bit_size = combo->latest->src[1].ssa->bit_size;
    for (unsigned i = 0; i < num_components; i++) {
@@ -137,19 +137,17 @@ combine_stores(struct combine_stores_state *state,
           * and store->src[1] is a scalar. Otherwise, we're a regular vector
           * load and we have to pick off a component.
           */
-         comps[i] = store->num_components == 1 ?
-                    store->src[1].ssa :
-                    nir_channel(&state->b, store->src[1].ssa, i);
+         comps[i] = nir_get_ssa_scalar(store->src[1].ssa, store->num_components == 1 ? 0 : i);
 
          assert(store->instr.pass_flags > 0);
          if (--store->instr.pass_flags == 0 && store != combo->latest)
            nir_instr_remove(&store->instr);
       } else {
-         comps[i] = nir_ssa_undef(&state->b, 1, bit_size);
+         comps[i] = nir_get_ssa_scalar(nir_ssa_undef(&state->b, 1, bit_size), 0);
       }
    }
    assert(combo->latest->instr.pass_flags == 0);
-   nir_ssa_def *vec = nir_vec(&state->b, comps, num_components);
+   nir_ssa_def *vec = nir_vec_scalars(&state->b, comps, num_components);
 
    /* Fix the latest store with the combined information. */
    nir_intrinsic_instr *store = combo->latest;
@@ -592,10 +592,10 @@ load_from_ssa_entry_value(struct copy_prop_var_state *state,
       intrin->intrinsic == nir_intrinsic_load_deref ? &intrin->dest.ssa : NULL;
 
    bool keep_intrin = false;
-   nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS];
+   nir_ssa_scalar comps[NIR_MAX_VEC_COMPONENTS];
    for (unsigned i = 0; i < num_components; i++) {
       if (value->ssa.def[i]) {
-         comps[i] = nir_channel(b, value->ssa.def[i], value->ssa.component[i]);
+         comps[i] = nir_get_ssa_scalar(value->ssa.def[i], value->ssa.component[i]);
       } else {
          /* We don't have anything for this component in our
           * list. Just re-use a channel from the load.
@@ -606,11 +606,11 @@ load_from_ssa_entry_value(struct copy_prop_var_state *state,
          if (load_def->parent_instr == &intrin->instr)
            keep_intrin = true;
 
-         comps[i] = nir_channel(b, load_def, i);
+         comps[i] = nir_get_ssa_scalar(load_def, i);
       }
    }
 
-   nir_ssa_def *vec = nir_vec(b, comps, num_components);
+   nir_ssa_def *vec = nir_vec_scalars(b, comps, num_components);
    value_set_ssa_components(value, vec, num_components);
 
    if (!keep_intrin) {
@@ -111,14 +111,14 @@ opt_shrink_vectors_alu(nir_builder *b, nir_alu_instr *instr)
 
    if (is_vec) {
       /* replace vecN with smaller version */
-      nir_ssa_def *srcs[NIR_MAX_VEC_COMPONENTS] = { 0 };
+      nir_ssa_scalar srcs[NIR_MAX_VEC_COMPONENTS] = { 0 };
       unsigned index = 0;
       for (int i = 0; i < last_bit; i++) {
          if ((mask >> i) & 0x1)
-            srcs[index++] = nir_ssa_for_alu_src(b, instr, i);
+            srcs[index++] = nir_get_ssa_scalar(instr->src[i].src.ssa, instr->src[i].swizzle[0]);
       }
       assert(index == num_components);
-      nir_ssa_def *new_vec = nir_vec(b, srcs, num_components);
+      nir_ssa_def *new_vec = nir_vec_scalars(b, srcs, num_components);
       nir_ssa_def_rewrite_uses(def, new_vec);
       def = new_vec;
    }
@@ -3196,31 +3196,31 @@ nir_to_tgsi_lower_64bit_intrinsic(nir_builder *b, nir_intrinsic_instr *instr)
 
    if (has_dest) {
       /* Merge the two loads' results back into a vector. */
-      nir_ssa_def *channels[4] = {
-         nir_channel(b, &first->dest.ssa, 0),
-         nir_channel(b, &first->dest.ssa, 1),
-         nir_channel(b, &second->dest.ssa, 0),
-         second->num_components > 1 ? nir_channel(b, &second->dest.ssa, 1) : NULL,
+      nir_ssa_scalar channels[4] = {
+         nir_get_ssa_scalar(&first->dest.ssa, 0),
+         nir_get_ssa_scalar(&first->dest.ssa, 1),
+         nir_get_ssa_scalar(&second->dest.ssa, 0),
+         nir_get_ssa_scalar(&second->dest.ssa, second->num_components > 1 ? 1 : 0),
       };
-      nir_ssa_def *new = nir_vec(b, channels, instr->num_components);
+      nir_ssa_def *new = nir_vec_scalars(b, channels, instr->num_components);
       nir_ssa_def_rewrite_uses(&instr->dest.ssa, new);
    } else {
       /* Split the src value across the two stores. */
       b->cursor = nir_before_instr(&instr->instr);
 
       nir_ssa_def *src0 = instr->src[0].ssa;
-      nir_ssa_def *channels[4] = { 0 };
+      nir_ssa_scalar channels[4] = { 0 };
       for (int i = 0; i < instr->num_components; i++)
-         channels[i] = nir_channel(b, src0, i);
+         channels[i] = nir_get_ssa_scalar(src0, i);
 
       nir_intrinsic_set_write_mask(first, nir_intrinsic_write_mask(instr) & 3);
       nir_intrinsic_set_write_mask(second, nir_intrinsic_write_mask(instr) >> 2);
 
       nir_instr_rewrite_src(&first->instr, &first->src[0],
-                            nir_src_for_ssa(nir_vec(b, channels, 2)));
+                            nir_src_for_ssa(nir_vec_scalars(b, channels, 2)));
       nir_instr_rewrite_src(&second->instr, &second->src[0],
-                            nir_src_for_ssa(nir_vec(b, &channels[2],
-                                                    second->num_components)));
+                            nir_src_for_ssa(nir_vec_scalars(b, &channels[2],
+                                                            second->num_components)));
    }
 
    int offset_src = -1;
@@ -3327,7 +3327,7 @@ nir_to_tgsi_lower_64bit_to_vec2(nir_shader *s)
 }
 
 struct ntt_lower_tex_state {
-   nir_ssa_def *channels[8];
+   nir_ssa_scalar channels[8];
    unsigned i;
 };
 
@@ -3345,7 +3345,7 @@ nir_to_tgsi_lower_tex_instr_arg(nir_builder *b,
 
    nir_ssa_def *def = instr->src[tex_src].src.ssa;
    for (int i = 0; i < def->num_components; i++) {
-      s->channels[s->i++] = nir_channel(b, def, i);
+      s->channels[s->i++] = nir_get_ssa_scalar(def, i);
    }
 
    nir_tex_instr_remove_src(instr, tex_src);
@@ -3400,22 +3400,22 @@ nir_to_tgsi_lower_tex_instr(nir_builder *b, nir_instr *instr, void *data)
    nir_to_tgsi_lower_tex_instr_arg(b, tex, nir_tex_src_ms_index, &s);
 
    /* No need to pack undefs in unused channels of the tex instr */
-   while (!s.channels[s.i - 1])
+   while (!s.channels[s.i - 1].def)
       s.i--;
 
    /* Instead of putting undefs in the unused slots of the vecs, just put in
     * another used channel. Otherwise, we'll get unnecessary moves into
    * registers.
    */
-   assert(s.channels[0] != NULL);
+   assert(s.channels[0].def != NULL);
    for (int i = 1; i < s.i; i++) {
-      if (!s.channels[i])
+      if (!s.channels[i].def)
         s.channels[i] = s.channels[0];
   }
 
-   nir_tex_instr_add_src(tex, nir_tex_src_backend1, nir_src_for_ssa(nir_vec(b, s.channels, MIN2(s.i, 4))));
+   nir_tex_instr_add_src(tex, nir_tex_src_backend1, nir_src_for_ssa(nir_vec_scalars(b, s.channels, MIN2(s.i, 4))));
    if (s.i > 4)
-      nir_tex_instr_add_src(tex, nir_tex_src_backend2, nir_src_for_ssa(nir_vec(b, &s.channels[4], s.i - 4)));
+      nir_tex_instr_add_src(tex, nir_tex_src_backend2, nir_src_for_ssa(nir_vec_scalars(b, &s.channels[4], s.i - 4)));
 
    return true;
 }