nir/copy_prop: Respect the source's number of components
In the near future we are going to require that the num_components in a src dereference match the num_components of the SSA value being dereferenced. To do that, we need copy_prop to not remove our MOVs from a larger SSA value into an instruction that uses fewer channels. Because we suddenly have to know how many components each source has, this makes the pass a bit more complicated. Fortunately, copy propagation is the only pass that cares about the number of components are read by any given source so it's fairly contained. Shader-db results on Sky Lake: total instructions in shared programs: 13318947 -> 13320265 (0.01%) instructions in affected programs: 260633 -> 261951 (0.51%) helped: 324 HURT: 1027 Looking through the hurt programs, about a dozen are hurt by 3 instructions and the rest are all hurt by 2 instructions. From a spot-check of the shaders, the story is always the same: They get a vec4 from somewhere (frequently an input) and use the first two or three components as a texture coordinate. Because of the vector component mismatch, we have a mov or, more likely, a vecN sitting between the texture instruction and the input. This means that the back-end inserts a bunch of MOVs and split_virtual_grfs() goes to town. Because the texture coordinate is also used by some other calculation, register coalesce can't combine them back together and we end up with an extra 2 MOV instructions in our shader. Reviewed-by: Eric Anholt <eric@anholt.net> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
This commit is contained in:
@@ -99,11 +99,12 @@ is_swizzleless_move(nir_alu_instr *instr)
|
||||
}
|
||||
|
||||
static bool
|
||||
copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
|
||||
copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if,
|
||||
unsigned num_components)
|
||||
{
|
||||
if (!src->is_ssa) {
|
||||
if (src->reg.indirect)
|
||||
return copy_prop_src(src->reg.indirect, parent_instr, parent_if);
|
||||
return copy_prop_src(src->reg.indirect, parent_instr, parent_if, 1);
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -115,17 +116,8 @@ copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
|
||||
if (!is_swizzleless_move(alu_instr))
|
||||
return false;
|
||||
|
||||
/* Don't let copy propagation land us with a phi that has more
|
||||
* components in its source than it has in its destination. That badly
|
||||
* messes up out-of-ssa.
|
||||
*/
|
||||
if (parent_instr && parent_instr->type == nir_instr_type_phi) {
|
||||
nir_phi_instr *phi = nir_instr_as_phi(parent_instr);
|
||||
assert(phi->dest.is_ssa);
|
||||
if (phi->dest.ssa.num_components !=
|
||||
alu_instr->src[0].src.ssa->num_components)
|
||||
return false;
|
||||
}
|
||||
if (alu_instr->src[0].src.ssa->num_components != num_components)
|
||||
return false;
|
||||
|
||||
if (parent_instr) {
|
||||
nir_instr_rewrite_src(parent_instr, src,
|
||||
@@ -146,7 +138,7 @@ copy_prop_alu_src(nir_alu_instr *parent_alu_instr, unsigned index)
|
||||
if (!src->src.is_ssa) {
|
||||
if (src->src.reg.indirect)
|
||||
return copy_prop_src(src->src.reg.indirect, &parent_alu_instr->instr,
|
||||
NULL);
|
||||
NULL, 1);
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -193,51 +185,122 @@ copy_prop_alu_src(nir_alu_instr *parent_alu_instr, unsigned index)
|
||||
return true;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
nir_instr *parent_instr;
|
||||
bool progress;
|
||||
} copy_prop_state;
|
||||
static bool
|
||||
copy_prop_dest(nir_dest *dest, nir_instr *instr)
|
||||
{
|
||||
if (!dest->is_ssa && dest->reg.indirect)
|
||||
return copy_prop_src(dest->reg.indirect, instr, NULL, 1);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool
|
||||
copy_prop_src_cb(nir_src *src, void *_state)
|
||||
copy_prop_deref_var(nir_instr *instr, nir_deref_var *deref_var)
|
||||
{
|
||||
copy_prop_state *state = (copy_prop_state *) _state;
|
||||
while (copy_prop_src(src, state->parent_instr, NULL))
|
||||
state->progress = true;
|
||||
if (!deref_var)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
bool progress = false;
|
||||
for (nir_deref *deref = deref_var->deref.child;
|
||||
deref; deref = deref->child) {
|
||||
if (deref->deref_type != nir_deref_type_array)
|
||||
continue;
|
||||
|
||||
nir_deref_array *arr = nir_deref_as_array(deref);
|
||||
if (arr->deref_array_type != nir_deref_array_type_indirect)
|
||||
continue;
|
||||
|
||||
while (copy_prop_src(&arr->indirect, instr, NULL, 1))
|
||||
progress = true;
|
||||
}
|
||||
return progress;
|
||||
}
|
||||
|
||||
static bool
|
||||
copy_prop_instr(nir_instr *instr)
|
||||
{
|
||||
if (instr->type == nir_instr_type_alu) {
|
||||
bool progress = false;
|
||||
switch (instr->type) {
|
||||
case nir_instr_type_alu: {
|
||||
nir_alu_instr *alu_instr = nir_instr_as_alu(instr);
|
||||
bool progress = false;
|
||||
|
||||
for (unsigned i = 0; i < nir_op_infos[alu_instr->op].num_inputs; i++)
|
||||
while (copy_prop_alu_src(alu_instr, i))
|
||||
progress = true;
|
||||
|
||||
if (!alu_instr->dest.dest.is_ssa && alu_instr->dest.dest.reg.indirect)
|
||||
while (copy_prop_src(alu_instr->dest.dest.reg.indirect, instr, NULL))
|
||||
progress = true;
|
||||
while (copy_prop_dest(&alu_instr->dest.dest, instr))
|
||||
progress = true;
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
copy_prop_state state;
|
||||
state.parent_instr = instr;
|
||||
state.progress = false;
|
||||
nir_foreach_src(instr, copy_prop_src_cb, &state);
|
||||
case nir_instr_type_tex: {
|
||||
nir_tex_instr *tex = nir_instr_as_tex(instr);
|
||||
for (unsigned i = 0; i < tex->num_srcs; i++) {
|
||||
unsigned num_components = nir_tex_instr_src_size(tex, i);
|
||||
while (copy_prop_src(&tex->src[i].src, instr, NULL, num_components))
|
||||
progress = true;
|
||||
}
|
||||
|
||||
return state.progress;
|
||||
if (copy_prop_deref_var(instr, tex->texture))
|
||||
progress = true;
|
||||
if (copy_prop_deref_var(instr, tex->sampler))
|
||||
progress = true;
|
||||
|
||||
while (copy_prop_dest(&tex->dest, instr))
|
||||
progress = true;
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
case nir_instr_type_intrinsic: {
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
for (unsigned i = 0;
|
||||
i < nir_intrinsic_infos[intrin->intrinsic].num_srcs; i++) {
|
||||
unsigned num_components =
|
||||
nir_intrinsic_infos[intrin->intrinsic].src_components[i];
|
||||
if (!num_components)
|
||||
num_components = intrin->num_components;
|
||||
|
||||
while (copy_prop_src(&intrin->src[i], instr, NULL, num_components))
|
||||
progress = true;
|
||||
}
|
||||
|
||||
for (unsigned i = 0;
|
||||
i < nir_intrinsic_infos[intrin->intrinsic].num_variables; i++) {
|
||||
if (copy_prop_deref_var(instr, intrin->variables[i]))
|
||||
progress = true;
|
||||
}
|
||||
|
||||
if (nir_intrinsic_infos[intrin->intrinsic].has_dest) {
|
||||
while (copy_prop_dest(&intrin->dest, instr))
|
||||
progress = true;
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
case nir_instr_type_phi: {
|
||||
nir_phi_instr *phi = nir_instr_as_phi(instr);
|
||||
assert(phi->dest.is_ssa);
|
||||
unsigned num_components = phi->dest.ssa.num_components;
|
||||
nir_foreach_phi_src(src, phi) {
|
||||
while (copy_prop_src(&src->src, instr, NULL, num_components))
|
||||
progress = true;
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
copy_prop_if(nir_if *if_stmt)
|
||||
{
|
||||
return copy_prop_src(&if_stmt->condition, NULL, if_stmt);
|
||||
return copy_prop_src(&if_stmt->condition, NULL, if_stmt, 1);
|
||||
}
|
||||
|
||||
static bool
|
||||
|
Reference in New Issue
Block a user