nir/copy_prop: Respect the source's number of components

In the near future we are going to require that the num_components in a
src dereference match the num_components of the SSA value being
dereferenced.  To do that, we need copy_prop to not remove our MOVs from
a larger SSA value into an instruction that uses fewer channels.

Because we suddenly have to know how many components each source has,
this makes the pass a bit more complicated.  Fortunately, copy
propagation is the only pass that cares about how many components are
read by any given source, so it's fairly contained.

Shader-db results on Sky Lake:

   total instructions in shared programs: 13318947 -> 13320265 (0.01%)
   instructions in affected programs: 260633 -> 261951 (0.51%)
   helped: 324
   HURT: 1027

Looking through the hurt programs, about a dozen are hurt by 3
instructions and the rest are all hurt by 2 instructions.  From a
spot-check of the shaders, the story is always the same:  They get a
vec4 from somewhere (frequently an input) and use the first two or three
components as a texture coordinate.  Because of the vector component
mismatch, we have a mov or, more likely, a vecN sitting between the
texture instruction and the input.  This means that the back-end inserts
a bunch of MOVs and split_virtual_grfs() goes to town.  Because the
texture coordinate is also used by some other calculation, register
coalesce can't combine them back together and we end up with an extra 2
MOV instructions in our shader.

Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
This commit is contained in:
Jason Ekstrand
2017-03-02 17:10:24 -08:00
parent 60d1aac28a
commit 3c312be7b3

View File

@@ -99,11 +99,12 @@ is_swizzleless_move(nir_alu_instr *instr)
}
static bool
copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if,
unsigned num_components)
{
if (!src->is_ssa) {
if (src->reg.indirect)
return copy_prop_src(src->reg.indirect, parent_instr, parent_if);
return copy_prop_src(src->reg.indirect, parent_instr, parent_if, 1);
return false;
}
@@ -115,17 +116,8 @@ copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
if (!is_swizzleless_move(alu_instr))
return false;
/* Don't let copy propagation land us with a phi that has more
* components in its source than it has in its destination. That badly
* messes up out-of-ssa.
*/
if (parent_instr && parent_instr->type == nir_instr_type_phi) {
nir_phi_instr *phi = nir_instr_as_phi(parent_instr);
assert(phi->dest.is_ssa);
if (phi->dest.ssa.num_components !=
alu_instr->src[0].src.ssa->num_components)
return false;
}
if (alu_instr->src[0].src.ssa->num_components != num_components)
return false;
if (parent_instr) {
nir_instr_rewrite_src(parent_instr, src,
@@ -146,7 +138,7 @@ copy_prop_alu_src(nir_alu_instr *parent_alu_instr, unsigned index)
if (!src->src.is_ssa) {
if (src->src.reg.indirect)
return copy_prop_src(src->src.reg.indirect, &parent_alu_instr->instr,
NULL);
NULL, 1);
return false;
}
@@ -193,51 +185,122 @@ copy_prop_alu_src(nir_alu_instr *parent_alu_instr, unsigned index)
return true;
}
typedef struct {
nir_instr *parent_instr;
bool progress;
} copy_prop_state;
static bool
copy_prop_dest(nir_dest *dest, nir_instr *instr)
{
if (!dest->is_ssa && dest->reg.indirect)
return copy_prop_src(dest->reg.indirect, instr, NULL, 1);
return false;
}
static bool
copy_prop_src_cb(nir_src *src, void *_state)
copy_prop_deref_var(nir_instr *instr, nir_deref_var *deref_var)
{
copy_prop_state *state = (copy_prop_state *) _state;
while (copy_prop_src(src, state->parent_instr, NULL))
state->progress = true;
if (!deref_var)
return false;
return true;
bool progress = false;
for (nir_deref *deref = deref_var->deref.child;
deref; deref = deref->child) {
if (deref->deref_type != nir_deref_type_array)
continue;
nir_deref_array *arr = nir_deref_as_array(deref);
if (arr->deref_array_type != nir_deref_array_type_indirect)
continue;
while (copy_prop_src(&arr->indirect, instr, NULL, 1))
progress = true;
}
return progress;
}
static bool
copy_prop_instr(nir_instr *instr)
{
if (instr->type == nir_instr_type_alu) {
bool progress = false;
switch (instr->type) {
case nir_instr_type_alu: {
nir_alu_instr *alu_instr = nir_instr_as_alu(instr);
bool progress = false;
for (unsigned i = 0; i < nir_op_infos[alu_instr->op].num_inputs; i++)
while (copy_prop_alu_src(alu_instr, i))
progress = true;
if (!alu_instr->dest.dest.is_ssa && alu_instr->dest.dest.reg.indirect)
while (copy_prop_src(alu_instr->dest.dest.reg.indirect, instr, NULL))
progress = true;
while (copy_prop_dest(&alu_instr->dest.dest, instr))
progress = true;
return progress;
}
copy_prop_state state;
state.parent_instr = instr;
state.progress = false;
nir_foreach_src(instr, copy_prop_src_cb, &state);
case nir_instr_type_tex: {
nir_tex_instr *tex = nir_instr_as_tex(instr);
for (unsigned i = 0; i < tex->num_srcs; i++) {
unsigned num_components = nir_tex_instr_src_size(tex, i);
while (copy_prop_src(&tex->src[i].src, instr, NULL, num_components))
progress = true;
}
return state.progress;
if (copy_prop_deref_var(instr, tex->texture))
progress = true;
if (copy_prop_deref_var(instr, tex->sampler))
progress = true;
while (copy_prop_dest(&tex->dest, instr))
progress = true;
return progress;
}
case nir_instr_type_intrinsic: {
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
for (unsigned i = 0;
i < nir_intrinsic_infos[intrin->intrinsic].num_srcs; i++) {
unsigned num_components =
nir_intrinsic_infos[intrin->intrinsic].src_components[i];
if (!num_components)
num_components = intrin->num_components;
while (copy_prop_src(&intrin->src[i], instr, NULL, num_components))
progress = true;
}
for (unsigned i = 0;
i < nir_intrinsic_infos[intrin->intrinsic].num_variables; i++) {
if (copy_prop_deref_var(instr, intrin->variables[i]))
progress = true;
}
if (nir_intrinsic_infos[intrin->intrinsic].has_dest) {
while (copy_prop_dest(&intrin->dest, instr))
progress = true;
}
return progress;
}
case nir_instr_type_phi: {
nir_phi_instr *phi = nir_instr_as_phi(instr);
assert(phi->dest.is_ssa);
unsigned num_components = phi->dest.ssa.num_components;
nir_foreach_phi_src(src, phi) {
while (copy_prop_src(&src->src, instr, NULL, num_components))
progress = true;
}
return progress;
}
default:
return false;
}
}
static bool
copy_prop_if(nir_if *if_stmt)
{
return copy_prop_src(&if_stmt->condition, NULL, if_stmt);
return copy_prop_src(&if_stmt->condition, NULL, if_stmt, 1);
}
static bool