nir: Shrink store intrinsic num_components to the size used by the writemask.
This cuts a bunch of vector setup for undef components in the i965 vec4
backend.  Noticed while looking into codegen regressions in nir-to-tgsi.

brw results:
total instructions in shared programs: 3893221 -> 3881461 (-0.30%)
total cycles in shared programs: 113792154 -> 113810288 (0.02%)

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6054>
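As a standalone illustration (not part of the commit), the sketch below shows the
trimming arithmetic the diff applies to store intrinsics: the highest set bit of
the write mask becomes the new num_components, and only the low contiguous
channels are kept.  last_bit() here stands in for Mesa's util_last_bit(), the
channel-mask expression mirrors BITSET_MASK(), and the concrete values are made up.

/* Minimal sketch of the write-mask trimming performed on store intrinsics. */
#include <stdio.h>

static unsigned last_bit(unsigned v)   /* stand-in for util_last_bit() */
{
   unsigned n = 0;
   while (v) {
      n++;
      v >>= 1;
   }
   return n;
}

int main(void)
{
   unsigned write_mask = 0x3;          /* only .xy of a vec4 store are written */
   unsigned num_components = 4;

   unsigned used = last_bit(write_mask);     /* 2 */
   unsigned chan_mask = (1u << used) - 1;    /* 0x3, cf. BITSET_MASK(used) */

   if (used < num_components)
      num_components = used;           /* the store now sources only a vec2 */

   printf("num_components = %u, kept channel mask = 0x%x\n",
          num_components, chan_mask);
   return 0;
}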
@@ -110,7 +110,7 @@ opt_shrink_vectors_alu(nir_builder *b, nir_alu_instr *instr)
 }
 
 static bool
-opt_shrink_vectors_intrinsic(nir_intrinsic_instr *instr)
+opt_shrink_vectors_intrinsic(nir_builder *b, nir_intrinsic_instr *instr)
 {
    switch (instr->intrinsic) {
    case nir_intrinsic_load_uniform:
@@ -125,19 +125,44 @@ opt_shrink_vectors_intrinsic(nir_intrinsic_instr *instr)
    case nir_intrinsic_load_global:
    case nir_intrinsic_load_kernel_input:
    case nir_intrinsic_load_scratch:
+   case nir_intrinsic_store_output:
+   case nir_intrinsic_store_per_vertex_output:
+   case nir_intrinsic_store_ssbo:
+   case nir_intrinsic_store_shared:
+   case nir_intrinsic_store_global:
+   case nir_intrinsic_store_scratch:
       break;
    default:
       return false;
    }
 
-   assert(nir_intrinsic_infos[instr->intrinsic].has_dest);
    /* Must be a vectorized intrinsic that we can resize. */
    assert(instr->num_components != 0);
 
-   if (shrink_dest_to_read_mask(&instr->dest.ssa)) {
-      instr->num_components = instr->dest.ssa.num_components;
-      return true;
-   }
+   if (nir_intrinsic_infos[instr->intrinsic].has_dest) {
+      /* loads: Trim the dest to the used channels */
+
+      if (shrink_dest_to_read_mask(&instr->dest.ssa)) {
+         instr->num_components = instr->dest.ssa.num_components;
+         return true;
+      }
+   } else {
+      /* Stores: trim the num_components stored according to the write
+       * mask.
+       */
+      unsigned write_mask = nir_intrinsic_write_mask(instr);
+      unsigned last_bit = util_last_bit(write_mask);
+      if (last_bit < instr->num_components && instr->src[0].is_ssa) {
+         nir_ssa_def *def = nir_channels(b, instr->src[0].ssa,
+                                         BITSET_MASK(last_bit));
+         nir_instr_rewrite_src(&instr->instr,
+                               &instr->src[0],
+                               nir_src_for_ssa(def));
+         instr->num_components = last_bit;
+
+         return true;
+      }
+   }
 
    return false;
 }
@@ -164,7 +189,7 @@ opt_shrink_vectors_instr(nir_builder *b, nir_instr *instr)
       return opt_shrink_vectors_alu(b, nir_instr_as_alu(instr));
 
    case nir_instr_type_intrinsic:
-      return opt_shrink_vectors_intrinsic(nir_instr_as_intrinsic(instr));
+      return opt_shrink_vectors_intrinsic(b, nir_instr_as_intrinsic(instr));
 
    case nir_instr_type_load_const:
       return opt_shrink_vectors_load_const(nir_instr_as_load_const(instr));
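For context, a hedged sketch of how a driver's optimization loop might invoke this
pass so that the now-unused channel setup is actually removed.  NIR_PASS(),
nir_opt_shrink_vectors() and nir_opt_dce() are real Mesa entry points; the loop
structure and the shader variable "s" are assumptions for illustration only.

   /* Hypothetical driver-side loop (not from this commit): rerun the shrink
    * pass together with DCE until neither makes progress. */
   bool progress = true;
   while (progress) {
      progress = false;
      NIR_PASS(progress, s, nir_opt_shrink_vectors);
      NIR_PASS(progress, s, nir_opt_dce);
   }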