nir: find induction/limit vars in iand instructions
This will be used to help find the trip count of loops that look
like the following:

   while (a < x && i < 8) {
      ...
      i++;
   }

Where the NIR will end up looking something like this:

   vec1 32 ssa_1 = load_const (0x00000004 /* 0.000000 */)
   loop {
      ...
      vec1 1 ssa_12 = ilt ssa_225, ssa_11
      vec1 1 ssa_17 = ilt ssa_226, ssa_1
      vec1 1 ssa_18 = iand ssa_12, ssa_17
      vec1 1 ssa_19 = inot ssa_18

      if ssa_19 {
         ...
         break
      } else {
         ...
      }
   }

On RADV this unrolls a bunch of loops in F1-2017 shaders.

Totals from affected shaders:
SGPRS: 4112 -> 4136 (0.58 %)
VGPRS: 4132 -> 4052 (-1.94 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 515444 -> 587720 (14.02 %) bytes
LDS: 2 -> 2 (0.00 %) blocks
Max Waves: 194 -> 196 (1.03 %)
Wait states: 0 -> 0 (0.00 %)

It also unrolls a couple of loops in shader-db on radeonsi.

Totals from affected shaders:
SGPRS: 128 -> 128 (0.00 %)
VGPRS: 64 -> 64 (0.00 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 6880 -> 9504 (38.14 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 16 -> 16 (0.00 %)
Wait states: 0 -> 0 (0.00 %)

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
This commit is contained in:
@@ -792,6 +792,68 @@ get_induction_and_limit_vars(nir_alu_instr *alu, nir_loop_variable **ind,
|
||||
return limit_rhs;
|
||||
}
|
||||
|
||||
static void
|
||||
try_find_trip_count_vars_in_iand(nir_alu_instr **alu,
|
||||
nir_loop_variable **ind,
|
||||
nir_loop_variable **limit,
|
||||
bool *limit_rhs,
|
||||
loop_info_state *state)
|
||||
{
|
||||
assert((*alu)->op == nir_op_ieq || (*alu)->op == nir_op_inot);
|
||||
|
||||
nir_ssa_def *iand_def = (*alu)->src[0].src.ssa;
|
||||
|
||||
if ((*alu)->op == nir_op_ieq) {
|
||||
nir_ssa_def *zero_def = (*alu)->src[1].src.ssa;
|
||||
|
||||
if (iand_def->parent_instr->type != nir_instr_type_alu ||
|
||||
zero_def->parent_instr->type != nir_instr_type_load_const) {
|
||||
|
||||
/* Maybe we had it the wrong way, flip things around */
|
||||
iand_def = (*alu)->src[1].src.ssa;
|
||||
zero_def = (*alu)->src[0].src.ssa;
|
||||
|
||||
/* If we still didn't find what we need then return */
|
||||
if (zero_def->parent_instr->type != nir_instr_type_load_const)
|
||||
return;
|
||||
}
|
||||
|
||||
/* If the loop is not breaking on (x && y) == 0 then return */
|
||||
nir_const_value zero =
|
||||
nir_instr_as_load_const(zero_def->parent_instr)->value;
|
||||
if (zero.i32[0] != 0)
|
||||
return;
|
||||
}
|
||||
|
||||
if (iand_def->parent_instr->type != nir_instr_type_alu)
|
||||
return;
|
||||
|
||||
nir_alu_instr *iand = nir_instr_as_alu(iand_def->parent_instr);
|
||||
if (iand->op != nir_op_iand)
|
||||
return;
|
||||
|
||||
/* Check if iand src is a terminator condition and try get induction var
|
||||
* and trip limit var.
|
||||
*/
|
||||
nir_ssa_def *src = iand->src[0].src.ssa;
|
||||
if (src->parent_instr->type == nir_instr_type_alu) {
|
||||
*alu = nir_instr_as_alu(src->parent_instr);
|
||||
if (is_supported_terminator_condition(*alu))
|
||||
*limit_rhs = get_induction_and_limit_vars(*alu, ind, limit, state);
|
||||
}
|
||||
|
||||
/* Try the other iand src if needed */
|
||||
if (*ind == NULL || *ind && (*ind)->type != basic_induction ||
|
||||
!is_var_constant(*limit)) {
|
||||
src = iand->src[1].src.ssa;
|
||||
if (src->parent_instr->type == nir_instr_type_alu) {
|
||||
*alu = nir_instr_as_alu(src->parent_instr);
|
||||
if (is_supported_terminator_condition(*alu))
|
||||
*limit_rhs = get_induction_and_limit_vars(*alu, ind, limit, state);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Run through each of the terminators of the loop and try to infer a possible
|
||||
* trip-count. We need to check them all, and set the lowest trip-count as the
|
||||
* trip-count of our loop. If one of the terminators has an undecidable
|
||||
@@ -821,16 +883,35 @@ find_trip_count(loop_info_state *state)
|
||||
nir_alu_instr *alu = nir_instr_as_alu(terminator->conditional_instr);
|
||||
nir_op alu_op = alu->op;
|
||||
|
||||
if (!is_supported_terminator_condition(alu)) {
|
||||
trip_count_known = false;
|
||||
continue;
|
||||
bool limit_rhs;
|
||||
nir_loop_variable *basic_ind = NULL;
|
||||
nir_loop_variable *limit;
|
||||
if (alu->op == nir_op_inot || alu->op == nir_op_ieq) {
|
||||
nir_alu_instr *new_alu = alu;
|
||||
try_find_trip_count_vars_in_iand(&new_alu, &basic_ind, &limit,
|
||||
&limit_rhs, state);
|
||||
|
||||
/* The loop is exiting on (x && y) == 0 so we need to get the
|
||||
* inverse of x or y (i.e. which ever contained the induction var) in
|
||||
* order to compute the trip count.
|
||||
*/
|
||||
if (basic_ind && basic_ind->type == basic_induction) {
|
||||
alu = new_alu;
|
||||
alu_op = inverse_comparison(alu);
|
||||
trip_count_known = false;
|
||||
terminator->exact_trip_count_unknown = true;
|
||||
}
|
||||
}
|
||||
|
||||
nir_loop_variable *basic_ind;
|
||||
nir_loop_variable *limit;
|
||||
bool limit_rhs = get_induction_and_limit_vars(alu, &basic_ind, &limit,
|
||||
state);
|
||||
terminator->induction_rhs = !limit_rhs;
|
||||
if (!basic_ind) {
|
||||
if (!is_supported_terminator_condition(alu)) {
|
||||
trip_count_known = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
limit_rhs = get_induction_and_limit_vars(alu, &basic_ind, &limit,
|
||||
state);
|
||||
}
|
||||
|
||||
/* The comparison has to have a basic induction variable for us to be
|
||||
* able to find trip counts.
|
||||
@@ -840,6 +921,8 @@ find_trip_count(loop_info_state *state)
|
||||
continue;
|
||||
}
|
||||
|
||||
terminator->induction_rhs = !limit_rhs;
|
||||
|
||||
/* Attempt to find a constant limit for the loop */
|
||||
nir_const_value limit_val;
|
||||
if (is_var_constant(limit)) {
|
||||
|
Reference in New Issue
Block a user