intel/fs: Use nir_lower_interpolation on gen11+
On gen11, they removed the PLN instruction so we have to emit a pile of MAD to emulate it. We may as well do that in NIR so we can optimize and later schedule it. Shader-db results on Ice Lake: total instructions in shared programs: 17145644 -> 16556440 (-3.44%) instructions in affected programs: 11507454 -> 10918250 (-5.12%) helped: 35763 HURT: 42085 helped stats (abs) min: 1 max: 140 x̄: 19.09 x̃: 18 helped stats (rel) min: 0.04% max: 37.93% x̄: 15.40% x̃: 14.49% HURT stats (abs) min: 1 max: 248 x̄: 2.22 x̃: 2 HURT stats (rel) min: 0.05% max: 50.00% x̄: 5.00% x̃: 2.47% 95% mean confidence interval for instructions value: -7.67 -7.47 95% mean confidence interval for instructions %-change: -4.46% -4.29% Instructions are helped. total loops in shared programs: 4370 -> 4370 (0.00%) loops in affected programs: 0 -> 0 helped: 0 HURT: 0 total cycles in shared programs: 360624645 -> 368220857 (2.11%) cycles in affected programs: 269631244 -> 277227456 (2.82%) helped: 15583 HURT: 65874 helped stats (abs) min: 1 max: 28561 x̄: 78.45 x̃: 32 helped stats (rel) min: <.01% max: 67.81% x̄: 5.38% x̃: 2.44% HURT stats (abs) min: 1 max: 238638 x̄: 133.87 x̃: 20 HURT stats (rel) min: <.01% max: 306.25% x̄: 5.81% x̃: 3.97% 95% mean confidence interval for cycles value: 67.42 119.09 95% mean confidence interval for cycles %-change: 3.61% 3.73% Cycles are HURT. total spills in shared programs: 8943 -> 8981 (0.42%) spills in affected programs: 1925 -> 1963 (1.97%) helped: 44 HURT: 14 total fills in shared programs: 21815 -> 21925 (0.50%) fills in affected programs: 3511 -> 3621 (3.13%) helped: 41 HURT: 18 LOST: 70 GAINED: 14 Reviewed-by: Matt Turner <mattst88@gmail.com>
This commit is contained in:
@@ -3857,47 +3857,6 @@ fs_visitor::lower_load_payload()
|
|||||||
return progress;
|
return progress;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
|
||||||
fs_visitor::lower_linterp()
|
|
||||||
{
|
|
||||||
bool progress = false;
|
|
||||||
|
|
||||||
if (devinfo->gen < 11)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
|
|
||||||
const fs_builder ibld(this, block, inst);
|
|
||||||
|
|
||||||
if (inst->opcode != FS_OPCODE_LINTERP)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
fs_reg dwP = component(inst->src[1], 0);
|
|
||||||
fs_reg dwQ = component(inst->src[1], 1);
|
|
||||||
fs_reg dwR = component(inst->src[1], 3);
|
|
||||||
for (unsigned i = 0; i < DIV_ROUND_UP(dispatch_width, 8); i++) {
|
|
||||||
const fs_builder hbld(ibld.half(i));
|
|
||||||
fs_reg dst = half(inst->dst, i);
|
|
||||||
fs_reg delta_xy = offset(inst->src[0], ibld, i);
|
|
||||||
hbld.MAD(dst, dwR, half(delta_xy, 0), dwP);
|
|
||||||
fs_inst *mad = hbld.MAD(dst, dst, half(delta_xy, 1), dwQ);
|
|
||||||
|
|
||||||
/* Propagate conditional mod and saturate from the original
|
|
||||||
* instruction to the second MAD instruction.
|
|
||||||
*/
|
|
||||||
set_saturate(inst->saturate, mad);
|
|
||||||
set_condmod(inst->conditional_mod, mad);
|
|
||||||
}
|
|
||||||
|
|
||||||
inst->remove(block);
|
|
||||||
progress = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (progress)
|
|
||||||
invalidate_live_intervals();
|
|
||||||
|
|
||||||
return progress;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
fs_visitor::lower_integer_multiplication()
|
fs_visitor::lower_integer_multiplication()
|
||||||
{
|
{
|
||||||
@@ -7095,11 +7054,6 @@ fs_visitor::optimize()
|
|||||||
OPT(compact_virtual_grfs);
|
OPT(compact_virtual_grfs);
|
||||||
} while (progress);
|
} while (progress);
|
||||||
|
|
||||||
if (OPT(lower_linterp)) {
|
|
||||||
OPT(opt_copy_propagation);
|
|
||||||
OPT(dead_code_eliminate);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Do this after cmod propagation has had every possible opportunity to
|
/* Do this after cmod propagation has had every possible opportunity to
|
||||||
* propagate results into SEL instructions.
|
* propagate results into SEL instructions.
|
||||||
*/
|
*/
|
||||||
|
@@ -163,7 +163,6 @@ public:
|
|||||||
bool lower_pack();
|
bool lower_pack();
|
||||||
bool lower_regioning();
|
bool lower_regioning();
|
||||||
bool lower_logical_sends();
|
bool lower_logical_sends();
|
||||||
bool lower_linterp();
|
|
||||||
bool lower_integer_multiplication();
|
bool lower_integer_multiplication();
|
||||||
bool lower_minmax();
|
bool lower_minmax();
|
||||||
bool lower_simd_width();
|
bool lower_simd_width();
|
||||||
|
@@ -822,7 +822,7 @@ fs_generator::generate_linterp(fs_inst *inst,
|
|||||||
struct brw_reg interp = stride(src[1], 0, 1, 0);
|
struct brw_reg interp = stride(src[1], 0, 1, 0);
|
||||||
brw_inst *i[2];
|
brw_inst *i[2];
|
||||||
|
|
||||||
/* fs_visitor::lower_linterp() will do the lowering to MAD instructions for
|
/* nir_lower_interpolation() will do the lowering to MAD instructions for
|
||||||
* us on gen11+
|
* us on gen11+
|
||||||
*/
|
*/
|
||||||
assert(devinfo->gen < 11);
|
assert(devinfo->gen < 11);
|
||||||
|
@@ -452,6 +452,8 @@ brw_nir_lower_fs_inputs(nir_shader *nir,
|
|||||||
lower_io_options |= nir_lower_io_force_sample_interpolation;
|
lower_io_options |= nir_lower_io_force_sample_interpolation;
|
||||||
|
|
||||||
nir_lower_io(nir, nir_var_shader_in, type_size_vec4, lower_io_options);
|
nir_lower_io(nir, nir_var_shader_in, type_size_vec4, lower_io_options);
|
||||||
|
if (devinfo->gen >= 11)
|
||||||
|
nir_lower_interpolation(nir, ~0);
|
||||||
|
|
||||||
/* This pass needs actual constants */
|
/* This pass needs actual constants */
|
||||||
nir_opt_constant_folding(nir);
|
nir_opt_constant_folding(nir);
|
||||||
|
Reference in New Issue
Block a user