pan/bi: Don't lower fpow
We can fuse the intermediate multiply with the FMA_RSCALE in the exponent code and save an instruction. Whether this is better than adding a NIR op remains to be seen. total instructions in shared programs: 146614 -> 146190 (-0.29%) instructions in affected programs: 40724 -> 40300 (-1.04%) helped: 157 HURT: 0 helped stats (abs) min: 1.0 max: 9.0 x̄: 2.70 x̃: 2 helped stats (rel) min: 0.22% max: 10.34% x̄: 1.37% x̃: 1.20% 95% mean confidence interval for instructions value: -3.00 -2.40 95% mean confidence interval for instructions %-change: -1.58% -1.15% Instructions are helped. total tuples in shared programs: 128116 -> 127696 (-0.33%) tuples in affected programs: 33421 -> 33001 (-1.26%) helped: 150 HURT: 0 helped stats (abs) min: 1.0 max: 16.0 x̄: 2.80 x̃: 2 helped stats (rel) min: 0.28% max: 4.37% x̄: 1.36% x̃: 1.07% 95% mean confidence interval for tuples value: -3.24 -2.36 95% mean confidence interval for tuples %-change: -1.50% -1.21% Tuples are helped. total clauses in shared programs: 27531 -> 27483 (-0.17%) clauses in affected programs: 719 -> 671 (-6.68%) helped: 20 HURT: 0 helped stats (abs) min: 1.0 max: 8.0 x̄: 2.40 x̃: 1 helped stats (rel) min: 1.61% max: 12.90% x̄: 6.96% x̃: 5.33% 95% mean confidence interval for clauses value: -3.48 -1.32 95% mean confidence interval for clauses %-change: -9.10% -4.82% Clauses are helped. total cycles in shared programs: 12250.81 -> 12233.69 (-0.14%) cycles in affected programs: 1251.50 -> 1234.38 (-1.37%) helped: 141 HURT: 0 helped stats (abs) min: 0.041665999999999315 max: 0.6666670000000003 x̄: 0.12 x̃: 0 helped stats (rel) min: 0.29% max: 5.00% x̄: 1.48% x̃: 1.20% 95% mean confidence interval for cycles value: -0.14 -0.10 95% mean confidence interval for cycles %-change: -1.63% -1.32% Cycles are helped. total arith in shared programs: 4840.25 -> 4822.71 (-0.36%) arith in affected programs: 1324.08 -> 1306.54 (-1.32%) helped: 151 HURT: 0 helped stats (abs) min: 0.041665999999999315 max: 0.6666670000000003 x̄: 0.12 x̃: 0 helped stats (rel) min: 0.29% max: 5.00% x̄: 1.43% x̃: 1.13% 95% mean confidence interval for arith value: -0.13 -0.10 95% mean confidence interval for arith %-change: -1.59% -1.28% Arith are helped. total texture in shared programs: 1666.50 -> 1666.50 (0.00%) texture in affected programs: 0 -> 0 helped: 0 HURT: 0 total vary in shared programs: 639.06 -> 639.06 (0.00%) vary in affected programs: 0 -> 0 helped: 0 HURT: 0 total ldst in shared programs: 9682 -> 9682 (0.00%) ldst in affected programs: 0 -> 0 helped: 0 HURT: 0 total quadwords in shared programs: 116758 -> 116378 (-0.33%) quadwords in affected programs: 28054 -> 27674 (-1.35%) helped: 148 HURT: 2 helped stats (abs) min: 1.0 max: 16.0 x̄: 2.58 x̃: 2 helped stats (rel) min: 0.29% max: 5.13% x̄: 1.54% x̃: 1.23% HURT stats (abs) min: 1.0 max: 1.0 x̄: 1.00 x̃: 1 HURT stats (rel) min: 0.67% max: 0.85% x̄: 0.76% x̃: 0.76% 95% mean confidence interval for quadwords value: -2.94 -2.12 95% mean confidence interval for quadwords %-change: -1.69% -1.33% Quadwords are helped. Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11733>
This commit is contained in:

committed by
Marge Bot

parent
91f130fa1e
commit
499397700c
@@ -1535,6 +1535,36 @@ bi_flog2_32(bi_builder *b, bi_index dst, bi_index s0)
|
||||
BI_ROUND_NONE);
|
||||
}
|
||||
|
||||
static void
|
||||
bi_lower_fpow_32(bi_builder *b, bi_index dst, bi_index base, bi_index exp)
|
||||
{
|
||||
bi_index log2_base = bi_null();
|
||||
|
||||
if (base.type == BI_INDEX_CONSTANT) {
|
||||
log2_base = bi_imm_f32(log2f(uif(base.value)));
|
||||
} else {
|
||||
log2_base = bi_temp(b->shader);
|
||||
bi_lower_flog2_32(b, log2_base, base);
|
||||
}
|
||||
|
||||
return bi_lower_fexp2_32(b, dst, bi_fmul_f32(b, exp, log2_base));
|
||||
}
|
||||
|
||||
static void
|
||||
bi_fpow_32(bi_builder *b, bi_index dst, bi_index base, bi_index exp)
|
||||
{
|
||||
bi_index log2_base = bi_null();
|
||||
|
||||
if (base.type == BI_INDEX_CONSTANT) {
|
||||
log2_base = bi_imm_f32(log2f(uif(base.value)));
|
||||
} else {
|
||||
log2_base = bi_temp(b->shader);
|
||||
bi_flog2_32(b, log2_base, base);
|
||||
}
|
||||
|
||||
return bi_fexp_32(b, dst, exp, log2_base);
|
||||
}
|
||||
|
||||
/* Bifrost has extremely coarse tables for approximating sin/cos, accessible as
|
||||
* FSIN/COS_TABLE.u6, which multiplies the bottom 6-bits by pi/32 and
|
||||
* calculates the results. We use them to calculate sin/cos via a Taylor
|
||||
@@ -1837,27 +1867,35 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
|
||||
bi_lower_fsincos_32(b, dst, s0, true);
|
||||
break;
|
||||
|
||||
case nir_op_fexp2: {
|
||||
case nir_op_fexp2:
|
||||
assert(sz == 32); /* should've been lowered */
|
||||
|
||||
if (b->shader->quirks & BIFROST_NO_FP32_TRANSCENDENTALS)
|
||||
bi_lower_fexp2_32(b, dst, s0);
|
||||
else
|
||||
bi_fexp2_32(b, dst, s0, bi_imm_f32(1.0f));
|
||||
bi_fexp_32(b, dst, s0, bi_imm_f32(1.0f));
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_op_flog2: {
|
||||
case nir_op_flog2:
|
||||
assert(sz == 32); /* should've been lowered */
|
||||
|
||||
if (b->shader->quirks & BIFROST_NO_FP32_TRANSCENDENTALS)
|
||||
bi_lower_flog2_32(b, dst, s0);
|
||||
else
|
||||
bi_flog2_32_to(b, dst, s0);
|
||||
bi_flog2_32(b, dst, s0);
|
||||
|
||||
break;
|
||||
|
||||
case nir_op_fpow:
|
||||
assert(sz == 32); /* should've been lowered */
|
||||
|
||||
if (b->shader->quirks & BIFROST_NO_FP32_TRANSCENDENTALS)
|
||||
bi_lower_fpow_32(b, dst, s0, s1);
|
||||
else
|
||||
bi_fpow_32(b, dst, s0, s1);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_op_bcsel:
|
||||
if (src1_sz == 8)
|
||||
@@ -3059,6 +3097,7 @@ bi_lower_bit_size(const nir_instr *instr, UNUSED void *data)
|
||||
switch (alu->op) {
|
||||
case nir_op_fexp2:
|
||||
case nir_op_flog2:
|
||||
case nir_op_fpow:
|
||||
case nir_op_fsin:
|
||||
case nir_op_fcos:
|
||||
return (nir_dest_bit_size(alu->dest.dest) == 32) ? 0 : 32;
|
||||
|
@@ -43,7 +43,6 @@ static const nir_shader_compiler_options bifrost_nir_options = {
|
||||
.lower_fmod = true,
|
||||
.lower_fdiv = true,
|
||||
.lower_isign = true,
|
||||
.lower_fpow = true,
|
||||
.lower_find_lsb = true,
|
||||
.lower_ifind_msb = true,
|
||||
.lower_fdph = true,
|
||||
|
Reference in New Issue
Block a user