pan/bi: Implement fsin/fcos

Instead of lowering fsin/fcos in NIR, use the hardware lookup tables as
inputs to a second-order Taylor expansion. The shader-db results aren't
amazing, but keep in mind that this is without backend CSE yet.

total instructions in shared programs: 115913 -> 115707 (-0.18%)
instructions in affected programs: 3151 -> 2945 (-6.54%)
helped: 12
HURT: 0
Instructions are helped.

total nops in shared programs: 84045 -> 84041 (<.01%)
nops in affected programs: 1571 -> 1567 (-0.25%)
helped: 1
HURT: 7
Inconclusive result (value mean confidence interval includes 0).

total clauses in shared programs: 20498 -> 20489 (-0.04%)
clauses in affected programs: 188 -> 179 (-4.79%)
helped: 6
HURT: 0
Clauses are helped.

total quadwords in shared programs: 90395 -> 90291 (-0.12%)
quadwords in affected programs: 2287 -> 2183 (-4.55%)
helped: 12
HURT: 0
Quadwords are helped.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9420>
This commit is contained in:
Alyssa Rosenzweig
2021-03-05 02:19:22 +00:00
committed by Marge Bot
parent 253b795451
commit 718bfdb3da
2 changed files with 56 additions and 1 deletions

View File

@@ -1450,6 +1450,54 @@ bi_lower_flog2_32(bi_builder *b, bi_index dst, bi_index s0)
bi_fadd_f32_to(b, dst, x1, x2, BI_ROUND_NONE);
}
/* Bifrost has extremely coarse tables for approximating sin/cos, accessible as
* FSIN/COS_TABLE.u6, which multiplies the bottom 6-bits by pi/32 and
* calculates the results. We use them to calculate sin/cos via a Taylor
* approximation:
*
* f(x + e) = f(x) + e f'(x) + (e^2)/2 f''(x)
* sin(x + e) = sin(x) + e cos(x) - (e^2)/2 sin(x)
* cos(x + e) = cos(x) - e sin(x) - (e^2)/2 cos(x)
*/
/* Immediate 2/pi, with pi truncated to 3.14159. Used to rescale the input
 * angle before the bias is added. */
#define TWO_OVER_PI bi_imm_f32(2.0f / 3.14159f)
/* Immediate -pi/2 (same truncated pi). Multiplying the quantized, rescaled
 * angle by this and adding the original angle yields the residual error.
 * Note the divisor is a double literal, so the division is done in double
 * before bi_imm_f32 receives the value. */
#define MPI_OVER_TWO bi_imm_f32(-3.14159f / 2.0)
/* 0x49400000 is the IEEE-754 binary32 bit pattern of 786432.0 (1.5 * 2^19).
 * At that magnitude one ULP is 1/16, so adding this bias to angle * 2/pi
 * rounds the sum to a multiple of 1/16, i.e. the angle counted in steps of
 * pi/32 ends up in the low mantissa bits.
 * NOTE(review): presumably those are the 6 bits the .u6 table ops consume --
 * confirm against the ISA description. */
#define SINCOS_BIAS bi_imm_u32(0x49400000)
/* Emit a 32-bit sin (cos == false) or cos (cos == true) of s0 into dst.
 *
 * The coarse hardware tables give sin/cos at a quantized angle x; the
 * residual e = s0 - x is then folded in with a second-order Taylor step:
 *
 *    f(x + e) = f(x) + e f'(x) + (e^2)/2 f''(x)
 *
 * where f'' = -f for both sin and cos.
 */
static void
bi_lower_fsincos_32(bi_builder *b, bi_index dst, bi_index s0, bool cos)
{
        /* Rescale and bias the angle; per the table description, the bottom
         * 6 bits of this value times pi/32 approximate s0 mod 2pi */
        bi_index biased = bi_fma_f32(b, s0, TWO_OVER_PI, SINCOS_BIAS,
                                     BI_ROUND_NONE);

        /* Strip the bias again to get the quantized, rescaled angle... */
        bi_index quantized = bi_fadd_f32(b, biased, bi_neg(SINCOS_BIAS),
                                         BI_ROUND_NONE);

        /* ...and subtract it (scaled back by pi/2) from the input to get the
         * small residual error e */
        bi_index err = bi_fma_f32(b, quantized, MPI_OVER_TWO, s0,
                                  BI_ROUND_NONE);

        /* Table lookups at the quantized angle */
        bi_index table_sin = bi_fsin_table_u6(b, biased, false);
        bi_index table_cos = bi_fcos_table_u6(b, biased, false);

        /* f(x) and f'(x) for the requested function */
        bi_index value, slope;

        if (cos) {
                value = table_cos;
                slope = bi_neg(table_sin);
        } else {
                value = table_sin;
                slope = table_cos;
        }

        /* e^2 / 2: e * e + (-0), rescaled with an exponent operand of -1 */
        bi_index half_err_sq = bi_fma_rscale_f32(b, err, err,
                                                 bi_neg(bi_zero()),
                                                 bi_imm_u32(-1),
                                                 BI_ROUND_NONE,
                                                 BI_SPECIAL_NONE);

        /* (e^2)/2 f''(x), computed as -(e^2)/2 f(x) */
        bi_index second_order = bi_fma_f32(b, bi_neg(half_err_sq), value,
                                           bi_neg(bi_zero()),
                                           BI_ROUND_NONE);

        /* e f'(x) + (e^2)/2 f''(x), clamped to [-1, 1] */
        bi_instr *correction = bi_fma_f32_to(b, bi_temp(b->shader), err,
                                             slope, second_order,
                                             BI_ROUND_NONE);
        correction->clamp = BI_CLAMP_CLAMP_M1_1;

        /* f(x) + e f'(x) + (e^2)/2 f''(x) */
        bi_fadd_f32_to(b, dst, correction->dest[0], value, BI_ROUND_NONE);
}
static void
bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
{
@@ -1575,6 +1623,14 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
bi_fadd_to(b, sz, dst, bi_abs(s0), bi_zero(), BI_ROUND_NONE);
break;
case nir_op_fsin:
bi_lower_fsincos_32(b, dst, s0, false);
break;
case nir_op_fcos:
bi_lower_fsincos_32(b, dst, s0, true);
break;
case nir_op_fexp2: {
assert(sz == 32); /* should've been lowered */

View File

@@ -48,7 +48,6 @@ static const nir_shader_compiler_options bifrost_nir_options = {
.lower_ifind_msb = true,
.lower_fdph = true,
.lower_fsqrt = true,
.lower_sincos = true,
.lower_wpos_pntc = true,
.lower_fsign = true,