pan/bi: Implement fsin/fcos
Instead of lowering fsin/fcos in NIR, use the lookup tables as inputs to a second-order Taylor expansion.

shader-db results aren't amazing, but keep in mind this is without backend CSE yet.

total instructions in shared programs: 115913 -> 115707 (-0.18%)
instructions in affected programs: 3151 -> 2945 (-6.54%)
helped: 12
HURT: 0
Instructions are helped.

total nops in shared programs: 84045 -> 84041 (<.01%)
nops in affected programs: 1571 -> 1567 (-0.25%)
helped: 1
HURT: 7
Inconclusive result (value mean confidence interval includes 0).

total clauses in shared programs: 20498 -> 20489 (-0.04%)
clauses in affected programs: 188 -> 179 (-4.79%)
helped: 6
HURT: 0
Clauses are helped.

total quadwords in shared programs: 90395 -> 90291 (-0.12%)
quadwords in affected programs: 2287 -> 2183 (-4.55%)
helped: 12
HURT: 0
Quadwords are helped.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9420>
This commit is contained in:

committed by
Marge Bot

parent
253b795451
commit
718bfdb3da
@@ -1450,6 +1450,54 @@ bi_lower_flog2_32(bi_builder *b, bi_index dst, bi_index s0)
|
||||
bi_fadd_f32_to(b, dst, x1, x2, BI_ROUND_NONE);
|
||||
}
|
||||
|
||||
/* Bifrost has extremely coarse tables for approximating sin/cos, accessible as
|
||||
* FSIN/COS_TABLE.u6, which multiplies the bottom 6-bits by pi/32 and
|
||||
* calculates the results. We use them to calculate sin/cos via a Taylor
|
||||
* approximation:
|
||||
*
|
||||
* f(x + e) = f(x) + e f'(x) + (e^2)/2 f''(x)
|
||||
* sin(x + e) = sin(x) + e cos(x) - (e^2)/2 sin(x)
|
||||
* cos(x + e) = cos(x) - e sin(x) - (e^2)/2 cos(x)
|
||||
*/
|
||||
|
||||
#define TWO_OVER_PI bi_imm_f32(2.0f / 3.14159f)
|
||||
#define MPI_OVER_TWO bi_imm_f32(-3.14159f / 2.0)
|
||||
#define SINCOS_BIAS bi_imm_u32(0x49400000)
|
||||
|
||||
static void
|
||||
bi_lower_fsincos_32(bi_builder *b, bi_index dst, bi_index s0, bool cos)
|
||||
{
|
||||
/* bottom 6-bits of result times pi/32 approximately s0 mod 2pi */
|
||||
bi_index x_u6 = bi_fma_f32(b, s0, TWO_OVER_PI, SINCOS_BIAS, BI_ROUND_NONE);
|
||||
|
||||
/* Approximate domain error (small) */
|
||||
bi_index e = bi_fma_f32(b, bi_fadd_f32(b, x_u6, bi_neg(SINCOS_BIAS),
|
||||
BI_ROUND_NONE),
|
||||
MPI_OVER_TWO, s0, BI_ROUND_NONE);
|
||||
|
||||
/* Lookup sin(x), cos(x) */
|
||||
bi_index sinx = bi_fsin_table_u6(b, x_u6, false);
|
||||
bi_index cosx = bi_fcos_table_u6(b, x_u6, false);
|
||||
|
||||
/* e^2 / 2 */
|
||||
bi_index e2_over_2 = bi_fma_rscale_f32(b, e, e, bi_neg(bi_zero()),
|
||||
bi_imm_u32(-1), BI_ROUND_NONE, BI_SPECIAL_NONE);
|
||||
|
||||
/* (-e^2)/2 f''(x) */
|
||||
bi_index quadratic = bi_fma_f32(b, bi_neg(e2_over_2),
|
||||
cos ? cosx : sinx,
|
||||
bi_neg(bi_zero()), BI_ROUND_NONE);
|
||||
|
||||
/* e f'(x) - (e^2/2) f''(x) */
|
||||
bi_instr *I = bi_fma_f32_to(b, bi_temp(b->shader), e,
|
||||
cos ? bi_neg(sinx) : cosx,
|
||||
quadratic, BI_ROUND_NONE);
|
||||
I->clamp = BI_CLAMP_CLAMP_M1_1;
|
||||
|
||||
/* f(x) + e f'(x) - (e^2/2) f''(x) */
|
||||
bi_fadd_f32_to(b, dst, I->dest[0], cos ? cosx : sinx, BI_ROUND_NONE);
|
||||
}
|
||||
|
||||
static void
|
||||
bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
|
||||
{
|
||||
@@ -1575,6 +1623,14 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
|
||||
bi_fadd_to(b, sz, dst, bi_abs(s0), bi_zero(), BI_ROUND_NONE);
|
||||
break;
|
||||
|
||||
case nir_op_fsin:
|
||||
bi_lower_fsincos_32(b, dst, s0, false);
|
||||
break;
|
||||
|
||||
case nir_op_fcos:
|
||||
bi_lower_fsincos_32(b, dst, s0, true);
|
||||
break;
|
||||
|
||||
case nir_op_fexp2: {
|
||||
assert(sz == 32); /* should've been lowered */
|
||||
|
||||
|
@@ -48,7 +48,6 @@ static const nir_shader_compiler_options bifrost_nir_options = {
|
||||
.lower_ifind_msb = true,
|
||||
.lower_fdph = true,
|
||||
.lower_fsqrt = true,
|
||||
.lower_sincos = true,
|
||||
|
||||
.lower_wpos_pntc = true,
|
||||
.lower_fsign = true,
|
||||
|
Reference in New Issue
Block a user