diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index a15dd327a0f..27e545b9c84 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -1450,6 +1450,54 @@ bi_lower_flog2_32(bi_builder *b, bi_index dst, bi_index s0) bi_fadd_f32_to(b, dst, x1, x2, BI_ROUND_NONE); } +/* Bifrost has extremely coarse tables for approximating sin/cos, accessible as + * FSIN/COS_TABLE.u6, which multiplies the bottom 6-bits by pi/32 and + * calculates the results. We use them to calculate sin/cos via a Taylor + * approximation: + * + * f(x + e) = f(x) + e f'(x) + (e^2)/2 f''(x) + * sin(x + e) = sin(x) + e cos(x) - (e^2)/2 sin(x) + * cos(x + e) = cos(x) - e sin(x) - (e^2)/2 cos(x) + */ + +#define TWO_OVER_PI bi_imm_f32(2.0f / 3.14159f) +#define MPI_OVER_TWO bi_imm_f32(-3.14159f / 2.0) +#define SINCOS_BIAS bi_imm_u32(0x49400000) + +static void +bi_lower_fsincos_32(bi_builder *b, bi_index dst, bi_index s0, bool cos) +{ + /* bottom 6-bits of result times pi/32 approximately s0 mod 2pi */ + bi_index x_u6 = bi_fma_f32(b, s0, TWO_OVER_PI, SINCOS_BIAS, BI_ROUND_NONE); + + /* Approximate domain error (small) */ + bi_index e = bi_fma_f32(b, bi_fadd_f32(b, x_u6, bi_neg(SINCOS_BIAS), + BI_ROUND_NONE), + MPI_OVER_TWO, s0, BI_ROUND_NONE); + + /* Lookup sin(x), cos(x) */ + bi_index sinx = bi_fsin_table_u6(b, x_u6, false); + bi_index cosx = bi_fcos_table_u6(b, x_u6, false); + + /* e^2 / 2 */ + bi_index e2_over_2 = bi_fma_rscale_f32(b, e, e, bi_neg(bi_zero()), + bi_imm_u32(-1), BI_ROUND_NONE, BI_SPECIAL_NONE); + + /* (-e^2)/2 f''(x) */ + bi_index quadratic = bi_fma_f32(b, bi_neg(e2_over_2), + cos ? cosx : sinx, + bi_neg(bi_zero()), BI_ROUND_NONE); + + /* e f'(x) - (e^2/2) f''(x) */ + bi_instr *I = bi_fma_f32_to(b, bi_temp(b->shader), e, + cos ? bi_neg(sinx) : cosx, + quadratic, BI_ROUND_NONE); + I->clamp = BI_CLAMP_CLAMP_M1_1; + + /* f(x) + e f'(x) - (e^2/2) f''(x) */ + bi_fadd_f32_to(b, dst, I->dest[0], cos ? cosx : sinx, BI_ROUND_NONE); +} + static void bi_emit_alu(bi_builder *b, nir_alu_instr *instr) { @@ -1575,6 +1623,14 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr) bi_fadd_to(b, sz, dst, bi_abs(s0), bi_zero(), BI_ROUND_NONE); break; + case nir_op_fsin: + bi_lower_fsincos_32(b, dst, s0, false); + break; + + case nir_op_fcos: + bi_lower_fsincos_32(b, dst, s0, true); + break; + case nir_op_fexp2: { assert(sz == 32); /* should've been lowered */ diff --git a/src/panfrost/bifrost/bifrost_compile.h b/src/panfrost/bifrost/bifrost_compile.h index 4fe468c1e12..020ca55aa29 100644 --- a/src/panfrost/bifrost/bifrost_compile.h +++ b/src/panfrost/bifrost/bifrost_compile.h @@ -48,7 +48,6 @@ static const nir_shader_compiler_options bifrost_nir_options = { .lower_ifind_msb = true, .lower_fdph = true, .lower_fsqrt = true, - .lower_sincos = true, .lower_wpos_pntc = true, .lower_fsign = true,