Add runtime compiled x87 implementations for most of the remaining

opcodes. When enabled via environment vars, gears runs and almost looks right but other apps are still quite buggy.
2005-07-10 11:23:10 +00:00
parent 0cfbd849ec
commit 9311bc253e
3 changed files with 596 additions and 229 deletions
--- a/src/mesa/tnl/t_vb_arbprogram.c
+++ b/src/mesa/tnl/t_vb_arbprogram.c
@@ -110,6 +110,10 @@ static GLfloat ApproxPower(GLfloat x, GLfloat y)
   return (GLfloat) _mesa_pow(x, y);
 }

+static GLfloat rough_approx_log2_0_1(GLfloat x)
+{
+   return LOG2(x);
+}



@@ -273,13 +277,11 @@ static void do_EXP( struct arb_vp_machine *m, union instruction op )
   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
   GLfloat tmp = arg0[0];
   GLfloat flr_tmp = FLOORF(tmp);
+   GLfloat frac_tmp = tmp - flr_tmp;

-   /* KW: nvvertexec has an optimized version of this which is pretty
-    * hard to understand/validate, but avoids the RoughApproxExp2.
-    */
-   result[0] = (GLfloat) (1 << (int)flr_tmp);
-   result[1] = tmp - flr_tmp;
-   result[2] = RoughApproxExp2(tmp);
+   result[0] = ldexpf(1.0, (int)flr_tmp);
+   result[1] = frac_tmp;
+   result[2] = ldexpf(rough_approx_log2_0_1(frac_tmp), (int)flr_tmp);
   result[3] = 1.0F;
 }

@@ -322,18 +324,22 @@ static void do_LIT( struct arb_vp_machine *m, union instruction op )
 {
   GLfloat *result = m->File[0][op.alu.dst];
   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
-
-   const GLfloat epsilon = 1.0F / 256.0F; /* per NV spec */
   GLfloat tmp[4];

-   tmp[0] = MAX2(arg0[0], 0.0F);
-   tmp[1] = MAX2(arg0[1], 0.0F);
-   tmp[3] = CLAMP(arg0[3], -(128.0F - epsilon), (128.0F - epsilon));
+   tmp[0] = 1.0;
+   tmp[1] = 0.0;
+   tmp[2] = 0.0;
+   tmp[3] = 1.0;

-   result[0] = 1.0;
-   result[1] = tmp[0];
-   result[2] = (tmp[0] > 0.0) ? RoughApproxPower(tmp[1], tmp[3]) : 0.0F;
-   result[3] = 1.0;
+   if (arg0[0] > 0.0) {
+      tmp[1] = arg0[0];
+
+      if (arg0[1] > 0.0) {
+	 tmp[2] = RoughApproxPower(arg0[1], arg0[3]);
+      }
+   }
+
+   COPY_4V(result, tmp);
 }


@@ -353,7 +359,7 @@ static void do_LOG( struct arb_vp_machine *m, union instruction op )

   result[0] = (GLfloat) (exponent - 1);
   result[1] = 2.0 * mantissa; /* map [.5, 1) -> [1, 2) */
-   result[2] = result[0] + LOG2(result[1]);
+   result[2] = exponent + LOG2(mantissa);
   result[3] = 1.0;
 }

@@ -1201,9 +1207,6 @@ run_arb_vertex_program(GLcontext *ctx, struct tnl_pipeline_stage *stage)
   p = (struct tnl_compiled_program *)program->TnlData;
   assert(p);

-   /* Initialize regs where necessary:
-    */
-   ASSIGN_4V(m->File[0][REG_ID], 0, 0, 0, 1);

   m->nr_inputs = m->nr_outputs = 0;

@@ -1386,6 +1389,15 @@ static GLboolean init_vertex_program( GLcontext *ctx,

   m->File[0] = ALIGN_MALLOC(REG_MAX * sizeof(GLfloat) * 4, 16);

+   /* Initialize regs where necessary:
+    */
+   ASSIGN_4V(m->File[0][REG_ID], 0, 0, 0, 1);
+   ASSIGN_4V(m->File[0][REG_ONES], 1, 1, 1, 1);
+   ASSIGN_4V(m->File[0][REG_SWZ], -1, 1, 0, 0);
+   ASSIGN_4V(m->File[0][REG_NEG], -1, -1, -1, -1);
+   ASSIGN_4V(m->File[0][REG_LIT], 1, 0, 0, 1);
+   ASSIGN_4V(m->File[0][REG_LIT2], 1, .5, .2, 1); /* debug value */
+
   if (_mesa_getenv("MESA_EXPERIMENTAL"))
      m->try_codegen = 1;

@@ -1402,6 +1414,9 @@ static GLboolean init_vertex_program( GLcontext *ctx,
   if (ctx->_MaintainTnlProgram)
      _mesa_allow_light_in_model( ctx, GL_FALSE );

+   m->fpucntl_rnd_neg = RND_NEG_FPU; /* const value */
+   m->fpucntl_restore = RESTORE_FPU; /* const value */
+
   return GL_TRUE;
 }

--- a/src/mesa/tnl/t_vb_arbprogram.h
+++ b/src/mesa/tnl/t_vb_arbprogram.h
@@ -60,6 +60,9 @@
 #define REG_ONES   65		/* 1,1,1,1 */
 #define REG_SWZ    66		/* -1,1,0,0 */
 #define REG_NEG    67		/* -1,-1,-1,-1 */
+#define REG_LIT    68           /* 1,0,0,1 */
+#define REG_LIT2    69           /* 1,0,0,1 */
+#define REG_SCRATCH 70		/* internal temporary */
 #define REG_UNDEF  127		/* special case - never used */
 #define REG_MAX    128
 #define REG_INVALID ~0
@@ -125,6 +128,13 @@ struct output {


 /*--------------------------------------------------------------------------- */
+#ifdef NO_FAST_MATH
+#define RESTORE_FPU (DEFAULT_X86_FPU)
+#define RND_NEG_FPU (DEFAULT_X86_FPU | 0x400)
+#else
+#define RESTORE_FPU (FAST_X86_FPU)
+#define RND_NEG_FPU (FAST_X86_FPU | 0x400)
+#endif

 /*!
 * Private storage for the vertex program pipeline stage.
@@ -148,6 +158,9 @@ struct arb_vp_machine {
   struct vertex_buffer *VB;
   GLcontext *ctx;

+   GLshort fpucntl_rnd_neg;	/* constant value */
+   GLshort fpucntl_restore;	/* constant value */
+
   GLboolean try_codegen;
 };

--- a/src/mesa/tnl/t_vb_arbprogram_sse.c
+++ b/src/mesa/tnl/t_vb_arbprogram_sse.c