Add runtime compiled x87 implementations for most of the remaining

opcodes.  When enabled via environment vars, gears runs and almost
looks right but other apps are still quite buggy.
This commit is contained in:
Keith Whitwell
2005-07-10 11:23:10 +00:00
parent 0cfbd849ec
commit 9311bc253e
3 changed files with 596 additions and 229 deletions

View File

@@ -110,6 +110,10 @@ static GLfloat ApproxPower(GLfloat x, GLfloat y)
return (GLfloat) _mesa_pow(x, y); return (GLfloat) _mesa_pow(x, y);
} }
static GLfloat rough_approx_log2_0_1(GLfloat x)
{
return LOG2(x);
}
@@ -273,13 +277,11 @@ static void do_EXP( struct arb_vp_machine *m, union instruction op )
const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
GLfloat tmp = arg0[0]; GLfloat tmp = arg0[0];
GLfloat flr_tmp = FLOORF(tmp); GLfloat flr_tmp = FLOORF(tmp);
GLfloat frac_tmp = tmp - flr_tmp;
/* KW: nvvertexec has an optimized version of this which is pretty result[0] = ldexpf(1.0, (int)flr_tmp);
* hard to understand/validate, but avoids the RoughApproxExp2. result[1] = frac_tmp;
*/ result[2] = ldexpf(rough_approx_log2_0_1(frac_tmp), (int)flr_tmp);
result[0] = (GLfloat) (1 << (int)flr_tmp);
result[1] = tmp - flr_tmp;
result[2] = RoughApproxExp2(tmp);
result[3] = 1.0F; result[3] = 1.0F;
} }
@@ -322,18 +324,22 @@ static void do_LIT( struct arb_vp_machine *m, union instruction op )
{ {
GLfloat *result = m->File[0][op.alu.dst]; GLfloat *result = m->File[0][op.alu.dst];
const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
const GLfloat epsilon = 1.0F / 256.0F; /* per NV spec */
GLfloat tmp[4]; GLfloat tmp[4];
tmp[0] = MAX2(arg0[0], 0.0F); tmp[0] = 1.0;
tmp[1] = MAX2(arg0[1], 0.0F); tmp[1] = 0.0;
tmp[3] = CLAMP(arg0[3], -(128.0F - epsilon), (128.0F - epsilon)); tmp[2] = 0.0;
tmp[3] = 1.0;
result[0] = 1.0; if (arg0[0] > 0.0) {
result[1] = tmp[0]; tmp[1] = arg0[0];
result[2] = (tmp[0] > 0.0) ? RoughApproxPower(tmp[1], tmp[3]) : 0.0F;
result[3] = 1.0; if (arg0[1] > 0.0) {
tmp[2] = RoughApproxPower(arg0[1], arg0[3]);
}
}
COPY_4V(result, tmp);
} }
@@ -353,7 +359,7 @@ static void do_LOG( struct arb_vp_machine *m, union instruction op )
result[0] = (GLfloat) (exponent - 1); result[0] = (GLfloat) (exponent - 1);
result[1] = 2.0 * mantissa; /* map [.5, 1) -> [1, 2) */ result[1] = 2.0 * mantissa; /* map [.5, 1) -> [1, 2) */
result[2] = result[0] + LOG2(result[1]); result[2] = exponent + LOG2(mantissa);
result[3] = 1.0; result[3] = 1.0;
} }
@@ -1201,9 +1207,6 @@ run_arb_vertex_program(GLcontext *ctx, struct tnl_pipeline_stage *stage)
p = (struct tnl_compiled_program *)program->TnlData; p = (struct tnl_compiled_program *)program->TnlData;
assert(p); assert(p);
/* Initialize regs where necessary:
*/
ASSIGN_4V(m->File[0][REG_ID], 0, 0, 0, 1);
m->nr_inputs = m->nr_outputs = 0; m->nr_inputs = m->nr_outputs = 0;
@@ -1386,6 +1389,15 @@ static GLboolean init_vertex_program( GLcontext *ctx,
m->File[0] = ALIGN_MALLOC(REG_MAX * sizeof(GLfloat) * 4, 16); m->File[0] = ALIGN_MALLOC(REG_MAX * sizeof(GLfloat) * 4, 16);
/* Initialize regs where necessary:
*/
ASSIGN_4V(m->File[0][REG_ID], 0, 0, 0, 1);
ASSIGN_4V(m->File[0][REG_ONES], 1, 1, 1, 1);
ASSIGN_4V(m->File[0][REG_SWZ], -1, 1, 0, 0);
ASSIGN_4V(m->File[0][REG_NEG], -1, -1, -1, -1);
ASSIGN_4V(m->File[0][REG_LIT], 1, 0, 0, 1);
ASSIGN_4V(m->File[0][REG_LIT2], 1, .5, .2, 1); /* debug value */
if (_mesa_getenv("MESA_EXPERIMENTAL")) if (_mesa_getenv("MESA_EXPERIMENTAL"))
m->try_codegen = 1; m->try_codegen = 1;
@@ -1402,6 +1414,9 @@ static GLboolean init_vertex_program( GLcontext *ctx,
if (ctx->_MaintainTnlProgram) if (ctx->_MaintainTnlProgram)
_mesa_allow_light_in_model( ctx, GL_FALSE ); _mesa_allow_light_in_model( ctx, GL_FALSE );
m->fpucntl_rnd_neg = RND_NEG_FPU; /* const value */
m->fpucntl_restore = RESTORE_FPU; /* const value */
return GL_TRUE; return GL_TRUE;
} }

View File

@@ -60,6 +60,9 @@
#define REG_ONES 65 /* 1,1,1,1 */ #define REG_ONES 65 /* 1,1,1,1 */
#define REG_SWZ 66 /* -1,1,0,0 */ #define REG_SWZ 66 /* -1,1,0,0 */
#define REG_NEG 67 /* -1,-1,-1,-1 */ #define REG_NEG 67 /* -1,-1,-1,-1 */
#define REG_LIT 68 /* 1,0,0,1 */
#define REG_LIT2 69 /* 1,0,0,1 */
#define REG_SCRATCH 70 /* internal temporary */
#define REG_UNDEF 127 /* special case - never used */ #define REG_UNDEF 127 /* special case - never used */
#define REG_MAX 128 #define REG_MAX 128
#define REG_INVALID ~0 #define REG_INVALID ~0
@@ -125,6 +128,13 @@ struct output {
/*--------------------------------------------------------------------------- */ /*--------------------------------------------------------------------------- */
#ifdef NO_FAST_MATH
#define RESTORE_FPU (DEFAULT_X86_FPU)
#define RND_NEG_FPU (DEFAULT_X86_FPU | 0x400)
#else
#define RESTORE_FPU (FAST_X86_FPU)
#define RND_NEG_FPU (FAST_X86_FPU | 0x400)
#endif
/*! /*!
* Private storage for the vertex program pipeline stage. * Private storage for the vertex program pipeline stage.
@@ -148,6 +158,9 @@ struct arb_vp_machine {
struct vertex_buffer *VB; struct vertex_buffer *VB;
GLcontext *ctx; GLcontext *ctx;
GLshort fpucntl_rnd_neg; /* constant value */
GLshort fpucntl_restore; /* constant value */
GLboolean try_codegen; GLboolean try_codegen;
}; };

File diff suppressed because it is too large Load Diff