Add runtime compiled x87 implementations for most of the remaining
opcodes. When enabled via environment vars, gears runs and almost looks right but other apps are still quite buggy.
This commit is contained in:
@@ -110,6 +110,10 @@ static GLfloat ApproxPower(GLfloat x, GLfloat y)
|
||||
return (GLfloat) _mesa_pow(x, y);
|
||||
}
|
||||
|
||||
static GLfloat rough_approx_log2_0_1(GLfloat x)
|
||||
{
|
||||
return LOG2(x);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -273,13 +277,11 @@ static void do_EXP( struct arb_vp_machine *m, union instruction op )
|
||||
const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
|
||||
GLfloat tmp = arg0[0];
|
||||
GLfloat flr_tmp = FLOORF(tmp);
|
||||
GLfloat frac_tmp = tmp - flr_tmp;
|
||||
|
||||
/* KW: nvvertexec has an optimized version of this which is pretty
|
||||
* hard to understand/validate, but avoids the RoughApproxExp2.
|
||||
*/
|
||||
result[0] = (GLfloat) (1 << (int)flr_tmp);
|
||||
result[1] = tmp - flr_tmp;
|
||||
result[2] = RoughApproxExp2(tmp);
|
||||
result[0] = ldexpf(1.0, (int)flr_tmp);
|
||||
result[1] = frac_tmp;
|
||||
result[2] = ldexpf(rough_approx_log2_0_1(frac_tmp), (int)flr_tmp);
|
||||
result[3] = 1.0F;
|
||||
}
|
||||
|
||||
@@ -322,18 +324,22 @@ static void do_LIT( struct arb_vp_machine *m, union instruction op )
|
||||
{
|
||||
GLfloat *result = m->File[0][op.alu.dst];
|
||||
const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
|
||||
|
||||
const GLfloat epsilon = 1.0F / 256.0F; /* per NV spec */
|
||||
GLfloat tmp[4];
|
||||
|
||||
tmp[0] = MAX2(arg0[0], 0.0F);
|
||||
tmp[1] = MAX2(arg0[1], 0.0F);
|
||||
tmp[3] = CLAMP(arg0[3], -(128.0F - epsilon), (128.0F - epsilon));
|
||||
tmp[0] = 1.0;
|
||||
tmp[1] = 0.0;
|
||||
tmp[2] = 0.0;
|
||||
tmp[3] = 1.0;
|
||||
|
||||
result[0] = 1.0;
|
||||
result[1] = tmp[0];
|
||||
result[2] = (tmp[0] > 0.0) ? RoughApproxPower(tmp[1], tmp[3]) : 0.0F;
|
||||
result[3] = 1.0;
|
||||
if (arg0[0] > 0.0) {
|
||||
tmp[1] = arg0[0];
|
||||
|
||||
if (arg0[1] > 0.0) {
|
||||
tmp[2] = RoughApproxPower(arg0[1], arg0[3]);
|
||||
}
|
||||
}
|
||||
|
||||
COPY_4V(result, tmp);
|
||||
}
|
||||
|
||||
|
||||
@@ -353,7 +359,7 @@ static void do_LOG( struct arb_vp_machine *m, union instruction op )
|
||||
|
||||
result[0] = (GLfloat) (exponent - 1);
|
||||
result[1] = 2.0 * mantissa; /* map [.5, 1) -> [1, 2) */
|
||||
result[2] = result[0] + LOG2(result[1]);
|
||||
result[2] = exponent + LOG2(mantissa);
|
||||
result[3] = 1.0;
|
||||
}
|
||||
|
||||
@@ -1201,9 +1207,6 @@ run_arb_vertex_program(GLcontext *ctx, struct tnl_pipeline_stage *stage)
|
||||
p = (struct tnl_compiled_program *)program->TnlData;
|
||||
assert(p);
|
||||
|
||||
/* Initialize regs where necessary:
|
||||
*/
|
||||
ASSIGN_4V(m->File[0][REG_ID], 0, 0, 0, 1);
|
||||
|
||||
m->nr_inputs = m->nr_outputs = 0;
|
||||
|
||||
@@ -1386,6 +1389,15 @@ static GLboolean init_vertex_program( GLcontext *ctx,
|
||||
|
||||
m->File[0] = ALIGN_MALLOC(REG_MAX * sizeof(GLfloat) * 4, 16);
|
||||
|
||||
/* Initialize regs where necessary:
|
||||
*/
|
||||
ASSIGN_4V(m->File[0][REG_ID], 0, 0, 0, 1);
|
||||
ASSIGN_4V(m->File[0][REG_ONES], 1, 1, 1, 1);
|
||||
ASSIGN_4V(m->File[0][REG_SWZ], -1, 1, 0, 0);
|
||||
ASSIGN_4V(m->File[0][REG_NEG], -1, -1, -1, -1);
|
||||
ASSIGN_4V(m->File[0][REG_LIT], 1, 0, 0, 1);
|
||||
ASSIGN_4V(m->File[0][REG_LIT2], 1, .5, .2, 1); /* debug value */
|
||||
|
||||
if (_mesa_getenv("MESA_EXPERIMENTAL"))
|
||||
m->try_codegen = 1;
|
||||
|
||||
@@ -1402,6 +1414,9 @@ static GLboolean init_vertex_program( GLcontext *ctx,
|
||||
if (ctx->_MaintainTnlProgram)
|
||||
_mesa_allow_light_in_model( ctx, GL_FALSE );
|
||||
|
||||
m->fpucntl_rnd_neg = RND_NEG_FPU; /* const value */
|
||||
m->fpucntl_restore = RESTORE_FPU; /* const value */
|
||||
|
||||
return GL_TRUE;
|
||||
}
|
||||
|
||||
|
@@ -60,6 +60,9 @@
|
||||
#define REG_ONES 65 /* 1,1,1,1 */
|
||||
#define REG_SWZ 66 /* -1,1,0,0 */
|
||||
#define REG_NEG 67 /* -1,-1,-1,-1 */
|
||||
#define REG_LIT 68 /* 1,0,0,1 */
|
||||
#define REG_LIT2 69 /* 1,0,0,1 */
|
||||
#define REG_SCRATCH 70 /* internal temporary */
|
||||
#define REG_UNDEF 127 /* special case - never used */
|
||||
#define REG_MAX 128
|
||||
#define REG_INVALID ~0
|
||||
@@ -125,6 +128,13 @@ struct output {
|
||||
|
||||
|
||||
/*--------------------------------------------------------------------------- */
|
||||
#ifdef NO_FAST_MATH
|
||||
#define RESTORE_FPU (DEFAULT_X86_FPU)
|
||||
#define RND_NEG_FPU (DEFAULT_X86_FPU | 0x400)
|
||||
#else
|
||||
#define RESTORE_FPU (FAST_X86_FPU)
|
||||
#define RND_NEG_FPU (FAST_X86_FPU | 0x400)
|
||||
#endif
|
||||
|
||||
/*!
|
||||
* Private storage for the vertex program pipeline stage.
|
||||
@@ -148,6 +158,9 @@ struct arb_vp_machine {
|
||||
struct vertex_buffer *VB;
|
||||
GLcontext *ctx;
|
||||
|
||||
GLshort fpucntl_rnd_neg; /* constant value */
|
||||
GLshort fpucntl_restore; /* constant value */
|
||||
|
||||
GLboolean try_codegen;
|
||||
};
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user