Add notes about intended precision of opcodes.

Remove dead floating point twiddles.
Fix bug translating MAD->MUL,ADD.
This commit is contained in:
Keith Whitwell
2005-06-10 08:32:27 +00:00
parent 1c48986e54
commit 52608d2d21

View File

@@ -61,45 +61,23 @@ struct compilation {
#define ARB_VP_MACHINE(stage) ((struct arb_vp_machine *)(stage->privatePtr))
/**
* Set x to positive or negative infinity.
*
* XXX: FIXME - type punning.
*/
#if defined(USE_IEEE) || defined(_WIN32)
#define SET_POS_INFINITY(x) ( *((GLuint *) (void *)&x) = 0x7F800000 )
#define SET_NEG_INFINITY(x) ( *((GLuint *) (void *)&x) = 0xFF800000 )
#elif defined(VMS)
#define SET_POS_INFINITY(x) x = __MAXFLOAT
#define SET_NEG_INFINITY(x) x = -__MAXFLOAT
#define IS_INF_OR_NAN(t) ((t) == __MAXFLOAT)
#else
#define SET_POS_INFINITY(x) x = (GLfloat) HUGE_VAL
#define SET_NEG_INFINITY(x) x = (GLfloat) -HUGE_VAL
#endif
#define FREXPF(a,b) frexpf(a,b)
#define PUFF(x) ((x)[1] = (x)[2] = (x)[3] = (x)[0])
/* FIXME: more type punning (despite use of fi_type...)
/* Lower precision functions for the EXP, LOG and LIT opcodes. The
* LOG2() implementation is probably not accurate enough, and the
* attempted optimization for Exp2 is definitely not accurate
* enough - it discards all of t's fractional bits!
*/
#define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits
static GLfloat RoughApproxLog2(GLfloat t)
{
return LOG2(t);
}
static GLfloat RoughApproxPow2(GLfloat t)
static GLfloat RoughApproxExp2(GLfloat t)
{
#if 0
/* This isn't nearly accurate enough - it discards all of t's
* fractional bits!
*/
fi_type fi;
fi.i = (GLint) t;
fi.i = (fi.i << 23) + 0x3f800000;
@@ -111,11 +89,25 @@ static GLfloat RoughApproxPow2(GLfloat t)
static GLfloat RoughApproxPower(GLfloat x, GLfloat y)
{
#if 0
return RoughApproxPow2(y * RoughApproxLog2(x));
#else
return RoughApproxExp2(y * RoughApproxLog2(x));
}
/* Higher precision functions for the EX2, LG2 and POW opcodes:
*/
static GLfloat ApproxLog2(GLfloat t)
{
return (GLfloat) (log(t) * 1.442695F);
}
static GLfloat ApproxExp2(GLfloat t)
{
return (GLfloat) _mesa_pow(2.0, t);
}
static GLfloat ApproxPower(GLfloat x, GLfloat y)
{
return (GLfloat) _mesa_pow(x, y);
#endif
}
@@ -261,15 +253,20 @@ static void do_DST( struct arb_vp_machine *m, union instruction op )
}
/* Intended to be high precision:
*/
static void do_EX2( struct arb_vp_machine *m, union instruction op )
{
GLfloat *result = m->File[0][op.alu.dst];
const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
result[0] = (GLfloat)RoughApproxPow2(arg0[0]);
result[0] = (GLfloat)ApproxExp2(arg0[0]);
PUFF(result);
}
/* Allowed to be lower precision:
*/
static void do_EXP( struct arb_vp_machine *m, union instruction op )
{
GLfloat *result = m->File[0][op.alu.dst];
@@ -278,11 +275,11 @@ static void do_EXP( struct arb_vp_machine *m, union instruction op )
GLfloat flr_tmp = FLOORF(tmp);
/* KW: nvvertexec has an optimized version of this which is pretty
* hard to understand/validate, but avoids the RoughApproxPow2.
* hard to understand/validate, but avoids the RoughApproxExp2.
*/
result[0] = (GLfloat) (1 << (int)flr_tmp);
result[1] = tmp - flr_tmp;
result[2] = RoughApproxPow2(tmp);
result[2] = RoughApproxExp2(tmp);
result[3] = 1.0F;
}
@@ -308,12 +305,14 @@ static void do_FRC( struct arb_vp_machine *m, union instruction op )
result[3] = arg0[3] - FLOORF(arg0[3]);
}
/* High precision log base 2:
*/
static void do_LG2( struct arb_vp_machine *m, union instruction op )
{
GLfloat *result = m->File[0][op.alu.dst];
const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
result[0] = RoughApproxLog2(arg0[0]);
result[0] = ApproxLog2(arg0[0]);
PUFF(result);
}
@@ -338,13 +337,15 @@ static void do_LIT( struct arb_vp_machine *m, union instruction op )
}
/* Intended to allow a lower precision than required for LG2 above.
*/
static void do_LOG( struct arb_vp_machine *m, union instruction op )
{
GLfloat *result = m->File[0][op.alu.dst];
const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
GLfloat tmp = FABSF(arg0[0]);
int exponent;
GLfloat mantissa = FREXPF(tmp, &exponent);
GLfloat mantissa = frexpf(tmp, &exponent);
result[0] = (GLfloat) (exponent - 1);
result[1] = 2.0 * mantissa; /* map [.5, 1) -> [1, 2) */
@@ -401,13 +402,15 @@ static void do_MUL( struct arb_vp_machine *m, union instruction op )
}
/* Intended to be "high" precision
*/
static void do_POW( struct arb_vp_machine *m, union instruction op )
{
GLfloat *result = m->File[0][op.alu.dst];
const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
result[0] = (GLfloat)RoughApproxPower(arg0[0], arg1[0]);
result[0] = (GLfloat)ApproxPower(arg0[0], arg1[0]);
PUFF(result);
}
@@ -901,6 +904,11 @@ static void cvp_emit_inst( struct compilation *cp,
op->alu.file1 = reg[2].file;
op->alu.idx1 = reg[2].idx;
op->alu.dst = result;
if (result == REG_RES) {
op = cvp_next_instruction(cp);
op->dword = fixup.dword;
}
break;
case VP_OPCODE_ARL: