added _mesa_inv_sqrtf() and INV_SQRTF() (Josh Vanderhoof)
This commit is contained in:
@@ -1,4 +1,4 @@
|
|||||||
/* $Id: imports.c,v 1.32 2003/03/01 01:50:21 brianp Exp $ */
|
/* $Id: imports.c,v 1.33 2003/03/04 16:33:53 brianp Exp $ */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Mesa 3-D graphics library
|
* Mesa 3-D graphics library
|
||||||
@@ -346,6 +346,7 @@ _mesa_sqrtf( float x )
|
|||||||
* then reconstruct the result back into a float
|
* then reconstruct the result back into a float
|
||||||
*/
|
*/
|
||||||
num.i = ((sqrttab[num.i >> 16]) << 16) | ((e + 127) << 23);
|
num.i = ((sqrttab[num.i >> 16]) << 16) | ((e + 127) << 23);
|
||||||
|
|
||||||
return num.f;
|
return num.f;
|
||||||
#else
|
#else
|
||||||
return (float) _mesa_sqrtd((double) x);
|
return (float) _mesa_sqrtd((double) x);
|
||||||
@@ -353,6 +354,115 @@ _mesa_sqrtf( float x )
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
inv_sqrt - A single precision 1/sqrt routine for IEEE format floats.
|
||||||
|
written by Josh Vanderhoof, based on newsgroup posts by James Van Buskirk
|
||||||
|
and Vesa Karvonen.
|
||||||
|
*/
|
||||||
|
float
|
||||||
|
_mesa_inv_sqrtf(float n)
|
||||||
|
{
|
||||||
|
#if defined(USE_IEEE) && !defined(DEBUG)
|
||||||
|
float r0, x0, y0;
|
||||||
|
float r1, x1, y1;
|
||||||
|
float r2, x2, y2;
|
||||||
|
#if 0 /* not used, see below -BP */
|
||||||
|
float r3, x3, y3;
|
||||||
|
#endif
|
||||||
|
union { float f; unsigned int i; } u;
|
||||||
|
unsigned int magic;
|
||||||
|
|
||||||
|
/*
|
||||||
|
Exponent part of the magic number -
|
||||||
|
|
||||||
|
We want to:
|
||||||
|
1. subtract the bias from the exponent,
|
||||||
|
2. negate it
|
||||||
|
3. divide by two (rounding towards -inf)
|
||||||
|
4. add the bias back
|
||||||
|
|
||||||
|
Which is the same as subtracting the exponent from 381 and dividing
|
||||||
|
by 2.
|
||||||
|
|
||||||
|
floor(-(x - 127) / 2) + 127 = floor((381 - x) / 2)
|
||||||
|
*/
|
||||||
|
|
||||||
|
magic = 381 << 23;
|
||||||
|
|
||||||
|
/*
|
||||||
|
Significand part of magic number -
|
||||||
|
|
||||||
|
With the current magic number, "(magic - u.i) >> 1" will give you:
|
||||||
|
|
||||||
|
for 1 <= u.f <= 2: 1.25 - u.f / 4
|
||||||
|
for 2 <= u.f <= 4: 1.00 - u.f / 8
|
||||||
|
|
||||||
|
This isn't a bad approximation of 1/sqrt. The maximum difference from
|
||||||
|
1/sqrt will be around .06. After three Newton-Raphson iterations, the
|
||||||
|
maximum difference is less than 4.5e-8. (Which is actually close
|
||||||
|
enough to make the following bias academic...)
|
||||||
|
|
||||||
|
To get a better approximation you can add a bias to the magic
|
||||||
|
number. For example, if you subtract 1/2 of the maximum difference in
|
||||||
|
the first approximation (.03), you will get the following function:
|
||||||
|
|
||||||
|
for 1 <= u.f <= 2: 1.22 - u.f / 4
|
||||||
|
for 2 <= u.f <= 3.76: 0.97 - u.f / 8
|
||||||
|
for 3.76 <= u.f <= 4: 0.72 - u.f / 16
|
||||||
|
(The 3.76 to 4 range is where the result is < .5.)
|
||||||
|
|
||||||
|
This is the closest possible initial approximation, but with a maximum
|
||||||
|
error of 8e-11 after three NR iterations, it is still not perfect. If
|
||||||
|
you subtract 0.0332281 instead of .03, the maximum error will be
|
||||||
|
2.5e-11 after three NR iterations, which should be about as close as
|
||||||
|
is possible.
|
||||||
|
|
||||||
|
for 1 <= u.f <= 2: 1.2167719 - u.f / 4
|
||||||
|
for 2 <= u.f <= 3.73: 0.9667719 - u.f / 8
|
||||||
|
for 3.73 <= u.f <= 4: 0.7167719 - u.f / 16
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
magic -= (int)(0.0332281 * (1 << 25));
|
||||||
|
|
||||||
|
u.f = n;
|
||||||
|
u.i = (magic - u.i) >> 1;
|
||||||
|
|
||||||
|
/*
|
||||||
|
Instead of Newton-Raphson, we use Goldschmidt's algorithm, which
|
||||||
|
allows more parallelism. From what I understand, the parallelism
|
||||||
|
comes at the cost of less precision, because it lets error
|
||||||
|
accumulate across iterations.
|
||||||
|
*/
|
||||||
|
x0 = 1.0f;
|
||||||
|
y0 = 0.5f * n;
|
||||||
|
r0 = u.f;
|
||||||
|
|
||||||
|
x1 = x0 * r0;
|
||||||
|
y1 = y0 * r0 * r0;
|
||||||
|
r1 = 1.5f - y1;
|
||||||
|
|
||||||
|
x2 = x1 * r1;
|
||||||
|
y2 = y1 * r1 * r1;
|
||||||
|
r2 = 1.5f - y2;
|
||||||
|
|
||||||
|
#if 1
|
||||||
|
return x2 * r2; /* we can stop here, and be conformant -BP */
|
||||||
|
#else
|
||||||
|
x3 = x2 * r2;
|
||||||
|
y3 = y2 * r2 * r2;
|
||||||
|
r3 = 1.5f - y3;
|
||||||
|
|
||||||
|
return x3 * r3;
|
||||||
|
#endif
|
||||||
|
#elif defined(XFree86LOADER) && defined(IN_MODULE)
|
||||||
|
return 1.0F / xf86sqrt(n);
|
||||||
|
#else
|
||||||
|
return 1.0F / sqrt(n);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
double
|
double
|
||||||
_mesa_pow(double x, double y)
|
_mesa_pow(double x, double y)
|
||||||
{
|
{
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
/* $Id: imports.h,v 1.16 2003/03/03 21:44:39 brianp Exp $ */
|
/* $Id: imports.h,v 1.17 2003/03/04 16:33:53 brianp Exp $ */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Mesa 3-D graphics library
|
* Mesa 3-D graphics library
|
||||||
@@ -168,6 +168,16 @@ float asm_sqrt (float x);
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/***
|
||||||
|
*** INV_SQRTF: single-precision inverse square root
|
||||||
|
***/
|
||||||
|
#if 0
|
||||||
|
#define INV_SQRTF(X) _mesa_inv_sqrt(X)
|
||||||
|
#else
|
||||||
|
#define INV_SQRTF(X) (1.0F / SQRTF(X)) /* this is faster on a P4 */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
/***
|
/***
|
||||||
*** LOG2: Log base 2 of float
|
*** LOG2: Log base 2 of float
|
||||||
***/
|
***/
|
||||||
@@ -588,6 +598,9 @@ _mesa_sqrtd(double x);
|
|||||||
extern float
|
extern float
|
||||||
_mesa_sqrtf(float x);
|
_mesa_sqrtf(float x);
|
||||||
|
|
||||||
|
extern float
|
||||||
|
_mesa_inv_sqrtf(float x);
|
||||||
|
|
||||||
extern double
|
extern double
|
||||||
_mesa_pow(double x, double y);
|
_mesa_pow(double x, double y);
|
||||||
|
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
/* $Id: macros.h,v 1.31 2003/03/01 01:50:21 brianp Exp $ */
|
/* $Id: macros.h,v 1.32 2003/03/04 16:33:54 brianp Exp $ */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Mesa 3-D graphics library
|
* Mesa 3-D graphics library
|
||||||
@@ -572,7 +572,7 @@ do { \
|
|||||||
do { \
|
do { \
|
||||||
GLfloat len = (GLfloat) LEN_SQUARED_3FV(V); \
|
GLfloat len = (GLfloat) LEN_SQUARED_3FV(V); \
|
||||||
if (len) { \
|
if (len) { \
|
||||||
len = (GLfloat) (1.0 / SQRTF(len)); \
|
len = INV_SQRTF(len); \
|
||||||
(V)[0] = (GLfloat) ((V)[0] * len); \
|
(V)[0] = (GLfloat) ((V)[0] * len); \
|
||||||
(V)[1] = (GLfloat) ((V)[1] * len); \
|
(V)[1] = (GLfloat) ((V)[1] * len); \
|
||||||
(V)[2] = (GLfloat) ((V)[2] * len); \
|
(V)[2] = (GLfloat) ((V)[2] * len); \
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
/* $Id: nvvertexec.c,v 1.2 2003/03/01 01:50:22 brianp Exp $ */
|
/* $Id: nvvertexec.c,v 1.3 2003/03/04 16:33:55 brianp Exp $ */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Mesa 3-D graphics library
|
* Mesa 3-D graphics library
|
||||||
@@ -380,7 +380,7 @@ _mesa_exec_vertex_program(GLcontext *ctx, const struct vertex_program *program)
|
|||||||
{
|
{
|
||||||
GLfloat t[4];
|
GLfloat t[4];
|
||||||
fetch_vector1( &inst->SrcReg[0], machine, t );
|
fetch_vector1( &inst->SrcReg[0], machine, t );
|
||||||
t[0] = (float) (1.0 / sqrt(fabs(t[0])));
|
t[0] = INV_SQRTF(FABSF(t[0]));
|
||||||
t[1] = t[2] = t[3] = t[0];
|
t[1] = t[2] = t[3] = t[0];
|
||||||
store_vector4( &inst->DstReg, machine, t );
|
store_vector4( &inst->DstReg, machine, t );
|
||||||
}
|
}
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
/* $Id: m_norm_tmp.h,v 1.13 2003/03/01 01:50:24 brianp Exp $ */
|
/* $Id: m_norm_tmp.h,v 1.14 2003/03/04 16:34:01 brianp Exp $ */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Mesa 3-D graphics library
|
* Mesa 3-D graphics library
|
||||||
@@ -69,10 +69,10 @@ TAG(transform_normalize_normals)( const GLmatrix *mat,
|
|||||||
{
|
{
|
||||||
GLdouble len = tx*tx + ty*ty + tz*tz;
|
GLdouble len = tx*tx + ty*ty + tz*tz;
|
||||||
if (len > 1e-20) {
|
if (len > 1e-20) {
|
||||||
GLdouble scale = 1.0F / SQRTF(len);
|
GLfloat scale = INV_SQRTF(len);
|
||||||
out[i][0] = (GLfloat) (tx * scale);
|
out[i][0] = tx * scale;
|
||||||
out[i][1] = (GLfloat) (ty * scale);
|
out[i][1] = ty * scale;
|
||||||
out[i][2] = (GLfloat) (tz * scale);
|
out[i][2] = tz * scale;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
out[i][0] = out[i][1] = out[i][2] = 0;
|
out[i][0] = out[i][1] = out[i][2] = 0;
|
||||||
@@ -136,10 +136,10 @@ TAG(transform_normalize_normals_no_rot)( const GLmatrix *mat,
|
|||||||
{
|
{
|
||||||
GLdouble len = tx*tx + ty*ty + tz*tz;
|
GLdouble len = tx*tx + ty*ty + tz*tz;
|
||||||
if (len > 1e-20) {
|
if (len > 1e-20) {
|
||||||
GLdouble scale = 1.0F / SQRTF(len);
|
GLfloat scale = INV_SQRTF(len);
|
||||||
out[i][0] = (GLfloat) (tx * scale);
|
out[i][0] = tx * scale;
|
||||||
out[i][1] = (GLfloat) (ty * scale);
|
out[i][1] = ty * scale;
|
||||||
out[i][2] = (GLfloat) (tz * scale);
|
out[i][2] = tz * scale;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
out[i][0] = out[i][1] = out[i][2] = 0;
|
out[i][0] = out[i][1] = out[i][2] = 0;
|
||||||
@@ -323,10 +323,10 @@ TAG(normalize_normals)( const GLmatrix *mat,
|
|||||||
const GLfloat x = from[0], y = from[1], z = from[2];
|
const GLfloat x = from[0], y = from[1], z = from[2];
|
||||||
GLdouble len = x * x + y * y + z * z;
|
GLdouble len = x * x + y * y + z * z;
|
||||||
if (len > 1e-50) {
|
if (len > 1e-50) {
|
||||||
len = 1.0F / SQRTF(len);
|
len = INV_SQRTF(len);
|
||||||
out[i][0] = (GLfloat) (x * len);
|
out[i][0] = x * len;
|
||||||
out[i][1] = (GLfloat) (y * len);
|
out[i][1] = y * len;
|
||||||
out[i][2] = (GLfloat) (z * len);
|
out[i][2] = z * len;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
out[i][0] = x;
|
out[i][0] = x;
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
/* $Id: s_aalinetemp.h,v 1.22 2003/02/21 21:00:27 brianp Exp $ */
|
/* $Id: s_aalinetemp.h,v 1.23 2003/03/04 16:34:02 brianp Exp $ */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Mesa 3-D graphics library
|
* Mesa 3-D graphics library
|
||||||
@@ -132,7 +132,7 @@ NAME(line)(GLcontext *ctx, const SWvertex *v0, const SWvertex *v1)
|
|||||||
line.y1 = v1->win[1];
|
line.y1 = v1->win[1];
|
||||||
line.dx = line.x1 - line.x0;
|
line.dx = line.x1 - line.x0;
|
||||||
line.dy = line.y1 - line.y0;
|
line.dy = line.y1 - line.y0;
|
||||||
line.len = (GLfloat) sqrt(line.dx * line.dx + line.dy * line.dy);
|
line.len = SQRTF(line.dx * line.dx + line.dy * line.dy);
|
||||||
line.halfWidth = 0.5F * ctx->Line.Width;
|
line.halfWidth = 0.5F * ctx->Line.Width;
|
||||||
|
|
||||||
if (line.len == 0.0 || IS_INF_OR_NAN(line.len))
|
if (line.len == 0.0 || IS_INF_OR_NAN(line.len))
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
/* $Id: s_nvfragprog.c,v 1.5 2003/03/01 01:50:26 brianp Exp $ */
|
/* $Id: s_nvfragprog.c,v 1.6 2003/03/04 16:34:03 brianp Exp $ */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Mesa 3-D graphics library
|
* Mesa 3-D graphics library
|
||||||
@@ -582,8 +582,7 @@ execute_program(GLcontext *ctx, const struct fragment_program *program)
|
|||||||
{
|
{
|
||||||
GLfloat a[4], result[4];
|
GLfloat a[4], result[4];
|
||||||
fetch_vector1( &inst->SrcReg[0], machine, a );
|
fetch_vector1( &inst->SrcReg[0], machine, a );
|
||||||
result[0] = result[1] = result[2] = result[3]
|
result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
|
||||||
= 1.0F / SQRTF(a[0]);
|
|
||||||
store_vector4( inst, machine, result );
|
store_vector4( inst, machine, result );
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
/* $Id: s_span.c,v 1.56 2003/03/01 01:50:26 brianp Exp $ */
|
/* $Id: s_span.c,v 1.57 2003/03/04 16:34:03 brianp Exp $ */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Mesa 3-D graphics library
|
* Mesa 3-D graphics library
|
||||||
@@ -309,8 +309,8 @@ compute_lambda(GLfloat dsdx, GLfloat dsdy, GLfloat dtdx, GLfloat dtdy,
|
|||||||
GLfloat dvdx = texH * ((t + dtdx) / (q + dqdx) - t * invQ);
|
GLfloat dvdx = texH * ((t + dtdx) / (q + dqdx) - t * invQ);
|
||||||
GLfloat dudy = texW * ((s + dsdy) / (q + dqdy) - s * invQ);
|
GLfloat dudy = texW * ((s + dsdy) / (q + dqdy) - s * invQ);
|
||||||
GLfloat dvdy = texH * ((t + dtdy) / (q + dqdy) - t * invQ);
|
GLfloat dvdy = texH * ((t + dtdy) / (q + dqdy) - t * invQ);
|
||||||
GLfloat x = sqrt(dudx * dudx + dvdx * dvdx);
|
GLfloat x = SQRTF(dudx * dudx + dvdx * dvdx);
|
||||||
GLfloat y = sqrt(dudy * dudy + dvdy * dvdy);
|
GLfloat y = SQRTF(dudy * dudy + dvdy * dvdy);
|
||||||
GLfloat rho = MAX2(x, y);
|
GLfloat rho = MAX2(x, y);
|
||||||
GLfloat lambda = LOG2(rho);
|
GLfloat lambda = LOG2(rho);
|
||||||
return lambda;
|
return lambda;
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
/* $Id: t_vb_texgen.c,v 1.17 2003/03/01 01:50:27 brianp Exp $ */
|
/* $Id: t_vb_texgen.c,v 1.18 2003/03/04 16:34:04 brianp Exp $ */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Mesa 3-D graphics library
|
* Mesa 3-D graphics library
|
||||||
@@ -113,7 +113,7 @@ static void build_m3( GLfloat f[][3], GLfloat m[],
|
|||||||
fz = f[i][2] = u[2] - norm[2] * two_nu;
|
fz = f[i][2] = u[2] - norm[2] * two_nu;
|
||||||
m[i] = fx * fx + fy * fy + (fz + 1.0F) * (fz + 1.0F);
|
m[i] = fx * fx + fy * fy + (fz + 1.0F) * (fz + 1.0F);
|
||||||
if (m[i] != 0.0F) {
|
if (m[i] != 0.0F) {
|
||||||
m[i] = 0.5F / SQRTF(m[i]);
|
m[i] = 0.5F * _mesa_inv_sqrtf(m[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -142,7 +142,7 @@ static void build_m2( GLfloat f[][3], GLfloat m[],
|
|||||||
fz = f[i][2] = u[2] - norm[2] * two_nu;
|
fz = f[i][2] = u[2] - norm[2] * two_nu;
|
||||||
m[i] = fx * fx + fy * fy + (fz + 1.0F) * (fz + 1.0F);
|
m[i] = fx * fx + fy * fy + (fz + 1.0F) * (fz + 1.0F);
|
||||||
if (m[i] != 0.0F) {
|
if (m[i] != 0.0F) {
|
||||||
m[i] = 0.5F / SQRTF(m[i]);
|
m[i] = 0.5F * _mesa_inv_sqrtf(m[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user