gallium: fixup definitions of the rsq and sqrt
GLSL spec says that rsq is undefined for src<=0, but the D3D10 spec says it needs to be a NaN, so let's stop taking an absolute value of the source, which completely breaks that behavior. For the GL program we can simply insert an extra abs instruction, which produces the desired behavior there. Signed-off-by: Zack Rusin <zackr@vmware.com> Reviewed-by: Roland Scheidegger <sroland@vmware.com> Reviewed-by: Brian Paul <brianp@vmware.com>
This commit is contained in:
@@ -633,8 +633,6 @@ rsq_emit(
|
|||||||
struct lp_build_tgsi_context * bld_base,
|
struct lp_build_tgsi_context * bld_base,
|
||||||
struct lp_build_emit_data * emit_data)
|
struct lp_build_emit_data * emit_data)
|
||||||
{
|
{
|
||||||
emit_data->args[0] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS,
|
|
||||||
emit_data->args[0]);
|
|
||||||
if (bld_base->rsq_action.emit) {
|
if (bld_base->rsq_action.emit) {
|
||||||
bld_base->rsq_action.emit(&bld_base->rsq_action, bld_base, emit_data);
|
bld_base->rsq_action.emit(&bld_base->rsq_action, bld_base, emit_data);
|
||||||
} else {
|
} else {
|
||||||
@@ -1349,9 +1347,6 @@ rcp_emit_cpu(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Reciprical squareroot (CPU Only) */
|
/* Reciprical squareroot (CPU Only) */
|
||||||
|
|
||||||
/* This is not the same as TGSI_OPCODE_RSQ, which requres the argument to be
|
|
||||||
* greater than or equal to 0 */
|
|
||||||
static void
|
static void
|
||||||
recip_sqrt_emit_cpu(
|
recip_sqrt_emit_cpu(
|
||||||
const struct lp_build_tgsi_action * action,
|
const struct lp_build_tgsi_action * action,
|
||||||
|
@@ -339,20 +339,20 @@ micro_rsq(union tgsi_exec_channel *dst,
|
|||||||
assert(src->f[2] != 0.0f);
|
assert(src->f[2] != 0.0f);
|
||||||
assert(src->f[3] != 0.0f);
|
assert(src->f[3] != 0.0f);
|
||||||
#endif
|
#endif
|
||||||
dst->f[0] = 1.0f / sqrtf(fabsf(src->f[0]));
|
dst->f[0] = 1.0f / sqrtf(src->f[0]);
|
||||||
dst->f[1] = 1.0f / sqrtf(fabsf(src->f[1]));
|
dst->f[1] = 1.0f / sqrtf(src->f[1]);
|
||||||
dst->f[2] = 1.0f / sqrtf(fabsf(src->f[2]));
|
dst->f[2] = 1.0f / sqrtf(src->f[2]);
|
||||||
dst->f[3] = 1.0f / sqrtf(fabsf(src->f[3]));
|
dst->f[3] = 1.0f / sqrtf(src->f[3]);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
micro_sqrt(union tgsi_exec_channel *dst,
|
micro_sqrt(union tgsi_exec_channel *dst,
|
||||||
const union tgsi_exec_channel *src)
|
const union tgsi_exec_channel *src)
|
||||||
{
|
{
|
||||||
dst->f[0] = sqrtf(fabsf(src->f[0]));
|
dst->f[0] = sqrtf(src->f[0]);
|
||||||
dst->f[1] = sqrtf(fabsf(src->f[1]));
|
dst->f[1] = sqrtf(src->f[1]);
|
||||||
dst->f[2] = sqrtf(fabsf(src->f[2]));
|
dst->f[2] = sqrtf(src->f[2]);
|
||||||
dst->f[3] = sqrtf(fabsf(src->f[3]));
|
dst->f[3] = sqrtf(src->f[3]);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@@ -94,16 +94,16 @@ This instruction replicates its result.
|
|||||||
|
|
||||||
.. opcode:: RSQ - Reciprocal Square Root
|
.. opcode:: RSQ - Reciprocal Square Root
|
||||||
|
|
||||||
This instruction replicates its result.
|
This instruction replicates its result. The results are undefined for src <= 0.
|
||||||
|
|
||||||
.. math::
|
.. math::
|
||||||
|
|
||||||
dst = \frac{1}{\sqrt{|src.x|}}
|
dst = \frac{1}{\sqrt{src.x}}
|
||||||
|
|
||||||
|
|
||||||
.. opcode:: SQRT - Square Root
|
.. opcode:: SQRT - Square Root
|
||||||
|
|
||||||
This instruction replicates its result.
|
This instruction replicates its result. The results are undefined for src < 0.
|
||||||
|
|
||||||
.. math::
|
.. math::
|
||||||
|
|
||||||
|
@@ -615,8 +615,6 @@ translate_opcode( unsigned op )
|
|||||||
return TGSI_OPCODE_RCP;
|
return TGSI_OPCODE_RCP;
|
||||||
case OPCODE_RET:
|
case OPCODE_RET:
|
||||||
return TGSI_OPCODE_RET;
|
return TGSI_OPCODE_RET;
|
||||||
case OPCODE_RSQ:
|
|
||||||
return TGSI_OPCODE_RSQ;
|
|
||||||
case OPCODE_SCS:
|
case OPCODE_SCS:
|
||||||
return TGSI_OPCODE_SCS;
|
return TGSI_OPCODE_SCS;
|
||||||
case OPCODE_SEQ:
|
case OPCODE_SEQ:
|
||||||
@@ -756,6 +754,10 @@ compile_instruction(
|
|||||||
emit_ddy( t, dst[0], &inst->SrcReg[0] );
|
emit_ddy( t, dst[0], &inst->SrcReg[0] );
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case OPCODE_RSQ:
|
||||||
|
ureg_RSQ( ureg, dst[0], ureg_abs(src[0]) );
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
ureg_insn( ureg,
|
ureg_insn( ureg,
|
||||||
translate_opcode( inst->Opcode ),
|
translate_opcode( inst->Opcode ),
|
||||||
|
Reference in New Issue
Block a user