r300/compiler: Fix vertex program MAD emit
Only use the macro variant of MAD when absolutely necessary. Apparently the macro variant cannot deal with relative addressing in the source operands. Signed-off-by: Nicolai Hähnle <nhaehnle@gmail.com>
This commit is contained in:
@@ -284,12 +284,52 @@ static void ei_mad(struct r300_vertex_program_code *vp,
|
|||||||
struct prog_instruction *vpi,
|
struct prog_instruction *vpi,
|
||||||
GLuint * inst)
|
GLuint * inst)
|
||||||
{
|
{
|
||||||
inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
|
/* Remarks about hardware limitations of MAD
|
||||||
GL_FALSE,
|
* (please preserve this comment, as this information is _NOT_
|
||||||
GL_TRUE,
|
* in the documentation provided by AMD).
|
||||||
t_dst_index(vp, &vpi->DstReg),
|
*
|
||||||
t_dst_mask(vpi->DstReg.WriteMask),
|
* As described in the documentation, MAD with three unique temporary
|
||||||
t_dst_class(vpi->DstReg.File));
|
* source registers requires the use of the macro version.
|
||||||
|
*
|
||||||
|
* However (and this is not mentioned in the documentation), apparently
|
||||||
|
* the macro version is _NOT_ a full superset of the normal version.
|
||||||
|
* In particular, the macro version does not always work when relative
|
||||||
|
* addressing is used in the source operands.
|
||||||
|
*
|
||||||
|
* This limitation caused incorrect rendering in Sauerbraten's OpenGL
|
||||||
|
* assembly shader path when using medium quality animations
|
||||||
|
* (i.e. animations with matrix blending instead of quaternion blending).
|
||||||
|
*
|
||||||
|
* Unfortunately, I (nha) have been unable to extract a Piglit regression
|
||||||
|
* test for this issue - for some reason, it is possible to have vertex
|
||||||
|
* programs whose prefix is *exactly* the same as the prefix of the
|
||||||
|
* offending program in Sauerbraten up to the offending instruction
|
||||||
|
* without causing any trouble.
|
||||||
|
*
|
||||||
|
* Bottom line: Use the macro version only when really necessary;
|
||||||
|
* according to AMD docs, this should improve performance by one clock
|
||||||
|
* as a nice side bonus.
|
||||||
|
*/
|
||||||
|
if (vpi->SrcReg[0].File == PROGRAM_TEMPORARY &&
|
||||||
|
vpi->SrcReg[1].File == PROGRAM_TEMPORARY &&
|
||||||
|
vpi->SrcReg[2].File == PROGRAM_TEMPORARY &&
|
||||||
|
vpi->SrcReg[0].Index != vpi->SrcReg[1].Index &&
|
||||||
|
vpi->SrcReg[0].Index != vpi->SrcReg[2].Index &&
|
||||||
|
vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) {
|
||||||
|
inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
|
||||||
|
GL_FALSE,
|
||||||
|
GL_TRUE,
|
||||||
|
t_dst_index(vp, &vpi->DstReg),
|
||||||
|
t_dst_mask(vpi->DstReg.WriteMask),
|
||||||
|
t_dst_class(vpi->DstReg.File));
|
||||||
|
} else {
|
||||||
|
inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
|
||||||
|
GL_FALSE,
|
||||||
|
GL_FALSE,
|
||||||
|
t_dst_index(vp, &vpi->DstReg),
|
||||||
|
t_dst_mask(vpi->DstReg.WriteMask),
|
||||||
|
t_dst_class(vpi->DstReg.File));
|
||||||
|
}
|
||||||
inst[1] = t_src(vp, &vpi->SrcReg[0]);
|
inst[1] = t_src(vp, &vpi->SrcReg[0]);
|
||||||
inst[2] = t_src(vp, &vpi->SrcReg[1]);
|
inst[2] = t_src(vp, &vpi->SrcReg[1]);
|
||||||
inst[3] = t_src(vp, &vpi->SrcReg[2]);
|
inst[3] = t_src(vp, &vpi->SrcReg[2]);
|
||||||
|
Reference in New Issue
Block a user