r500: Add "Not quite SSA" and dead code elimination pass

In addition, this pass fixes non-native swizzles.
This commit is contained in:
Nicolai Haehnle
2008-07-06 19:48:50 +02:00
parent 7904c9fad4
commit d8d086c20b
7 changed files with 524 additions and 24 deletions

View File

@@ -38,6 +38,7 @@ DRIVER_SOURCES = \
r300_texstate.c \ r300_texstate.c \
radeon_program.c \ radeon_program.c \
radeon_program_alu.c \ radeon_program_alu.c \
radeon_nqssadce.c \
r300_vertprog.c \ r300_vertprog.c \
r300_fragprog.c \ r300_fragprog.c \
r300_fragprog_emit.c \ r300_fragprog_emit.c \

View File

@@ -27,6 +27,7 @@
#include "r500_fragprog.h" #include "r500_fragprog.h"
#include "radeon_nqssadce.h"
#include "radeon_program_alu.h" #include "radeon_program_alu.h"
@@ -250,6 +251,57 @@ static void insert_WPOS_trailer(struct r500_fragment_program_compiler *compiler)
} }
/**
 * Init hook handed to the NqSSA-DCE pass: seed the Sourced masks of the
 * fragment program outputs before any instruction is examined.
 */
static void nqssadce_init(struct nqssadce_state* s)
{
	struct register_state *outputs = s->Outputs;

	/* Every channel of the color result counts as sourced. */
	outputs[FRAG_RESULT_COLR].Sourced = WRITEMASK_XYZW;
	/* Only the W channel of the depth result counts as sourced. */
	outputs[FRAG_RESULT_DEPR].Sourced = WRITEMASK_W;
}
/**
 * Decide whether a source operand's swizzle/negate combination is
 * accepted as native.
 *
 * An operand with Abs set is always accepted. Otherwise only the X, Y and
 * Z channels whose swizzle is neither SWIZZLE_NIL nor SWIZZLE_ZERO are
 * considered, and the operand is accepted when NegateBase is set on all of
 * those channels or on none of them.
 *
 * @param opcode  opcode of the instruction using the operand (not examined)
 * @param reg     the source operand
 * @return GL_TRUE if accepted, GL_FALSE for a partial negate
 */
static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg)
{
	GLuint used_mask = 0;
	GLuint negated;
	int chan;

	if (reg.Abs)
		return GL_TRUE;

	for (chan = 0; chan < 3; ++chan) {
		const GLuint sel = GET_SWZ(reg.Swizzle, chan);

		if (sel == SWIZZLE_NIL || sel == SWIZZLE_ZERO)
			continue;
		used_mask |= 1 << chan;
	}

	negated = reg.NegateBase & used_mask;
	if (negated == 0 || negated == used_mask)
		return GL_TRUE;

	return GL_FALSE;
}
/**
 * Implement a non-native swizzle. This function assumes that
 * is_native_swizzle returned false for @p src.
 *
 * Two MOV instructions into @p dst are inserted at the current IP: the
 * first writes the channels whose NegateBase bit is set, the second writes
 * the remaining channels. Both carry @p src unchanged as their operand.
 * On success s->IP is advanced past the two new instructions.
 *
 * If the instructions cannot be inserted, a problem is reported and the
 * program and s->IP are left untouched.
 */
static void nqssadce_build_swizzle(struct nqssadce_state *s,
	struct prog_dst_register dst, struct prog_src_register src)
{
	struct prog_instruction *inst;

	/* A failed insertion must not fall through: the writes below would
	 * otherwise overwrite the instructions already stored at s->IP. */
	if (!_mesa_insert_instructions(s->Program, s->IP, 2)) {
		_mesa_problem(s->Ctx, "NqssaDce: failed to insert swizzle instructions\n");
		return;
	}

	inst = s->Program->Instructions + s->IP;

	/* Channels with the negate bit set. */
	inst[0].Opcode = OPCODE_MOV;
	inst[0].DstReg = dst;
	inst[0].DstReg.WriteMask &= src.NegateBase;
	inst[0].SrcReg[0] = src;

	/* Channels without the negate bit. */
	inst[1].Opcode = OPCODE_MOV;
	inst[1].DstReg = dst;
	inst[1].DstReg.WriteMask &= ~src.NegateBase;
	inst[1].SrcReg[0] = src;

	s->IP += 2;
}
static GLuint build_dtm(GLuint depthmode) static GLuint build_dtm(GLuint depthmode)
{ {
switch(depthmode) { switch(depthmode) {
@@ -327,7 +379,20 @@ void r500TranslateFragmentShader(r300ContextPtr r300,
3, transformations); 3, transformations);
if (RADEON_DEBUG & DEBUG_PIXEL) { if (RADEON_DEBUG & DEBUG_PIXEL) {
_mesa_printf("Compiler: after all transformations:\n"); _mesa_printf("Compiler: after native rewrite:\n");
_mesa_print_program(compiler.program);
}
struct radeon_nqssadce_descr nqssadce = {
.Init = &nqssadce_init,
.IsNativeSwizzle = &is_native_swizzle,
.BuildSwizzle = &nqssadce_build_swizzle,
.RewriteDepthOut = GL_TRUE
};
radeonNqssaDce(r300->radeon.glCtx, compiler.program, &nqssadce);
if (RADEON_DEBUG & DEBUG_PIXEL) {
_mesa_printf("Compiler: after NqSSA-DCE:\n");
_mesa_print_program(compiler.program); _mesa_print_program(compiler.program);
} }

View File

@@ -163,23 +163,30 @@ static const struct prog_dst_register dstreg_template = {
.WriteMask = WRITEMASK_XYZW .WriteMask = WRITEMASK_XYZW
}; };
/**
 * Remap a swizzle selector: the value 5 becomes 6, and SWIZZLE_NIL
 * becomes 4. Every other selector is returned unchanged.
 */
static INLINE GLuint fix_hw_swizzle(GLuint swz)
{
	const GLuint remapped = (swz == 5) ? 6 : swz;

	return (remapped == SWIZZLE_NIL) ? 4 : remapped;
}
static INLINE GLuint make_rgb_swizzle(struct prog_src_register src) { static INLINE GLuint make_rgb_swizzle(struct prog_src_register src) {
GLuint swiz = 0x0; GLuint swiz = 0x0;
GLuint temp; GLuint temp;
/* This could be optimized, but it should be plenty fast already. */ /* This could be optimized, but it should be plenty fast already. */
int i; int i;
int negatebase = 0;
for (i = 0; i < 3; i++) { for (i = 0; i < 3; i++) {
temp = GET_SWZ(src.Swizzle, i); temp = GET_SWZ(src.Swizzle, i);
/* Fix SWIZZLE_ONE */ if (temp != SWIZZLE_NIL && GET_BIT(src.NegateBase, i))
if (temp == 5) temp++; negatebase = 1;
temp = fix_hw_swizzle(temp);
swiz |= temp << i*3; swiz |= temp << i*3;
} }
if (src.Abs) { if (src.Abs)
swiz |= R500_SWIZ_MOD_ABS << 9; swiz |= R500_SWIZ_MOD_ABS << 9;
} else if (src.NegateBase & 7) { else if (negatebase)
ASSERT((src.NegateBase & 7) == 7);
swiz |= R500_SWIZ_MOD_NEG << 9; swiz |= R500_SWIZ_MOD_NEG << 9;
}
if (src.NegateAbs) if (src.NegateAbs)
swiz ^= R500_SWIZ_MOD_NEG << 9; swiz ^= R500_SWIZ_MOD_NEG << 9;
return swiz; return swiz;
@@ -191,8 +198,7 @@ static INLINE GLuint make_rgba_swizzle(GLuint src) {
int i; int i;
for (i = 0; i < 4; i++) { for (i = 0; i < 4; i++) {
temp = GET_SWZ(src, i); temp = GET_SWZ(src, i);
/* Fix SWIZZLE_ONE */ temp = fix_hw_swizzle(temp);
if (temp == 5) temp++;
swiz |= temp << i*3; swiz |= temp << i*3;
} }
return swiz; return swiz;
@@ -201,7 +207,7 @@ static INLINE GLuint make_rgba_swizzle(GLuint src) {
static INLINE GLuint make_alpha_swizzle(struct prog_src_register src) { static INLINE GLuint make_alpha_swizzle(struct prog_src_register src) {
GLuint swiz = GET_SWZ(src.Swizzle, 3); GLuint swiz = GET_SWZ(src.Swizzle, 3);
if (swiz == 5) swiz++; swiz = fix_hw_swizzle(swiz);
if (src.Abs) { if (src.Abs) {
swiz |= R500_SWIZ_MOD_ABS << 3; swiz |= R500_SWIZ_MOD_ABS << 3;
@@ -217,7 +223,7 @@ static INLINE GLuint make_alpha_swizzle(struct prog_src_register src) {
static INLINE GLuint make_sop_swizzle(struct prog_src_register src) { static INLINE GLuint make_sop_swizzle(struct prog_src_register src) {
GLuint swiz = GET_SWZ(src.Swizzle, 0); GLuint swiz = GET_SWZ(src.Swizzle, 0);
if (swiz == 5) swiz++; swiz = fix_hw_swizzle(swiz);
if (src.Abs) { if (src.Abs) {
swiz |= R500_SWIZ_MOD_ABS << 3; swiz |= R500_SWIZ_MOD_ABS << 3;

View File

@@ -0,0 +1,282 @@
/*
* Copyright (C) 2008 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
/**
* @file
*
* "Not-quite SSA" and Dead-Code Elimination.
*
* @note This code uses SWIZZLE_NIL in a source register to indicate that
* the corresponding component is ignored by the corresponding instruction.
*/
#include "radeon_nqssadce.h"
/**
 * Return the @ref register_state for the given register (or 0 for untracked
 * registers, i.e. constants).
 *
 * 0 is also returned when @p index lies outside the tracking array of the
 * requested file, so callers never receive a pointer past the end of
 * Temps or Outputs.
 */
static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint file, GLuint index)
{
	switch(file) {
	case PROGRAM_TEMPORARY:
		if (index >= sizeof(s->Temps) / sizeof(s->Temps[0]))
			return 0;
		return &s->Temps[index];
	case PROGRAM_OUTPUT:
		if (index >= sizeof(s->Outputs) / sizeof(s->Outputs[0]))
			return 0;
		return &s->Outputs[index];
	default:
		return 0;
	}
}
/**
 * Compose a swizzle with a source register ("left multiplication").
 *
 * For each of the four channels, a selector below 4 in @p swizzle picks
 * the swizzle selector and the NegateBase bit of that channel of
 * @p srcreg; any other selector is copied through as-is with no negate.
 * All remaining fields of the result are copied from @p srcreg.
 *
 * @note Works correctly only for X, Y, Z, W swizzles, not for constant swizzles.
 */
static struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg)
{
	struct prog_src_register result = srcreg;
	GLuint new_swizzle = 0;
	GLuint new_negate = 0;
	int chan;

	for (chan = 0; chan < 4; ++chan) {
		const GLuint sel = GET_SWZ(swizzle, chan);
		GLuint picked = sel;

		if (sel < 4) {
			picked = GET_SWZ(srcreg.Swizzle, sel);
			new_negate |= GET_BIT(srcreg.NegateBase, sel) << chan;
		}
		new_swizzle |= picked << (chan*3);
	}

	result.Swizzle = new_swizzle;
	result.NegateBase = new_negate;
	return result;
}
/**
 * Record which components of a register are read through one source
 * operand, emulating the operand's swizzle first if it is not native.
 *
 * Channels of the operand that are not in @p sourced get their swizzle
 * replaced by SWIZZLE_NIL. If the descriptor's IsNativeSwizzle rejects the
 * resulting operand, BuildSwizzle is asked to write the swizzled value
 * into a free temporary and the operand is redirected to that temporary
 * with an identity swizzle. Finally the components that are read are
 * OR-ed into the Sourced mask of the (possibly new) source register.
 *
 * @param s        pass state
 * @param inst     instruction at s->IP
 * @param src      index of the source operand to track
 * @param sourced  mask of operand channels the instruction reads
 * @return pointer to the instruction, re-fetched if instructions were
 *         inserted in front of it
 */
static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s,
	struct prog_instruction *inst, GLint src, GLuint sourced)
{
	struct register_state *regstate;
	GLuint deswz_source = 0;
	int i;

	for(i = 0; i < 4; ++i) {
		if (GET_BIT(sourced, i)) {
			GLuint swz = GET_SWZ(inst->SrcReg[src].Swizzle, i);
			deswz_source |= 1 << swz;
		} else {
			inst->SrcReg[src].Swizzle &= ~(7 << (3*i));
			inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i);
		}
	}

	if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) {
		/* NOTE(review): _mesa_find_free_register is expected to return a
		 * negative value when no register is free - confirm against its
		 * definition, which is not part of this file. */
		GLint tempindex = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY);

		if (tempindex < 0) {
			/* Keep the operand as it is rather than redirecting it to
			 * a bogus register index. */
			_mesa_problem(s->Ctx, "NqssaDce: no free temporary for swizzle emulation\n");
		} else {
			struct prog_dst_register dstreg = inst->DstReg;
			dstreg.File = PROGRAM_TEMPORARY;
			dstreg.Index = tempindex;
			dstreg.WriteMask = sourced;

			s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]);

			/* BuildSwizzle inserts instructions and moves s->IP, so the
			 * old instruction pointer must not be used any more. */
			inst = s->Program->Instructions + s->IP;
			inst->SrcReg[src].File = PROGRAM_TEMPORARY;
			inst->SrcReg[src].Index = dstreg.Index;
			inst->SrcReg[src].Swizzle = 0;
			inst->SrcReg[src].NegateBase = 0;
			inst->SrcReg[src].Abs = 0;
			inst->SrcReg[src].NegateAbs = 0;
			for(i = 0; i < 4; ++i) {
				if (GET_BIT(sourced, i))
					inst->SrcReg[src].Swizzle |= i << (3*i);
				else
					inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i);
			}
			/* The temporary is read with an identity swizzle. */
			deswz_source = sourced;
		}
	}

	regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index);
	if (regstate)
		regstate->Sourced |= deswz_source & 0xf;

	return inst;
}
/**
 * Turn a write to the Z channel of the destination into a write to W.
 *
 * If the instruction does not write Z, its write mask is cleared and
 * nothing else happens. Otherwise the write mask becomes W only, and for
 * the component-wise opcodes listed below every source operand is
 * re-swizzled through SWIZZLE_ZZZZ. Sources of all other opcodes are
 * left alone.
 */
static void rewrite_depth_out(struct prog_instruction *inst)
{
	int nsrc = 0;
	int i;

	if (!(inst->DstReg.WriteMask & WRITEMASK_Z)) {
		inst->DstReg.WriteMask = 0;
		return;
	}
	inst->DstReg.WriteMask = WRITEMASK_W;

	/* Number of source operands that need the ZZZZ re-swizzle. */
	switch (inst->Opcode) {
	case OPCODE_FRC:
	case OPCODE_MOV:
		nsrc = 1;
		break;
	case OPCODE_ADD:
	case OPCODE_MAX:
	case OPCODE_MIN:
	case OPCODE_MUL:
		nsrc = 2;
		break;
	case OPCODE_CMP:
	case OPCODE_MAD:
		nsrc = 3;
		break;
	default:
		/* Scalar instructions keep their source swizzles. */
		break;
	}

	for (i = 0; i < nsrc; ++i)
		inst->SrcReg[i] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[i]);
}
/**
 * Rename temporary @p oldindex to @p newindex in every source operand of
 * one instruction. Only as many operands as the opcode uses are visited.
 */
static void unalias_srcregs(struct prog_instruction *inst, GLuint oldindex, GLuint newindex)
{
	const int count = _mesa_num_inst_src_regs(inst->Opcode);
	int n;

	for (n = 0; n < count; ++n) {
		struct prog_src_register *operand = &inst->SrcReg[n];

		if (operand->File != PROGRAM_TEMPORARY)
			continue;
		if (operand->Index != oldindex)
			continue;

		operand->Index = newindex;
	}
}
/**
 * Give all uses of temporary @p oldindex that precede the current
 * instruction a fresh register index.
 *
 * Destinations and sources of every instruction before s->IP are renamed,
 * as are the sources (but not the destination) of the instruction at
 * s->IP itself.
 *
 * If no free temporary is available the program is left unchanged; the
 * register then simply stays shared between the two value ranges.
 */
static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex)
{
	/* NOTE(review): _mesa_find_free_register is expected to return a
	 * negative value when no register is free - confirm against its
	 * definition, which is not part of this file. */
	GLint newindex = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY);
	int ip;

	if (newindex < 0)
		return;

	for(ip = 0; ip < s->IP; ++ip) {
		struct prog_instruction* inst = s->Program->Instructions + ip;

		if (inst->DstReg.File == PROGRAM_TEMPORARY && inst->DstReg.Index == oldindex)
			inst->DstReg.Index = newindex;
		unalias_srcregs(inst, oldindex, newindex);
	}

	/* The current instruction still writes oldindex, but what it reads
	 * belongs to the earlier value. */
	unalias_srcregs(s->Program->Instructions + s->IP, oldindex, newindex);
}
/**
* Handle one instruction, namely the one at s->IP.
*
* OPCODE_END is skipped. For every opcode except KIL the destination is
* examined first: its write mask is reduced to the components that are
* still marked as sourced, those components are then cleared from the
* register's Sourced mask, and the instruction is deleted if it ends up
* writing nothing. Afterwards the source operands are tracked according
* to the opcode.
*/
static void process_instruction(struct nqssadce_state* s)
{
struct prog_instruction *inst = s->Program->Instructions + s->IP;
if (inst->Opcode == OPCODE_END)
return;
/* The destination handling below is skipped for KIL. */
if (inst->Opcode != OPCODE_KIL) {
/* Optional rewrite of depth output writes; must happen before the
* write mask is tested below. */
if (s->Descr->RewriteDepthOut) {
if (inst->DstReg.File == PROGRAM_OUTPUT && inst->DstReg.Index == FRAG_RESULT_DEPR)
rewrite_depth_out(inst);
}
struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index);
if (!regstate) {
_mesa_problem(s->Ctx, "NqssaDce: bad destination register (%i[%i])\n",
inst->DstReg.File, inst->DstReg.Index);
return;
}
/* Keep only the components that are still sourced, then mark those
* components as no longer sourced. */
inst->DstReg.WriteMask &= regstate->Sourced;
regstate->Sourced &= ~inst->DstReg.WriteMask;
/* Nothing left to write: the instruction is dead. */
if (inst->DstReg.WriteMask == 0) {
_mesa_delete_instructions(s->Program, s->IP, 1);
return;
}
/* No sourced component of this temporary remains, so earlier uses of
* the same index are renamed to a different register. */
if (inst->DstReg.File == PROGRAM_TEMPORARY && !regstate->Sourced)
unalias_temporary(s, inst->DstReg.Index);
}
/* Attention: Due to swizzle emulation code, the following
* might change the instruction stream under us, so we have
* to be careful with the inst pointer. */
switch (inst->Opcode) {
/* One source, read in the channels that are written. */
case OPCODE_FRC:
case OPCODE_MOV:
inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
break;
/* Two sources, read in the channels that are written. */
case OPCODE_ADD:
case OPCODE_MAX:
case OPCODE_MIN:
case OPCODE_MUL:
inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask);
break;
/* Three sources, read in the channels that are written. */
case OPCODE_CMP:
case OPCODE_MAD:
inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask);
inst = track_used_srcreg(s, inst, 2, inst->DstReg.WriteMask);
break;
/* Only the first channel of the source operand is tracked. */
case OPCODE_COS:
case OPCODE_EX2:
case OPCODE_LG2:
case OPCODE_RCP:
case OPCODE_RSQ:
case OPCODE_SIN:
inst = track_used_srcreg(s, inst, 0, 0x1);
break;
/* The first three channels of both operands are tracked. */
case OPCODE_DP3:
inst = track_used_srcreg(s, inst, 0, 0x7);
inst = track_used_srcreg(s, inst, 1, 0x7);
break;
/* All four channels of both operands are tracked. */
case OPCODE_DP4:
inst = track_used_srcreg(s, inst, 0, 0xf);
inst = track_used_srcreg(s, inst, 1, 0xf);
break;
/* All four channels of the single operand are tracked. */
case OPCODE_KIL:
case OPCODE_TEX:
case OPCODE_TXB:
case OPCODE_TXP:
inst = track_used_srcreg(s, inst, 0, 0xf);
break;
default:
_mesa_problem(s->Ctx, "NqssaDce: Unknown opcode %d\n", inst->Opcode);
return;
}
}
/**
 * Run the "not quite SSA" / dead-code elimination pass over a program.
 *
 * The pass state is zeroed, the descriptor's Init hook is called on it,
 * and the instructions are then processed one by one from the last to the
 * first.
 *
 * @param ctx    GL context, stored in the pass state
 * @param p      program to transform in place
 * @param descr  hardware description and callbacks used by the pass
 */
void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr)
{
	struct nqssadce_state state;

	_mesa_bzero(&state, sizeof(state));
	state.Ctx = ctx;
	state.Program = p;
	state.Descr = descr;
	descr->Init(&state);

	/* Walk backwards; IP is re-read each round because processing an
	 * instruction may change it. */
	for (state.IP = p->NumInstructions; state.IP > 0; ) {
		--state.IP;
		process_instruction(&state);
	}
}

View File

@@ -0,0 +1,96 @@
/*
* Copyright (C) 2008 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __RADEON_PROGRAM_NQSSADCE_H_
#define __RADEON_PROGRAM_NQSSADCE_H_
#include "radeon_program.h"
/**
* Per-register bookkeeping of the NqSSA-DCE pass.
*/
struct register_state {
/**
* Bitmask indicating which components of the register are sourced
* by later instructions. Four bits wide, one bit per component.
*/
GLuint Sourced : 4;
};
/**
* Maintain state such as which registers are used, which registers are
* read from, etc.
*
* One instance exists per run of radeonNqssaDce and is handed to the
* descriptor callbacks.
*/
struct nqssadce_state {
/** GL context; used for reporting problems. */
GLcontext *Ctx;
/** Program being transformed in place. */
struct gl_program *Program;
/** Hardware description and callbacks supplied by the caller. */
struct radeon_nqssadce_descr *Descr;
/**
* All instructions after this instruction pointer have been dealt with.
*/
int IP;
/**
* Which registers are read by subsequent instructions?
* Temps is indexed by temporary register index, Outputs by output
* register index.
*/
struct register_state Temps[MAX_PROGRAM_TEMPS];
struct register_state Outputs[VERT_RESULT_MAX];
};
/**
* This structure contains a description of the hardware in-so-far as
* it is required for the NqSSA-DCE pass.
*/
struct radeon_nqssadce_descr {
/**
* Called once on the freshly zeroed pass state, before any instruction
* is processed. It is the place to fill in which output components
* are to be treated as sourced.
*/
void (*Init)(struct nqssadce_state *);
/**
* Check whether the given swizzle, absolute and negate combination
* can be implemented natively by the hardware for this opcode.
*/
GLboolean (*IsNativeSwizzle)(GLuint opcode, struct prog_src_register reg);
/**
* Emit (at the current IP) the instruction MOV dst, src;
* The transformation will work recursively on the emitted instruction(s).
*/
void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src);
/**
* Rewrite instructions that write to DEPR.z to write to DEPR.w
* instead (rewriting is done *before* the WriteMask test).
*/
GLboolean RewriteDepthOut;
/**
* NOTE(review): not referenced by the pass itself; presumably private
* data for the callbacks - confirm with the drivers that set it.
*/
void *Data;
};
void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr);
#endif /* __RADEON_PROGRAM_NQSSADCE_H_ */

View File

@@ -541,6 +541,53 @@ _mesa_insert_instructions(struct gl_program *prog, GLuint start, GLuint count)
} }
/**
 * Delete 'count' instructions at 'start' in the given program.
 * Adjust branch targets accordingly.
 *
 * Branch targets at or beyond the end of the deleted range are moved down
 * by 'count'. Targets that pointed into the deleted range are redirected
 * to 'start', i.e. to the first instruction that followed the range.
 *
 * \param prog   program whose instruction array is replaced
 * \param start  index of the first instruction to delete
 * \param count  number of instructions to delete
 * \return GL_TRUE on success; GL_FALSE if the range does not lie within
 *         the program or if allocation fails. On failure the program,
 *         including its branch targets, is left unmodified.
 */
GLboolean
_mesa_delete_instructions(struct gl_program *prog, GLuint start, GLuint count)
{
   const GLuint origLen = prog->NumInstructions;
   GLuint newLen;
   struct prog_instruction *newInst;
   GLuint i;

   /* Reject ranges that reach past the end of the program; this also
    * keeps origLen - count from wrapping around.
    */
   if (start > origLen || count > origLen - start) {
      return GL_FALSE;
   }
   newLen = origLen - count;

   /* Alloc storage for new instructions.  Done before anything is
    * modified so that a failure leaves the program intact.
    */
   newInst = _mesa_alloc_instructions(newLen);
   if (!newInst) {
      return GL_FALSE;
   }

   /* adjust branches */
   for (i = 0; i < origLen; i++) {
      struct prog_instruction *inst = prog->Instructions + i;
      if (inst->BranchTarget > 0) {
         const GLuint target = (GLuint) inst->BranchTarget;
         if (target >= start + count) {
            /* target lies behind the deleted range */
            inst->BranchTarget -= count;
         }
         else if (target > start) {
            /* target was deleted: continue at the next survivor */
            inst->BranchTarget = start;
         }
      }
   }

   /* Copy 'start' instructions into new instruction buffer */
   _mesa_copy_instructions(newInst, prog->Instructions, start);

   /* Copy the remaining/tail instructions to new inst buffer */
   _mesa_copy_instructions(newInst + start,
                           prog->Instructions + start + count,
                           newLen - start);

   /* free old instructions */
   _mesa_free_instructions(prog->Instructions, origLen);

   /* install new instructions */
   prog->Instructions = newInst;
   prog->NumInstructions = newLen;

   return GL_TRUE;
}
/** /**
* Search instructions for registers that match (oldFile, oldIndex), * Search instructions for registers that match (oldFile, oldIndex),
* replacing them with (newFile, newIndex). * replacing them with (newFile, newIndex).

View File

@@ -115,6 +115,9 @@ _mesa_clone_program(GLcontext *ctx, const struct gl_program *prog);
extern GLboolean extern GLboolean
_mesa_insert_instructions(struct gl_program *prog, GLuint start, GLuint count); _mesa_insert_instructions(struct gl_program *prog, GLuint start, GLuint count);
extern GLboolean
_mesa_delete_instructions(struct gl_program *prog, GLuint start, GLuint count);
extern struct gl_program * extern struct gl_program *
_mesa_combine_programs(GLcontext *ctx, _mesa_combine_programs(GLcontext *ctx,
const struct gl_program *progA, const struct gl_program *progA,