r500: Add "Not quite SSA" and dead code elimination pass
In addition, this pass fixes non-native swizzles.
This commit is contained in:
@@ -38,6 +38,7 @@ DRIVER_SOURCES = \
|
|||||||
r300_texstate.c \
|
r300_texstate.c \
|
||||||
radeon_program.c \
|
radeon_program.c \
|
||||||
radeon_program_alu.c \
|
radeon_program_alu.c \
|
||||||
|
radeon_nqssadce.c \
|
||||||
r300_vertprog.c \
|
r300_vertprog.c \
|
||||||
r300_fragprog.c \
|
r300_fragprog.c \
|
||||||
r300_fragprog_emit.c \
|
r300_fragprog_emit.c \
|
||||||
|
@@ -27,6 +27,7 @@
|
|||||||
|
|
||||||
#include "r500_fragprog.h"
|
#include "r500_fragprog.h"
|
||||||
|
|
||||||
|
#include "radeon_nqssadce.h"
|
||||||
#include "radeon_program_alu.h"
|
#include "radeon_program_alu.h"
|
||||||
|
|
||||||
|
|
||||||
@@ -250,6 +251,57 @@ static void insert_WPOS_trailer(struct r500_fragment_program_compiler *compiler)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void nqssadce_init(struct nqssadce_state* s)
|
||||||
|
{
|
||||||
|
s->Outputs[FRAG_RESULT_COLR].Sourced = WRITEMASK_XYZW;
|
||||||
|
s->Outputs[FRAG_RESULT_DEPR].Sourced = WRITEMASK_W;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * NqSSA-DCE IsNativeSwizzle callback.
 *
 * A source operand is rejected only when, among its first three components
 * that are neither SWIZZLE_NIL nor SWIZZLE_ZERO, some are negated and some
 * are not. Operands with Abs set are always accepted.
 *
 * @param opcode unused by this implementation
 * @param reg    source register to examine
 * @return GL_TRUE if the operand can be used as is, GL_FALSE otherwise
 */
static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg)
{
	GLuint live = 0;
	GLuint negated;
	int chan;

	if (reg.Abs)
		return GL_TRUE;

	/* Collect the X/Y/Z components whose negate bit actually matters. */
	for (chan = 0; chan < 3; chan++) {
		const GLuint sel = GET_SWZ(reg.Swizzle, chan);

		if (sel == SWIZZLE_NIL || sel == SWIZZLE_ZERO)
			continue;
		live |= 1 << chan;
	}

	/* Native means: either none or all of the live components are negated. */
	negated = reg.NegateBase & live;
	if (negated == 0 || negated == live)
		return GL_TRUE;

	return GL_FALSE;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Implement a non-native swizzle. This function assumes that
|
||||||
|
* is_native_swizzle returned true.
|
||||||
|
*/
|
||||||
|
static void nqssadce_build_swizzle(struct nqssadce_state *s,
|
||||||
|
struct prog_dst_register dst, struct prog_src_register src)
|
||||||
|
{
|
||||||
|
struct prog_instruction *inst;
|
||||||
|
|
||||||
|
_mesa_insert_instructions(s->Program, s->IP, 2);
|
||||||
|
inst = s->Program->Instructions + s->IP;
|
||||||
|
|
||||||
|
inst[0].Opcode = OPCODE_MOV;
|
||||||
|
inst[0].DstReg = dst;
|
||||||
|
inst[0].DstReg.WriteMask &= src.NegateBase;
|
||||||
|
inst[0].SrcReg[0] = src;
|
||||||
|
|
||||||
|
inst[1].Opcode = OPCODE_MOV;
|
||||||
|
inst[1].DstReg = dst;
|
||||||
|
inst[1].DstReg.WriteMask &= ~src.NegateBase;
|
||||||
|
inst[1].SrcReg[0] = src;
|
||||||
|
|
||||||
|
s->IP += 2;
|
||||||
|
}
|
||||||
|
|
||||||
static GLuint build_dtm(GLuint depthmode)
|
static GLuint build_dtm(GLuint depthmode)
|
||||||
{
|
{
|
||||||
switch(depthmode) {
|
switch(depthmode) {
|
||||||
@@ -327,7 +379,20 @@ void r500TranslateFragmentShader(r300ContextPtr r300,
|
|||||||
3, transformations);
|
3, transformations);
|
||||||
|
|
||||||
if (RADEON_DEBUG & DEBUG_PIXEL) {
|
if (RADEON_DEBUG & DEBUG_PIXEL) {
|
||||||
_mesa_printf("Compiler: after all transformations:\n");
|
_mesa_printf("Compiler: after native rewrite:\n");
|
||||||
|
_mesa_print_program(compiler.program);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct radeon_nqssadce_descr nqssadce = {
|
||||||
|
.Init = &nqssadce_init,
|
||||||
|
.IsNativeSwizzle = &is_native_swizzle,
|
||||||
|
.BuildSwizzle = &nqssadce_build_swizzle,
|
||||||
|
.RewriteDepthOut = GL_TRUE
|
||||||
|
};
|
||||||
|
radeonNqssaDce(r300->radeon.glCtx, compiler.program, &nqssadce);
|
||||||
|
|
||||||
|
if (RADEON_DEBUG & DEBUG_PIXEL) {
|
||||||
|
_mesa_printf("Compiler: after NqSSA-DCE:\n");
|
||||||
_mesa_print_program(compiler.program);
|
_mesa_print_program(compiler.program);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -163,23 +163,30 @@ static const struct prog_dst_register dstreg_template = {
|
|||||||
.WriteMask = WRITEMASK_XYZW
|
.WriteMask = WRITEMASK_XYZW
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Translate a single swizzle selector into the value written into the
 * hardware swizzle fields.
 *
 * Selector 5 becomes 6 (presumably the SWIZZLE_ONE fix-up -- confirm) and
 * SWIZZLE_NIL becomes 4; every other selector is passed through unchanged.
 */
static INLINE GLuint fix_hw_swizzle(GLuint swz)
{
	const GLuint bumped = (swz == 5) ? 6 : swz;

	return (bumped == SWIZZLE_NIL) ? 4 : bumped;
}
|
||||||
|
|
||||||
static INLINE GLuint make_rgb_swizzle(struct prog_src_register src) {
|
static INLINE GLuint make_rgb_swizzle(struct prog_src_register src) {
|
||||||
GLuint swiz = 0x0;
|
GLuint swiz = 0x0;
|
||||||
GLuint temp;
|
GLuint temp;
|
||||||
/* This could be optimized, but it should be plenty fast already. */
|
/* This could be optimized, but it should be plenty fast already. */
|
||||||
int i;
|
int i;
|
||||||
|
int negatebase = 0;
|
||||||
for (i = 0; i < 3; i++) {
|
for (i = 0; i < 3; i++) {
|
||||||
temp = GET_SWZ(src.Swizzle, i);
|
temp = GET_SWZ(src.Swizzle, i);
|
||||||
/* Fix SWIZZLE_ONE */
|
if (temp != SWIZZLE_NIL && GET_BIT(src.NegateBase, i))
|
||||||
if (temp == 5) temp++;
|
negatebase = 1;
|
||||||
|
temp = fix_hw_swizzle(temp);
|
||||||
swiz |= temp << i*3;
|
swiz |= temp << i*3;
|
||||||
}
|
}
|
||||||
if (src.Abs) {
|
if (src.Abs)
|
||||||
swiz |= R500_SWIZ_MOD_ABS << 9;
|
swiz |= R500_SWIZ_MOD_ABS << 9;
|
||||||
} else if (src.NegateBase & 7) {
|
else if (negatebase)
|
||||||
ASSERT((src.NegateBase & 7) == 7);
|
|
||||||
swiz |= R500_SWIZ_MOD_NEG << 9;
|
swiz |= R500_SWIZ_MOD_NEG << 9;
|
||||||
}
|
|
||||||
if (src.NegateAbs)
|
if (src.NegateAbs)
|
||||||
swiz ^= R500_SWIZ_MOD_NEG << 9;
|
swiz ^= R500_SWIZ_MOD_NEG << 9;
|
||||||
return swiz;
|
return swiz;
|
||||||
@@ -191,8 +198,7 @@ static INLINE GLuint make_rgba_swizzle(GLuint src) {
|
|||||||
int i;
|
int i;
|
||||||
for (i = 0; i < 4; i++) {
|
for (i = 0; i < 4; i++) {
|
||||||
temp = GET_SWZ(src, i);
|
temp = GET_SWZ(src, i);
|
||||||
/* Fix SWIZZLE_ONE */
|
temp = fix_hw_swizzle(temp);
|
||||||
if (temp == 5) temp++;
|
|
||||||
swiz |= temp << i*3;
|
swiz |= temp << i*3;
|
||||||
}
|
}
|
||||||
return swiz;
|
return swiz;
|
||||||
@@ -201,7 +207,7 @@ static INLINE GLuint make_rgba_swizzle(GLuint src) {
|
|||||||
static INLINE GLuint make_alpha_swizzle(struct prog_src_register src) {
|
static INLINE GLuint make_alpha_swizzle(struct prog_src_register src) {
|
||||||
GLuint swiz = GET_SWZ(src.Swizzle, 3);
|
GLuint swiz = GET_SWZ(src.Swizzle, 3);
|
||||||
|
|
||||||
if (swiz == 5) swiz++;
|
swiz = fix_hw_swizzle(swiz);
|
||||||
|
|
||||||
if (src.Abs) {
|
if (src.Abs) {
|
||||||
swiz |= R500_SWIZ_MOD_ABS << 3;
|
swiz |= R500_SWIZ_MOD_ABS << 3;
|
||||||
@@ -217,7 +223,7 @@ static INLINE GLuint make_alpha_swizzle(struct prog_src_register src) {
|
|||||||
static INLINE GLuint make_sop_swizzle(struct prog_src_register src) {
|
static INLINE GLuint make_sop_swizzle(struct prog_src_register src) {
|
||||||
GLuint swiz = GET_SWZ(src.Swizzle, 0);
|
GLuint swiz = GET_SWZ(src.Swizzle, 0);
|
||||||
|
|
||||||
if (swiz == 5) swiz++;
|
swiz = fix_hw_swizzle(swiz);
|
||||||
|
|
||||||
if (src.Abs) {
|
if (src.Abs) {
|
||||||
swiz |= R500_SWIZ_MOD_ABS << 3;
|
swiz |= R500_SWIZ_MOD_ABS << 3;
|
||||||
|
282
src/mesa/drivers/dri/r300/radeon_nqssadce.c
Normal file
282
src/mesa/drivers/dri/r300/radeon_nqssadce.c
Normal file
@@ -0,0 +1,282 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2008 Nicolai Haehnle.
|
||||||
|
*
|
||||||
|
* All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
* a copy of this software and associated documentation files (the
|
||||||
|
* "Software"), to deal in the Software without restriction, including
|
||||||
|
* without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
* permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
* the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the
|
||||||
|
* next paragraph) shall be included in all copies or substantial
|
||||||
|
* portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
|
||||||
|
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||||
|
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||||
|
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @file
|
||||||
|
*
|
||||||
|
* "Not-quite SSA" and Dead-Code Elimination.
|
||||||
|
*
|
||||||
|
* @note This code uses SWIZZLE_NIL in a source register to indicate that
|
||||||
|
* the corresponding component is ignored by the corresponding instruction.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "radeon_nqssadce.h"
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the @ref register_state for the given register (or 0 for untracked
|
||||||
|
* registers, i.e. constants).
|
||||||
|
*/
|
||||||
|
static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint file, GLuint index)
|
||||||
|
{
|
||||||
|
switch(file) {
|
||||||
|
case PROGRAM_TEMPORARY: return &s->Temps[index];
|
||||||
|
case PROGRAM_OUTPUT: return &s->Outputs[index];
|
||||||
|
default: return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Left multiplication of a register with a swizzle
|
||||||
|
*
|
||||||
|
* @note Works correctly only for X, Y, Z, W swizzles, not for constant swizzles.
|
||||||
|
*/
|
||||||
|
static struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg)
|
||||||
|
{
|
||||||
|
struct prog_src_register tmp = srcreg;
|
||||||
|
int i;
|
||||||
|
tmp.Swizzle = 0;
|
||||||
|
tmp.NegateBase = 0;
|
||||||
|
for(i = 0; i < 4; ++i) {
|
||||||
|
GLuint swz = GET_SWZ(swizzle, i);
|
||||||
|
if (swz < 4) {
|
||||||
|
tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3);
|
||||||
|
tmp.NegateBase |= GET_BIT(srcreg.NegateBase, swz) << i;
|
||||||
|
} else {
|
||||||
|
tmp.Swizzle |= swz << (i*3);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Record which components of a source operand are read, and emulate the
 * operand's swizzle if the hardware description rejects it.
 *
 * Components of the operand that are not in @p sourced get their selector
 * replaced by SWIZZLE_NIL. If the resulting operand is not a native swizzle,
 * BuildSwizzle is asked to move it into a fresh temporary and the operand is
 * redirected to that temporary with an identity swizzle.
 *
 * @param s       pass state
 * @param inst    instruction being processed (at s->IP)
 * @param src     index of the source operand within @p inst
 * @param sourced mask of operand components the instruction actually reads
 * @return the instruction at s->IP; this may differ from @p inst because
 *         swizzle emulation can change the instruction array
 */
static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s,
	struct prog_instruction *inst, GLint src, GLuint sourced)
{
	struct register_state *state;
	GLuint read_mask = 0;
	int chan;

	/* Translate the per-operand mask into a per-register mask and mark
	 * unread operand components as SWIZZLE_NIL. */
	for (chan = 0; chan < 4; chan++) {
		if (!GET_BIT(sourced, chan)) {
			inst->SrcReg[src].Swizzle &= ~(7 << (3*chan));
			inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*chan);
			continue;
		}

		read_mask |= 1 << GET_SWZ(inst->SrcReg[src].Swizzle, chan);
	}

	if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) {
		struct prog_dst_register tmpdst = inst->DstReg;
		GLuint identity = 0;

		tmpdst.File = PROGRAM_TEMPORARY;
		tmpdst.Index = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY);
		tmpdst.WriteMask = sourced;

		s->Descr->BuildSwizzle(s, tmpdst, inst->SrcReg[src]);

		/* Identity swizzle on the read components, NIL elsewhere. */
		for (chan = 0; chan < 4; chan++)
			identity |= (GET_BIT(sourced, chan) ? chan : SWIZZLE_NIL) << (3*chan);

		/* The instruction array may have changed: fetch the pointer again. */
		inst = s->Program->Instructions + s->IP;
		inst->SrcReg[src].File = PROGRAM_TEMPORARY;
		inst->SrcReg[src].Index = tmpdst.Index;
		inst->SrcReg[src].Swizzle = identity;
		inst->SrcReg[src].NegateBase = 0;
		inst->SrcReg[src].Abs = 0;
		inst->SrcReg[src].NegateAbs = 0;

		read_mask = sourced;
	}

	state = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index);
	if (state)
		state->Sourced |= read_mask & 0xf;

	return inst;
}
|
||||||
|
|
||||||
|
|
||||||
|
static void rewrite_depth_out(struct prog_instruction *inst)
|
||||||
|
{
|
||||||
|
if (inst->DstReg.WriteMask & WRITEMASK_Z) {
|
||||||
|
inst->DstReg.WriteMask = WRITEMASK_W;
|
||||||
|
} else {
|
||||||
|
inst->DstReg.WriteMask = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (inst->Opcode) {
|
||||||
|
case OPCODE_FRC:
|
||||||
|
case OPCODE_MOV:
|
||||||
|
inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
|
||||||
|
break;
|
||||||
|
case OPCODE_ADD:
|
||||||
|
case OPCODE_MAX:
|
||||||
|
case OPCODE_MIN:
|
||||||
|
case OPCODE_MUL:
|
||||||
|
inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
|
||||||
|
inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
|
||||||
|
break;
|
||||||
|
case OPCODE_CMP:
|
||||||
|
case OPCODE_MAD:
|
||||||
|
inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
|
||||||
|
inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
|
||||||
|
inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
// Scalar instructions needn't be reswizzled
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Redirect every source operand of @p inst that reads temporary
 * @p oldindex so that it reads temporary @p newindex instead.
 */
static void unalias_srcregs(struct prog_instruction *inst, GLuint oldindex, GLuint newindex)
{
	int count = _mesa_num_inst_src_regs(inst->Opcode);
	int n;

	for (n = 0; n < count; n++) {
		struct prog_src_register *reg = &inst->SrcReg[n];

		if (reg->File != PROGRAM_TEMPORARY)
			continue;
		if (reg->Index != oldindex)
			continue;

		reg->Index = newindex;
	}
}
|
||||||
|
|
||||||
|
/**
 * Give the earlier uses of a temporary a register of their own.
 *
 * In all instructions before the current IP, both writes to and reads from
 * temporary @p oldindex are renamed to a free temporary. In the instruction
 * at the current IP only the reads are renamed; its destination keeps
 * @p oldindex.
 */
static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex)
{
	GLuint newindex = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY);
	struct prog_instruction *current = s->Program->Instructions + s->IP;
	struct prog_instruction *inst;

	for (inst = s->Program->Instructions; inst < current; inst++) {
		if (inst->DstReg.File == PROGRAM_TEMPORARY && inst->DstReg.Index == oldindex)
			inst->DstReg.Index = newindex;

		unalias_srcregs(inst, oldindex, newindex);
	}

	unalias_srcregs(current, oldindex, newindex);
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Handle one instruction.
|
||||||
|
*/
|
||||||
|
static void process_instruction(struct nqssadce_state* s)
|
||||||
|
{
|
||||||
|
struct prog_instruction *inst = s->Program->Instructions + s->IP;
|
||||||
|
|
||||||
|
if (inst->Opcode == OPCODE_END)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (inst->Opcode != OPCODE_KIL) {
|
||||||
|
if (s->Descr->RewriteDepthOut) {
|
||||||
|
if (inst->DstReg.File == PROGRAM_OUTPUT && inst->DstReg.Index == FRAG_RESULT_DEPR)
|
||||||
|
rewrite_depth_out(inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index);
|
||||||
|
if (!regstate) {
|
||||||
|
_mesa_problem(s->Ctx, "NqssaDce: bad destination register (%i[%i])\n",
|
||||||
|
inst->DstReg.File, inst->DstReg.Index);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
inst->DstReg.WriteMask &= regstate->Sourced;
|
||||||
|
regstate->Sourced &= ~inst->DstReg.WriteMask;
|
||||||
|
|
||||||
|
if (inst->DstReg.WriteMask == 0) {
|
||||||
|
_mesa_delete_instructions(s->Program, s->IP, 1);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (inst->DstReg.File == PROGRAM_TEMPORARY && !regstate->Sourced)
|
||||||
|
unalias_temporary(s, inst->DstReg.Index);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Attention: Due to swizzle emulation code, the following
|
||||||
|
* might change the instruction stream under us, so we have
|
||||||
|
* to be careful with the inst pointer. */
|
||||||
|
switch (inst->Opcode) {
|
||||||
|
case OPCODE_FRC:
|
||||||
|
case OPCODE_MOV:
|
||||||
|
inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
|
||||||
|
break;
|
||||||
|
case OPCODE_ADD:
|
||||||
|
case OPCODE_MAX:
|
||||||
|
case OPCODE_MIN:
|
||||||
|
case OPCODE_MUL:
|
||||||
|
inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
|
||||||
|
inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask);
|
||||||
|
break;
|
||||||
|
case OPCODE_CMP:
|
||||||
|
case OPCODE_MAD:
|
||||||
|
inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
|
||||||
|
inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask);
|
||||||
|
inst = track_used_srcreg(s, inst, 2, inst->DstReg.WriteMask);
|
||||||
|
break;
|
||||||
|
case OPCODE_COS:
|
||||||
|
case OPCODE_EX2:
|
||||||
|
case OPCODE_LG2:
|
||||||
|
case OPCODE_RCP:
|
||||||
|
case OPCODE_RSQ:
|
||||||
|
case OPCODE_SIN:
|
||||||
|
inst = track_used_srcreg(s, inst, 0, 0x1);
|
||||||
|
break;
|
||||||
|
case OPCODE_DP3:
|
||||||
|
inst = track_used_srcreg(s, inst, 0, 0x7);
|
||||||
|
inst = track_used_srcreg(s, inst, 1, 0x7);
|
||||||
|
break;
|
||||||
|
case OPCODE_DP4:
|
||||||
|
inst = track_used_srcreg(s, inst, 0, 0xf);
|
||||||
|
inst = track_used_srcreg(s, inst, 1, 0xf);
|
||||||
|
break;
|
||||||
|
case OPCODE_KIL:
|
||||||
|
case OPCODE_TEX:
|
||||||
|
case OPCODE_TXB:
|
||||||
|
case OPCODE_TXP:
|
||||||
|
inst = track_used_srcreg(s, inst, 0, 0xf);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
_mesa_problem(s->Ctx, "NqssaDce: Unknown opcode %d\n", inst->Opcode);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Run the "Not-quite SSA" and dead-code elimination pass over a program.
 *
 * The instructions are visited from the last one to the first one.
 *
 * @param ctx   GL context, used for problem reporting
 * @param p     program to transform in place
 * @param descr hardware description; its Init callback is invoked on the
 *              zeroed pass state before the walk starts
 */
void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr)
{
	struct nqssadce_state state;

	_mesa_bzero(&state, sizeof(state));
	state.Ctx = ctx;
	state.Program = p;
	state.Descr = descr;
	state.Descr->Init(&state);

	/* process_instruction may move state.IP forward again when it
	 * inserts instructions, so IP is only ever decremented here. */
	for (state.IP = p->NumInstructions; state.IP > 0; ) {
		state.IP--;
		process_instruction(&state);
	}
}
|
96
src/mesa/drivers/dri/r300/radeon_nqssadce.h
Normal file
96
src/mesa/drivers/dri/r300/radeon_nqssadce.h
Normal file
@@ -0,0 +1,96 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2008 Nicolai Haehnle.
|
||||||
|
*
|
||||||
|
* All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
* a copy of this software and associated documentation files (the
|
||||||
|
* "Software"), to deal in the Software without restriction, including
|
||||||
|
* without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
* permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
* the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the
|
||||||
|
* next paragraph) shall be included in all copies or substantial
|
||||||
|
* portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
|
||||||
|
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||||
|
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||||
|
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __RADEON_PROGRAM_NQSSADCE_H_
|
||||||
|
#define __RADEON_PROGRAM_NQSSADCE_H_
|
||||||
|
|
||||||
|
#include "radeon_program.h"
|
||||||
|
|
||||||
|
|
||||||
|
struct register_state {
|
||||||
|
/**
|
||||||
|
* Bitmask indicating which components of the register are sourced
|
||||||
|
* by later instructions.
|
||||||
|
*/
|
||||||
|
GLuint Sourced : 4;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Maintain state such as which registers are used, which registers are
|
||||||
|
* read from, etc.
|
||||||
|
*/
|
||||||
|
struct nqssadce_state {
|
||||||
|
GLcontext *Ctx;
|
||||||
|
struct gl_program *Program;
|
||||||
|
struct radeon_nqssadce_descr *Descr;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* All instructions after this instruction pointer have been dealt with.
|
||||||
|
*/
|
||||||
|
int IP;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Which registers are read by subsequent instructions?
|
||||||
|
*/
|
||||||
|
struct register_state Temps[MAX_PROGRAM_TEMPS];
|
||||||
|
struct register_state Outputs[VERT_RESULT_MAX];
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This structure contains a description of the hardware in-so-far as
|
||||||
|
* it is required for the NqSSA-DCE pass.
|
||||||
|
*/
|
||||||
|
struct radeon_nqssadce_descr {
|
||||||
|
/**
|
||||||
|
* Fill in which outputs
|
||||||
|
*/
|
||||||
|
void (*Init)(struct nqssadce_state *);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check whether the given swizzle, absolute and negate combination
|
||||||
|
* can be implemented natively by the hardware for this opcode.
|
||||||
|
*/
|
||||||
|
GLboolean (*IsNativeSwizzle)(GLuint opcode, struct prog_src_register reg);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Emit (at the current IP) the instruction MOV dst, src;
|
||||||
|
* The transformation will work recursively on the emitted instruction(s).
|
||||||
|
*/
|
||||||
|
void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Rewrite instructions that write to DEPR.z to write to DEPR.w
|
||||||
|
* instead (rewriting is done *before* the WriteMask test).
|
||||||
|
*/
|
||||||
|
GLboolean RewriteDepthOut;
|
||||||
|
void *Data;
|
||||||
|
};
|
||||||
|
|
||||||
|
void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr);
|
||||||
|
|
||||||
|
#endif /* __RADEON_PROGRAM_NQSSADCE_H_ */
|
@@ -112,7 +112,7 @@ _mesa_free_program_data(GLcontext *ctx)
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Update the default program objects in the given context to reference those
|
* Update the default program objects in the given context to reference those
|
||||||
* specified in the shared state and release those referencing the old
|
* specified in the shared state and release those referencing the old
|
||||||
* shared state.
|
* shared state.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
@@ -238,7 +238,7 @@ struct gl_program *
|
|||||||
_mesa_init_fragment_program( GLcontext *ctx, struct gl_fragment_program *prog,
|
_mesa_init_fragment_program( GLcontext *ctx, struct gl_fragment_program *prog,
|
||||||
GLenum target, GLuint id)
|
GLenum target, GLuint id)
|
||||||
{
|
{
|
||||||
if (prog)
|
if (prog)
|
||||||
return _mesa_init_program_struct( ctx, &prog->Base, target, id );
|
return _mesa_init_program_struct( ctx, &prog->Base, target, id );
|
||||||
else
|
else
|
||||||
return NULL;
|
return NULL;
|
||||||
@@ -252,7 +252,7 @@ struct gl_program *
|
|||||||
_mesa_init_vertex_program( GLcontext *ctx, struct gl_vertex_program *prog,
|
_mesa_init_vertex_program( GLcontext *ctx, struct gl_vertex_program *prog,
|
||||||
GLenum target, GLuint id)
|
GLenum target, GLuint id)
|
||||||
{
|
{
|
||||||
if (prog)
|
if (prog)
|
||||||
return _mesa_init_program_struct( ctx, &prog->Base, target, id );
|
return _mesa_init_program_struct( ctx, &prog->Base, target, id );
|
||||||
else
|
else
|
||||||
return NULL;
|
return NULL;
|
||||||
@@ -265,7 +265,7 @@ _mesa_init_vertex_program( GLcontext *ctx, struct gl_vertex_program *prog,
|
|||||||
* ctx->Driver.NewProgram. May be overridden (ie. replaced) by a
|
* ctx->Driver.NewProgram. May be overridden (ie. replaced) by a
|
||||||
* device driver function to implement OO deriviation with additional
|
* device driver function to implement OO deriviation with additional
|
||||||
* types not understood by this function.
|
* types not understood by this function.
|
||||||
*
|
*
|
||||||
* \param ctx context
|
* \param ctx context
|
||||||
* \param id program id/number
|
* \param id program id/number
|
||||||
* \param target program target/type
|
* \param target program target/type
|
||||||
@@ -309,7 +309,7 @@ _mesa_delete_program(GLcontext *ctx, struct gl_program *prog)
|
|||||||
|
|
||||||
if (prog == &_mesa_DummyProgram)
|
if (prog == &_mesa_DummyProgram)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (prog->String)
|
if (prog->String)
|
||||||
_mesa_free(prog->String);
|
_mesa_free(prog->String);
|
||||||
|
|
||||||
@@ -382,7 +382,7 @@ _mesa_reference_program(GLcontext *ctx,
|
|||||||
|
|
||||||
deleteFlag = ((*ptr)->RefCount == 0);
|
deleteFlag = ((*ptr)->RefCount == 0);
|
||||||
/*_glthread_UNLOCK_MUTEX((*ptr)->Mutex);*/
|
/*_glthread_UNLOCK_MUTEX((*ptr)->Mutex);*/
|
||||||
|
|
||||||
if (deleteFlag) {
|
if (deleteFlag) {
|
||||||
ASSERT(ctx);
|
ASSERT(ctx);
|
||||||
ctx->Driver.DeleteProgram(ctx, *ptr);
|
ctx->Driver.DeleteProgram(ctx, *ptr);
|
||||||
@@ -541,6 +541,53 @@ _mesa_insert_instructions(struct gl_program *prog, GLuint start, GLuint count)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Delete 'count' instructions at 'start' in the given program.
|
||||||
|
* Adjust branch targets accordingly.
|
||||||
|
*/
|
||||||
|
GLboolean
|
||||||
|
_mesa_delete_instructions(struct gl_program *prog, GLuint start, GLuint count)
|
||||||
|
{
|
||||||
|
const GLuint origLen = prog->NumInstructions;
|
||||||
|
const GLuint newLen = origLen - count;
|
||||||
|
struct prog_instruction *newInst;
|
||||||
|
GLuint i;
|
||||||
|
|
||||||
|
/* adjust branches */
|
||||||
|
for (i = 0; i < prog->NumInstructions; i++) {
|
||||||
|
struct prog_instruction *inst = prog->Instructions + i;
|
||||||
|
if (inst->BranchTarget > 0) {
|
||||||
|
if (inst->BranchTarget >= start) {
|
||||||
|
inst->BranchTarget -= count;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Alloc storage for new instructions */
|
||||||
|
newInst = _mesa_alloc_instructions(newLen);
|
||||||
|
if (!newInst) {
|
||||||
|
return GL_FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Copy 'start' instructions into new instruction buffer */
|
||||||
|
_mesa_copy_instructions(newInst, prog->Instructions, start);
|
||||||
|
|
||||||
|
/* Copy the remaining/tail instructions to new inst buffer */
|
||||||
|
_mesa_copy_instructions(newInst + start,
|
||||||
|
prog->Instructions + start + count,
|
||||||
|
newLen - start);
|
||||||
|
|
||||||
|
/* free old instructions */
|
||||||
|
_mesa_free_instructions(prog->Instructions, origLen);
|
||||||
|
|
||||||
|
/* install new instructions */
|
||||||
|
prog->Instructions = newInst;
|
||||||
|
prog->NumInstructions = newLen;
|
||||||
|
|
||||||
|
return GL_TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Search instructions for registers that match (oldFile, oldIndex),
|
* Search instructions for registers that match (oldFile, oldIndex),
|
||||||
* replacing them with (newFile, newIndex).
|
* replacing them with (newFile, newIndex).
|
||||||
@@ -844,7 +891,7 @@ _mesa_BindProgram(GLenum target, GLuint id)
|
|||||||
* \note Not compiled into display lists.
|
* \note Not compiled into display lists.
|
||||||
* \note Called by both glDeleteProgramsNV and glDeleteProgramsARB.
|
* \note Called by both glDeleteProgramsNV and glDeleteProgramsARB.
|
||||||
*/
|
*/
|
||||||
void GLAPIENTRY
|
void GLAPIENTRY
|
||||||
_mesa_DeletePrograms(GLsizei n, const GLuint *ids)
|
_mesa_DeletePrograms(GLsizei n, const GLuint *ids)
|
||||||
{
|
{
|
||||||
GLint i;
|
GLint i;
|
||||||
|
@@ -67,13 +67,13 @@ _mesa_find_line_column(const GLubyte *string, const GLubyte *pos,
|
|||||||
GLint *line, GLint *col);
|
GLint *line, GLint *col);
|
||||||
|
|
||||||
|
|
||||||
extern struct gl_program *
|
extern struct gl_program *
|
||||||
_mesa_init_vertex_program(GLcontext *ctx,
|
_mesa_init_vertex_program(GLcontext *ctx,
|
||||||
struct gl_vertex_program *prog,
|
struct gl_vertex_program *prog,
|
||||||
GLenum target, GLuint id);
|
GLenum target, GLuint id);
|
||||||
|
|
||||||
extern struct gl_program *
|
extern struct gl_program *
|
||||||
_mesa_init_fragment_program(GLcontext *ctx,
|
_mesa_init_fragment_program(GLcontext *ctx,
|
||||||
struct gl_fragment_program *prog,
|
struct gl_fragment_program *prog,
|
||||||
GLenum target, GLuint id);
|
GLenum target, GLuint id);
|
||||||
|
|
||||||
@@ -115,6 +115,9 @@ _mesa_clone_program(GLcontext *ctx, const struct gl_program *prog);
|
|||||||
extern GLboolean
|
extern GLboolean
|
||||||
_mesa_insert_instructions(struct gl_program *prog, GLuint start, GLuint count);
|
_mesa_insert_instructions(struct gl_program *prog, GLuint start, GLuint count);
|
||||||
|
|
||||||
|
extern GLboolean
|
||||||
|
_mesa_delete_instructions(struct gl_program *prog, GLuint start, GLuint count);
|
||||||
|
|
||||||
extern struct gl_program *
|
extern struct gl_program *
|
||||||
_mesa_combine_programs(GLcontext *ctx,
|
_mesa_combine_programs(GLcontext *ctx,
|
||||||
const struct gl_program *progA,
|
const struct gl_program *progA,
|
||||||
|
Reference in New Issue
Block a user