r300/compiler: Use rc_get_readers_normal() for presubtract optimizations

This commit is contained in:
Tom Stellard
2010-10-21 18:55:12 -07:00
parent cbc966b57b
commit aa43176ebd

View File

@@ -32,15 +32,12 @@
#include "radeon_compiler_util.h" #include "radeon_compiler_util.h"
#include "radeon_swizzle.h" #include "radeon_swizzle.h"
struct peephole_state { struct src_clobbered_data {
struct rc_instruction * Inst; unsigned int NumSrcRegs;
/** Stores a bitmask of the components that are still "alive" (i.e. unsigned int SrcMasks[3];
* they have not been written to since Inst was executed.)
*/
unsigned int WriteMask;
}; };
typedef void (*rc_presub_replace_fn)(struct peephole_state *, typedef void (*rc_presub_replace_fn)(struct rc_instruction *,
struct rc_instruction *, struct rc_instruction *,
unsigned int); unsigned int);
@@ -67,27 +64,6 @@ static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct
return combine; return combine;
} }
struct copy_propagate_state {
struct radeon_compiler * C;
struct rc_instruction * Mov;
unsigned int Conflict:1;
/** Whether Mov's source has been clobbered */
unsigned int SourceClobbered:1;
/** Which components of Mov's destination register are still from that Mov? */
unsigned int MovMask:4;
/** Which components of Mov's destination register are clearly *not* from that Mov */
unsigned int DefinedMask:4;
/** Which components of Mov's source register are sourced */
unsigned int SourcedMask:4;
/** Branch depth beyond Mov; negative value indicates we left the Mov's block */
int BranchDepth;
};
static void copy_propagate_scan_read(void * data, struct rc_instruction * inst, static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
struct rc_src_register * src) struct rc_src_register * src)
{ {
@@ -123,24 +99,36 @@ static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
} }
} }
static void copy_propagate_scan_write(void * data, struct rc_instruction * inst, static void is_src_clobbered_scan_write(
rc_register_file file, unsigned int index, unsigned int mask) void * data,
struct rc_instruction * inst,
rc_register_file file,
unsigned int index,
unsigned int mask)
{ {
unsigned int i;
struct rc_reader_data * reader_data = data; struct rc_reader_data * reader_data = data;
struct copy_propagate_state * s = reader_data->CbData; struct src_clobbered_data * d = reader_data->CbData;
for (i = 0; i < d->NumSrcRegs; i++) {
if (file == reader_data->Writer->U.I.SrcReg[i].File
&& index == reader_data->Writer->U.I.SrcReg[i].Index
&& (mask & d->SrcMasks[i])){
if (file == reader_data->Writer->U.I.SrcReg[0].File && index == reader_data->Writer->U.I.SrcReg[0].Index) {
if (mask & s->SourcedMask)
reader_data->AbortOnRead = 1; reader_data->AbortOnRead = 1;
} else if (s->Mov->U.I.SrcReg[0].RelAddr && file == RC_FILE_ADDRESS) { return;
reader_data->AbortOnRead = 1; }
if (reader_data->Writer->U.I.SrcReg[i].RelAddr &&
file == RC_FILE_ADDRESS) {
reader_data->AbortOnRead = 1;
return;
}
} }
} }
static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov) static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
{ {
struct copy_propagate_state s;
struct rc_reader_data reader_data; struct rc_reader_data reader_data;
struct src_clobbered_data sc_data;
unsigned int i; unsigned int i;
if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
@@ -149,22 +137,15 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i
inst_mov->U.I.SaturateMode) inst_mov->U.I.SaturateMode)
return; return;
memset(&s, 0, sizeof(s)); sc_data.NumSrcRegs = 1;
s.C = c; sc_data.SrcMasks[0] = rc_swizzle_to_writemask(
s.Mov = inst_mov; inst_mov->U.I.SrcReg[0].Swizzle);
s.MovMask = inst_mov->U.I.DstReg.WriteMask;
s.DefinedMask = RC_MASK_XYZW & ~s.MovMask;
reader_data.CbData = &s; reader_data.CbData = &sc_data;
for(unsigned int chan = 0; chan < 4; ++chan) {
unsigned int swz = GET_SWZ(inst_mov->U.I.SrcReg[0].Swizzle, chan);
s.SourcedMask |= (1 << swz) & RC_MASK_XYZW;
}
/* Get a list of all the readers of this MOV instruction. */ /* Get a list of all the readers of this MOV instruction. */
rc_get_readers_normal(c, inst_mov, &reader_data, rc_get_readers_normal(c, inst_mov, &reader_data,
copy_propagate_scan_read, copy_propagate_scan_write); copy_propagate_scan_read, is_src_clobbered_scan_write);
if (reader_data.Abort || reader_data.ReaderCount == 0) if (reader_data.Abort || reader_data.ReaderCount == 0)
return; return;
@@ -172,10 +153,10 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i
/* Propagate the MOV instruction. */ /* Propagate the MOV instruction. */
for (i = 0; i < reader_data.ReaderCount; i++) { for (i = 0; i < reader_data.ReaderCount; i++) {
struct rc_instruction * inst = reader_data.Readers[i].Inst; struct rc_instruction * inst = reader_data.Readers[i].Inst;
*reader_data.Readers[i].Src = chain_srcregs(*reader_data.Readers[i].Src, s.Mov->U.I.SrcReg[0]); *reader_data.Readers[i].Src = chain_srcregs(*reader_data.Readers[i].Src, inst_mov->U.I.SrcReg[0]);
if (s.Mov->U.I.SrcReg[0].File == RC_FILE_PRESUB) if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
inst->U.I.PreSub = s.Mov->U.I.PreSub; inst->U.I.PreSub = inst_mov->U.I.PreSub;
} }
/* Finally, remove the original MOV instruction */ /* Finally, remove the original MOV instruction */
@@ -431,129 +412,99 @@ static int src_has_const_swz(struct rc_src_register src) {
return 0; return 0;
} }
static void peephole_scan_write(void * data, struct rc_instruction * inst, static void presub_scan_read(
rc_register_file file, unsigned int index, unsigned int mask) void * data,
struct rc_instruction * inst,
struct rc_src_register * src)
{ {
struct peephole_state * s = data; struct rc_reader_data * reader_data = data;
if(s->Inst->U.I.DstReg.File == file const struct rc_opcode_info * info =
&& s->Inst->U.I.DstReg.Index == index) { rc_get_opcode_info(inst->U.I.Opcode);
unsigned int common_mask = s->WriteMask & mask; /* XXX: There are some situations where instructions
s->WriteMask &= ~common_mask; * with more than 2 src registers can use the
* presubtract select, but to keep things simple we
* will disable presubtract on these instructions for
* now. */
if (info->NumSrcRegs > 2 || info->HasTexture) {
reader_data->Abort = 1;
return;
}
/* We can't use more than one presubtract value in an
* instruction, unless the two presubtract operations
* are the same and read from the same registers.
* XXX For now we will limit instructions to only one presubtract
* value.*/
if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
reader_data->Abort = 1;
return;
} }
} }
static int presub_helper( static int presub_helper(
struct radeon_compiler * c, struct radeon_compiler * c,
struct peephole_state * s, struct rc_instruction * inst_add,
rc_presubtract_op presub_opcode, rc_presubtract_op presub_opcode,
rc_presub_replace_fn presub_replace) rc_presub_replace_fn presub_replace)
{ {
struct rc_instruction * inst; struct rc_reader_data reader_data;
unsigned int can_remove = 0; struct src_clobbered_data sc_data;
unsigned int cant_sub = 0; unsigned int i;
for(inst = s->Inst->Next; inst != &c->Program.Instructions; sc_data.NumSrcRegs = 2;
inst = inst->Next) { sc_data.SrcMasks[0] = rc_swizzle_to_writemask(
unsigned int i; inst_add->U.I.SrcReg[0].Swizzle);
unsigned char can_use_presub = 1; sc_data.SrcMasks[1] = rc_swizzle_to_writemask(
inst_add->U.I.SrcReg[1].Swizzle);
reader_data.CbData = &sc_data;
rc_get_readers_normal(c, inst_add, &reader_data, presub_scan_read,
is_src_clobbered_scan_write);
if (reader_data.Abort || reader_data.ReaderCount == 0)
return 0;
for(i = 0; i < reader_data.ReaderCount; i++) {
unsigned int src_index;
struct rc_reader reader = reader_data.Readers[i];
const struct rc_opcode_info * info = const struct rc_opcode_info * info =
rc_get_opcode_info(inst->U.I.Opcode); rc_get_opcode_info(reader.Inst->U.I.Opcode);
/* XXX: There are some situations where instructions
* with more than 2 src registers can use the
* presubtract select, but to keep things simple we
* will disable presubtract on these instructions for
* now. */
if (info->NumSrcRegs > 2 || info->HasTexture) {
can_use_presub = 0;
}
/* We can't use more than one presubtract value in an for (src_index = 0; src_index < info->NumSrcRegs; src_index++) {
* instruction, unless the two presubtract operations if (&reader.Inst->U.I.SrcReg[src_index] == reader.Src)
* are the same and read from the same registers. */ presub_replace(inst_add, reader.Inst, src_index);
if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
if (inst->U.I.PreSub.Opcode != presub_opcode
|| inst->U.I.PreSub.SrcReg[0].File !=
s->Inst->U.I.SrcReg[1].File
|| inst->U.I.PreSub.SrcReg[0].Index !=
s->Inst->U.I.SrcReg[1].Index) {
can_use_presub = 0;
}
}
/* Even if the instruction can't use a presubtract operation
* we still need to check if the instruction reads from
* s->Inst->U.I.DstReg, because if it does we must not
* remove s->Inst. */
for(i = 0; i < info->NumSrcRegs; i++) {
unsigned int mask = src_reads_dst_mask(
inst->U.I.SrcReg[i], s->Inst->U.I.DstReg);
/* XXX We could be more aggressive here using
* presubtract. It is okay if SrcReg[i] only reads
* from some of the mask components. */
if(s->Inst->U.I.DstReg.WriteMask != mask) {
if (s->Inst->U.I.DstReg.WriteMask & mask) {
can_remove = 0;
break;
} else {
continue;
}
}
if (cant_sub || !can_use_presub) {
can_remove = 0;
break;
}
presub_replace(s, inst, i);
can_remove = 1;
}
if(!can_remove)
break;
rc_for_all_writes_mask(inst, peephole_scan_write, s);
/* If all components of inst_add's destination register have
* been written to by subsequent instructions, the original
* value of the destination register is no longer valid and
* we can't keep doing substitutions. */
if (!s->WriteMask){
break;
}
/* Make sure this instruction doesn't write to the presubtract source. */
if (inst->U.I.DstReg.WriteMask &
src_reads_dst_mask(s->Inst->U.I.SrcReg[1],
inst->U.I.DstReg)
|| src_reads_dst_mask(s->Inst->U.I.SrcReg[0],
inst->U.I.DstReg)
|| info->IsFlowControl) {
cant_sub = 1;
} }
} }
return can_remove; return 1;
} }
/* This function assumes that s->Inst->U.I.SrcReg[0] and /* This function assumes that inst_add->U.I.SrcReg[0] and
* s->Inst->U.I.SrcReg[1] aren't both negative. */ * inst_add->U.I.SrcReg[1] aren't both negative. */
static void presub_replace_add(struct peephole_state *s, static void presub_replace_add(
struct rc_instruction * inst, struct rc_instruction * inst_add,
unsigned int src_index) struct rc_instruction * inst_reader,
unsigned int src_index)
{ {
rc_presubtract_op presub_opcode; rc_presubtract_op presub_opcode;
if (s->Inst->U.I.SrcReg[1].Negate || s->Inst->U.I.SrcReg[0].Negate) if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate)
presub_opcode = RC_PRESUB_SUB; presub_opcode = RC_PRESUB_SUB;
else else
presub_opcode = RC_PRESUB_ADD; presub_opcode = RC_PRESUB_ADD;
if (s->Inst->U.I.SrcReg[1].Negate) { if (inst_add->U.I.SrcReg[1].Negate) {
inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[1]; inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
inst->U.I.PreSub.SrcReg[1] = s->Inst->U.I.SrcReg[0]; inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0];
} else { } else {
inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[0]; inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0];
inst->U.I.PreSub.SrcReg[1] = s->Inst->U.I.SrcReg[1]; inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1];
} }
inst->U.I.PreSub.SrcReg[0].Negate = 0; inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
inst->U.I.PreSub.SrcReg[1].Negate = 0; inst_reader->U.I.PreSub.SrcReg[1].Negate = 0;
inst->U.I.PreSub.Opcode = presub_opcode; inst_reader->U.I.PreSub.Opcode = presub_opcode;
inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index], inst_reader->U.I.SrcReg[src_index] =
inst->U.I.PreSub.SrcReg[0]); chain_srcregs(inst_reader->U.I.SrcReg[src_index],
inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; inst_reader->U.I.PreSub.SrcReg[0]);
inst->U.I.SrcReg[src_index].Index = presub_opcode; inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
inst_reader->U.I.SrcReg[src_index].Index = presub_opcode;
} }
static int is_presub_candidate(struct rc_instruction * inst) static int is_presub_candidate(struct rc_instruction * inst)
@@ -578,7 +529,6 @@ static int peephole_add_presub_add(
struct rc_src_register * src0 = NULL; struct rc_src_register * src0 = NULL;
struct rc_src_register * src1 = NULL; struct rc_src_register * src1 = NULL;
unsigned int i; unsigned int i;
struct peephole_state s;
if (!is_presub_candidate(inst_add)) if (!is_presub_candidate(inst_add))
return 0; return 0;
@@ -604,30 +554,28 @@ static int peephole_add_presub_add(
if (!src1) if (!src1)
return 0; return 0;
s.Inst = inst_add; if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
s.WriteMask = inst_add->U.I.DstReg.WriteMask;
if (presub_helper(c, &s, RC_PRESUB_ADD, presub_replace_add)) {
rc_remove_instruction(inst_add); rc_remove_instruction(inst_add);
return 1; return 1;
} }
return 0; return 0;
} }
static void presub_replace_inv(struct peephole_state * s, static void presub_replace_inv(
struct rc_instruction * inst, struct rc_instruction * inst_add,
unsigned int src_index) struct rc_instruction * inst_reader,
unsigned int src_index)
{ {
/* We must be careful not to modify s->Inst, since it /* We must be careful not to modify inst_add, since it
* is possible it will remain part of the program. * is possible it will remain part of the program.*/
* XXX Maybe pass a struct instead of a pointer for s->Inst.*/ inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[1]; inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
inst->U.I.PreSub.SrcReg[0].Negate = 0; inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV;
inst->U.I.PreSub.Opcode = RC_PRESUB_INV; inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index],
inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index], inst_reader->U.I.PreSub.SrcReg[0]);
inst->U.I.PreSub.SrcReg[0]);
inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
inst->U.I.SrcReg[src_index].Index = RC_PRESUB_INV; inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
} }
/** /**
@@ -645,7 +593,6 @@ static int peephole_add_presub_inv(
struct rc_instruction * inst_add) struct rc_instruction * inst_add)
{ {
unsigned int i, swz, mask; unsigned int i, swz, mask;
struct peephole_state s;
if (!is_presub_candidate(inst_add)) if (!is_presub_candidate(inst_add))
return 0; return 0;
@@ -674,11 +621,7 @@ static int peephole_add_presub_inv(
return 0; return 0;
} }
/* Setup the peephole_state information. */ if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) {
s.Inst = inst_add;
s.WriteMask = inst_add->U.I.DstReg.WriteMask;
if (presub_helper(c, &s, RC_PRESUB_INV, presub_replace_inv)) {
rc_remove_instruction(inst_add); rc_remove_instruction(inst_add);
return 1; return 1;
} }