nvc0/ir: replace cvt instructions with add to improve shader performance
Gives me a performance boost of 0.2% in pixmark_piano on my gk106, gm204 and gp107. Reduces the number of generated convert instructions by roughly 30% in shader-db.

v2: only for 32 bit operations
    move some common code out of the switch
    handle OP_SAT with modifiers
v3: only for registers and const memory
    rework if clauses
    merge isCvt into this patch
v4: merge isCvt into its use

Signed-off-by: Karol Herbst <kherbst@redhat.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
This commit is contained in:
@@ -719,6 +719,66 @@ NVC0LegalizePostRA::propagateJoin(BasicBlock *bb)
|
|||||||
bb->remove(bb->getEntry());
|
bb->remove(bb->getEntry());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// replaces instructions which would end up as f2f or i2i with faster
|
||||||
|
// alternatives:
|
||||||
|
// - fabs(a) -> fadd(0, abs a)
|
||||||
|
// - fneg(a) -> fadd(neg 0, neg a)
|
||||||
|
// - ineg(a) -> iadd(0, neg a)
|
||||||
|
// - fneg(abs a) -> fadd(neg 0, neg abs a)
|
||||||
|
// - sat(a) -> sat add(0, a)
|
||||||
|
void
|
||||||
|
NVC0LegalizePostRA::replaceCvt(Instruction *cvt)
|
||||||
|
{
|
||||||
|
if (!isFloatType(cvt->sType) && typeSizeof(cvt->sType) != 4)
|
||||||
|
return;
|
||||||
|
if (cvt->sType != cvt->dType)
|
||||||
|
return;
|
||||||
|
// we could make it work, but in this case we have optimizations disabled
|
||||||
|
// and we don't really care either way.
|
||||||
|
if (cvt->src(0).getFile() != FILE_GPR &&
|
||||||
|
cvt->src(0).getFile() != FILE_MEMORY_CONST)
|
||||||
|
return;
|
||||||
|
|
||||||
|
Modifier mod0, mod1;
|
||||||
|
|
||||||
|
switch (cvt->op) {
|
||||||
|
case OP_ABS:
|
||||||
|
if (cvt->src(0).mod)
|
||||||
|
return;
|
||||||
|
if (!isFloatType(cvt->sType))
|
||||||
|
return;
|
||||||
|
mod0 = 0;
|
||||||
|
mod1 = NV50_IR_MOD_ABS;
|
||||||
|
break;
|
||||||
|
case OP_NEG:
|
||||||
|
if (!isFloatType(cvt->sType) && cvt->src(0).mod)
|
||||||
|
return;
|
||||||
|
if (isFloatType(cvt->sType) &&
|
||||||
|
(cvt->src(0).mod && cvt->src(0).mod != Modifier(NV50_IR_MOD_ABS)))
|
||||||
|
return;
|
||||||
|
|
||||||
|
mod0 = isFloatType(cvt->sType) ? NV50_IR_MOD_NEG : 0;
|
||||||
|
mod1 = cvt->src(0).mod == Modifier(NV50_IR_MOD_ABS) ?
|
||||||
|
NV50_IR_MOD_NEG_ABS : NV50_IR_MOD_NEG;
|
||||||
|
break;
|
||||||
|
case OP_SAT:
|
||||||
|
if (!isFloatType(cvt->sType) && cvt->src(0).mod.abs())
|
||||||
|
return;
|
||||||
|
mod0 = 0;
|
||||||
|
mod1 = cvt->src(0).mod;
|
||||||
|
cvt->saturate = true;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
cvt->op = OP_ADD;
|
||||||
|
cvt->moveSources(0, 1);
|
||||||
|
cvt->setSrc(0, rZero);
|
||||||
|
cvt->src(0).mod = mod0;
|
||||||
|
cvt->src(1).mod = mod1;
|
||||||
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
NVC0LegalizePostRA::visit(BasicBlock *bb)
|
NVC0LegalizePostRA::visit(BasicBlock *bb)
|
||||||
{
|
{
|
||||||
@@ -758,6 +818,9 @@ NVC0LegalizePostRA::visit(BasicBlock *bb)
|
|||||||
next = hi;
|
next = hi;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (i->op == OP_SAT || i->op == OP_NEG || i->op == OP_ABS)
|
||||||
|
replaceCvt(i);
|
||||||
|
|
||||||
if (i->op != OP_MOV && i->op != OP_PFETCH)
|
if (i->op != OP_MOV && i->op != OP_PFETCH)
|
||||||
replaceZero(i);
|
replaceZero(i);
|
||||||
}
|
}
|
||||||
|
@@ -81,6 +81,7 @@ private:
|
|||||||
virtual bool visit(Function *);
|
virtual bool visit(Function *);
|
||||||
virtual bool visit(BasicBlock *);
|
virtual bool visit(BasicBlock *);
|
||||||
|
|
||||||
|
void replaceCvt(Instruction *);
|
||||||
void replaceZero(Instruction *);
|
void replaceZero(Instruction *);
|
||||||
bool tryReplaceContWithBra(BasicBlock *);
|
bool tryReplaceContWithBra(BasicBlock *);
|
||||||
void propagateJoin(BasicBlock *);
|
void propagateJoin(BasicBlock *);
|
||||||
|
Reference in New Issue
Block a user