nv50/ir/tgsi: handle TGSI_OPCODE_LOAD,STORE

Squashed and (heavily) modified original patches by Francisco Jerez:
nv50/ir/tgsi: Implement resource LOAD/STORE (wip).
nv50/ir/tgsi: Emit SUST/SULD for surface access, and add CB LOAD/STORE support
nv50/ir/tgsi: Fix/clean up the LOAD/STORE handling code.

Left out for now:
nv50/ir/tgsi: Resource indirect indexing

Treating raw, read-only surfaces as constant buffers (CBs) was removed
because CBs are limited to a size of 64 KiB, which isn't desirable, and
because this decision should probably be made by the state tracker.
If we used a number of CB slots for surfaces, it might find that we
cannot accommodate the advertised limit.
This commit is contained in:
Christoph Bumiller
2013-02-23 00:39:23 +01:00
parent d105b3df14
commit c2dfcd7f0e
7 changed files with 303 additions and 30 deletions

View File

@@ -871,9 +871,9 @@ public:
struct { struct {
Target target; Target target;
uint8_t r; uint16_t r;
uint16_t s;
int8_t rIndirectSrc; int8_t rIndirectSrc;
uint8_t s;
int8_t sIndirectSrc; int8_t sIndirectSrc;
uint8_t mask; uint8_t mask;

View File

@@ -240,15 +240,17 @@ BuildUtil::mkCmp(operation op, CondCode cc, DataType ty, Value *dst,
return insn; return insn;
} }
Instruction * TexInstruction *
BuildUtil::mkTex(operation op, TexTarget targ, uint8_t tic, uint8_t tsc, BuildUtil::mkTex(operation op, TexTarget targ,
Value **def, Value **src) uint16_t tic, uint16_t tsc,
const std::vector<Value *> &def,
const std::vector<Value *> &src)
{ {
TexInstruction *tex = new_TexInstruction(func, op); TexInstruction *tex = new_TexInstruction(func, op);
for (int d = 0; d < 4 && def[d]; ++d) for (size_t d = 0; d < def.size() && def[d]; ++d)
tex->setDef(d, def[d]); tex->setDef(d, def[d]);
for (int s = 0; s < 4 && src[s]; ++s) for (size_t s = 0; s < src.size() && src[s]; ++s)
tex->setSrc(s, src[s]); tex->setSrc(s, src[s]);
tex->setTexture(targ, tic, tsc); tex->setTexture(targ, tic, tsc);

View File

@@ -75,8 +75,10 @@ public:
CmpInstruction *mkCmp(operation, CondCode, DataType, CmpInstruction *mkCmp(operation, CondCode, DataType,
Value *, Value *,
Value *, Value *, Value * = NULL); Value *, Value *, Value * = NULL);
Instruction *mkTex(operation, TexTarget, uint8_t tic, uint8_t tsc, TexInstruction *mkTex(operation, TexTarget,
Value **def, Value **src); uint16_t tic, uint16_t tsc,
const std::vector<Value *> &def,
const std::vector<Value *> &src);
Instruction *mkQuadop(uint8_t qop, Value *, uint8_t l, Value *, Value *); Instruction *mkQuadop(uint8_t qop, Value *, uint8_t l, Value *, Value *);
FlowInstruction *mkFlow(operation, void *target, CondCode, Value *pred); FlowInstruction *mkFlow(operation, void *target, CondCode, Value *pred);

View File

@@ -161,6 +161,10 @@ struct nv50_ir_prog_info
boolean separateFragData; boolean separateFragData;
boolean usesDiscard; boolean usesDiscard;
} fp; } fp;
struct {
uint32_t inputOffset; /* base address for user args */
uint32_t sharedOffset; /* reserved space in s[] */
} cp;
} prop; } prop;
struct { struct {
@@ -179,6 +183,7 @@ struct nv50_ir_prog_info
uint8_t sampleMask; /* output index of SampleMask */ uint8_t sampleMask; /* output index of SampleMask */
uint8_t backFaceColor[2]; /* input/output indices of back face colour */ uint8_t backFaceColor[2]; /* input/output indices of back face colour */
uint8_t globalAccess; /* 1 for read, 2 for wr, 3 for rw */ uint8_t globalAccess; /* 1 for read, 2 for wr, 3 for rw */
boolean nv50styleSurfaces; /* generate gX[] access for raw buffers */
uint8_t resInfoCBSlot; /* cX[] used for tex handles, surface info */ uint8_t resInfoCBSlot; /* cX[] used for tex handles, surface info */
uint16_t texBindBase; /* base address for tex handles (nve4) */ uint16_t texBindBase; /* base address for tex handles (nve4) */
uint16_t suInfoBase; /* base address for surface info (nve4) */ uint16_t suInfoBase; /* base address for surface info (nve4) */

View File

@@ -559,7 +559,6 @@ static nv50_ir::operation translateOpcode(uint opcode)
NV50_IR_OPCODE_CASE(USLT, SET); NV50_IR_OPCODE_CASE(USLT, SET);
NV50_IR_OPCODE_CASE(USNE, SET); NV50_IR_OPCODE_CASE(USNE, SET);
NV50_IR_OPCODE_CASE(LOAD, TXF);
NV50_IR_OPCODE_CASE(SAMPLE, TEX); NV50_IR_OPCODE_CASE(SAMPLE, TEX);
NV50_IR_OPCODE_CASE(SAMPLE_B, TXB); NV50_IR_OPCODE_CASE(SAMPLE_B, TXB);
NV50_IR_OPCODE_CASE(SAMPLE_C, TEX); NV50_IR_OPCODE_CASE(SAMPLE_C, TEX);
@@ -620,8 +619,17 @@ public:
int clipVertexOutput; int clipVertexOutput;
uint8_t *samplerViewTargets; // TGSI_TEXTURE_* struct TextureView {
unsigned samplerViewCount; uint8_t target; // TGSI_TEXTURE_*
};
std::vector<TextureView> textureViews;
struct Resource {
uint8_t target; // TGSI_TEXTURE_*
bool raw;
uint8_t slot; // $surface index
};
std::vector<Resource> resources;
private: private:
int inferSysValDirection(unsigned sn) const; int inferSysValDirection(unsigned sn) const;
@@ -640,8 +648,6 @@ Source::Source(struct nv50_ir_prog_info *prog) : info(prog)
if (prog->dbgFlags & NV50_IR_DEBUG_BASIC) if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
tgsi_dump(tokens, 0); tgsi_dump(tokens, 0);
samplerViewTargets = NULL;
mainTempsInLMem = FALSE; mainTempsInLMem = FALSE;
} }
@@ -654,9 +660,6 @@ Source::~Source()
FREE(info->immd.data); FREE(info->immd.data);
if (info->immd.type) if (info->immd.type)
FREE(info->immd.type); FREE(info->immd.type);
if (samplerViewTargets)
delete[] samplerViewTargets;
} }
bool Source::scanSource() bool Source::scanSource()
@@ -673,8 +676,8 @@ bool Source::scanSource()
clipVertexOutput = -1; clipVertexOutput = -1;
samplerViewCount = scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1; textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1);
samplerViewTargets = new uint8_t[samplerViewCount]; resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1);
info->immd.bufSize = 0; info->immd.bufSize = 0;
tempArrayCount = 0; tempArrayCount = 0;
@@ -899,9 +902,16 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
info->sv[i].input = inferSysValDirection(sn); info->sv[i].input = inferSysValDirection(sn);
} }
break; break;
case TGSI_FILE_RESOURCE:
for (i = first; i <= last; ++i) {
resources[i].target = decl->Resource.Resource;
resources[i].raw = decl->Resource.Raw;
resources[i].slot = i;
}
break;
case TGSI_FILE_SAMPLER_VIEW: case TGSI_FILE_SAMPLER_VIEW:
for (i = first; i <= last; ++i) for (i = first; i <= last; ++i)
samplerViewTargets[i] = decl->SamplerView.Resource; textureViews[i].target = decl->SamplerView.Resource;
break; break;
case TGSI_FILE_IMMEDIATE_ARRAY: case TGSI_FILE_IMMEDIATE_ARRAY:
{ {
@@ -997,9 +1007,15 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
for (unsigned s = 0; s < insn.srcCount(); ++s) { for (unsigned s = 0; s < insn.srcCount(); ++s) {
Instruction::SrcRegister src = insn.getSrc(s); Instruction::SrcRegister src = insn.getSrc(s);
if (src.getFile() == TGSI_FILE_TEMPORARY) if (src.getFile() == TGSI_FILE_TEMPORARY) {
if (src.isIndirect(0)) if (src.isIndirect(0))
mainTempsInLMem = TRUE; mainTempsInLMem = TRUE;
} else
if (src.getFile() == TGSI_FILE_RESOURCE) {
if (src.getIndex(0) == TGSI_RESOURCE_GLOBAL)
info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
0x1 : 0x2;
}
if (src.getFile() != TGSI_FILE_INPUT) if (src.getFile() != TGSI_FILE_INPUT)
continue; continue;
unsigned mask = insn.srcMask(s); unsigned mask = insn.srcMask(s);
@@ -1025,13 +1041,16 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
nv50_ir::TexInstruction::Target nv50_ir::TexInstruction::Target
Instruction::getTexture(const tgsi::Source *code, int s) const Instruction::getTexture(const tgsi::Source *code, int s) const
{ {
// XXX: indirect access
unsigned int r;
switch (getSrc(s).getFile()) { switch (getSrc(s).getFile()) {
case TGSI_FILE_SAMPLER_VIEW: { case TGSI_FILE_RESOURCE:
// XXX: indirect access r = getSrc(s).getIndex(0);
unsigned int r = getSrc(s).getIndex(0); return translateTexture(code->resources.at(r).target);
assert(r < code->samplerViewCount); case TGSI_FILE_SAMPLER_VIEW:
return translateTexture(code->samplerViewTargets[r]); r = getSrc(s).getIndex(0);
} return translateTexture(code->textureViews.at(r).target);
default: default:
return translateTexture(insn->Texture.Texture); return translateTexture(insn->Texture.Texture);
} }
@@ -1091,6 +1110,12 @@ private:
void handleLIT(Value *dst0[4]); void handleLIT(Value *dst0[4]);
void handleUserClipPlanes(); void handleUserClipPlanes();
Symbol *getResourceBase(int r);
void getResourceCoords(std::vector<Value *>&, int r, int s);
void handleLOAD(Value *dst0[4]);
void handleSTORE();
Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr); Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr);
void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork); void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork);
@@ -1710,6 +1735,236 @@ Converter::handleLIT(Value *dst0[4])
} }
} }
// Returns true when @r is one of the fixed TGSI_RESOURCE_* pseudo-resource
// indices (GLOBAL, LOCAL, PRIVATE, INPUT) rather than an index into the
// shader's declared resources.
static inline bool
isResourceSpecial(const int r)
{
   switch (r) {
   case TGSI_RESOURCE_GLOBAL:
   case TGSI_RESOURCE_LOCAL:
   case TGSI_RESOURCE_PRIVATE:
   case TGSI_RESOURCE_INPUT:
      return true;
   default:
      return false;
   }
}
// Returns true when resource @r is accessed as raw memory: either it is one
// of the special pseudo-resources, or its declaration carried the raw flag.
static inline bool
isResourceRaw(const struct tgsi::Source *code, const int r)
{
   // Special resources have no entry in code->resources, so test them first.
   if (isResourceSpecial(r))
      return true;
   return code->resources[r].raw;
}
// Returns the texture target used to address resource @r: the translated
// declared target for ordinary resources, TEX_TARGET_BUFFER for the special
// pseudo-resources.
static inline nv50_ir::TexTarget
getResourceTarget(const struct tgsi::Source *code, int r)
{
   if (!isResourceSpecial(r))
      return tgsi::translateTexture(code->resources.at(r).target);
   return nv50_ir::TEX_TARGET_BUFFER;
}
// Creates the base symbol that raw loads/stores on resource @r are relative
// to. The memory file is chosen by the resource kind; ordinary (declared)
// resources map to global memory, tagged with the resource's slot.
Symbol *
Converter::getResourceBase(const int r)
{
   if (r == TGSI_RESOURCE_GLOBAL)
      return new_Symbol(prog, nv50_ir::FILE_MEMORY_GLOBAL, 15);

   if (r == TGSI_RESOURCE_LOCAL) {
      assert(prog->getType() == Program::TYPE_COMPUTE);
      return mkSymbol(nv50_ir::FILE_MEMORY_SHARED, 0, TYPE_U32,
                      info->prop.cp.sharedOffset);
   }

   if (r == TGSI_RESOURCE_PRIVATE)
      return mkSymbol(nv50_ir::FILE_MEMORY_LOCAL, 0, TYPE_U32,
                      info->bin.tlsSpace);

   if (r == TGSI_RESOURCE_INPUT) {
      assert(prog->getType() == Program::TYPE_COMPUTE);
      return mkSymbol(nv50_ir::FILE_SHADER_INPUT, 0, TYPE_U32,
                      info->prop.cp.inputOffset);
   }

   // Not a pseudo-resource: look up the declared resource (bounds-checked).
   return new_Symbol(prog,
                     nv50_ir::FILE_MEMORY_GLOBAL, code->resources.at(r).slot);
}
// Appends to @coords the address/coordinate components of source operand @s,
// one per argument of resource @r's target. For LOCAL, PRIVATE and INPUT the
// first coordinate is additionally copied into a FILE_ADDRESS scratch value.
void
Converter::getResourceCoords(std::vector<Value *> &coords, int r, int s)
{
   const int numArgs =
      TexInstruction::Target(getResourceTarget(code, r)).getArgCount();

   int comp = 0;
   while (comp < numArgs) {
      coords.push_back(fetchSrc(s, comp));
      ++comp;
   }

   // NOTE: TGSI_RESOURCE_GLOBAL needs FILE_GPR; this is an nv50 quirk
   switch (r) {
   case TGSI_RESOURCE_LOCAL:
   case TGSI_RESOURCE_PRIVATE:
   case TGSI_RESOURCE_INPUT:
      coords[0] = mkOp1v(OP_MOV, TYPE_U32, getScratch(4, FILE_ADDRESS),
                         coords[0]);
      break;
   default:
      break;
   }
}
/**
 * Partition a 4-bit component mask into at most two contiguous runs, each of
 * which can be served by a single raw load or store.
 *
 * @param comp  out: first component index of each run; must be
 *              zero-initialised by the caller.
 * @param size  out: number of components in each run; must be
 *              zero-initialised by the caller.
 * @param mask  component mask, bit c set means component c is accessed.
 *              Only the low 4 bits are expected to be used.
 * @return number of runs (0 for an empty mask, otherwise 1 or 2).
 *
 * A run of exactly three components is split in two, so that no 3-component
 * access is produced: into 1 + 2 when it starts at component 1, into 2 + 1
 * otherwise.
 */
static inline int
partitionLoadStore(uint8_t comp[2], uint8_t size[2], uint8_t mask)
{
   int n = 0;

   while (mask) {
      if (mask & 1) {
         size[n]++;
      } else {
         if (size[n]) {
            // Gap after the first run: the second run starts no earlier than
            // the component following this gap. The first run's own start
            // offset must be included, otherwise masks such as 0b1010 place
            // the second run one component too low.
            n = 1;
            comp[1] = comp[0] + size[0] + 1;
         } else {
            // Still skipping leading (or further gap) components.
            comp[n]++;
         }
      }
      mask >>= 1;
   }

   // Nothing enabled: report zero operations rather than one empty one.
   if (!size[0])
      return 0;

   if (size[0] == 3) {
      n = 1;
      size[0] = (comp[0] == 1) ? 1 : 2;
      size[1] = 3 - size[0];
      comp[1] = comp[0] + size[0];
   }
   return n + 1;
}
// Translate TGSI_OPCODE_LOAD.
//
// Source operand 0 names the resource, source operand 1 supplies the
// address/coordinates. dst0[] holds the destination value for each channel.
//
// For raw loads, granularity is 4 byte.
// Usage of the texture read mask on OP_SULDP is not allowed.
void
Converter::handleLOAD(Value *dst0[4])
{
// resource index taken from source operand 0
const int r = tgsi.getSrc(0).getIndex(0);
int c;
// off: coordinates as fetched, src: per-op sources (src[0] may be adjusted),
// ldv: defs of the current load op, def: defs indexed by resource component
std::vector<Value *> off, src, ldv, def;
getResourceCoords(off, r, 1);
if (isResourceRaw(code, r)) {
// mask of resource components that have to be loaded
uint8_t mask = 0;
uint8_t comp[2] = { 0, 0 };
uint8_t size[2] = { 0, 0 };
Symbol *base = getResourceBase(r);
// determine the base and size of the at most 2 load ops
// (each destination channel that isMasked() does not report contributes the
// component selected by the swizzle on source operand 0)
for (c = 0; c < 4; ++c)
if (!tgsi.getDst(0).isMasked(c))
mask |= 1 << (tgsi.getSrc(0).getSwizzle(c) - TGSI_SWIZZLE_X);
int n = partitionLoadStore(comp, size, mask);
src = off;
def.resize(4); // index by component, the ones we need will be non-NULL
for (c = 0; c < 4; ++c) {
// load straight into the destination when the swizzle is the identity for
// this channel, otherwise into a scratch value that is moved afterwards
if (dst0[c] && tgsi.getSrc(0).getSwizzle(c) == (TGSI_SWIZZLE_X + c))
def[c] = dst0[c];
else
if (mask & (1 << c))
def[c] = getScratch();
}
// special resources, and buffers when nv50styleSurfaces is set, use a plain
// load; everything else uses OP_SULDB
const bool useLd = isResourceSpecial(r) ||
(info->io.nv50styleSurfaces &&
code->resources[r].target == TGSI_TEXTURE_BUFFER);
for (int i = 0; i < n; ++i) {
// defs covered by this op: components comp[i] .. comp[i] + size[i] - 1
ldv.assign(def.begin() + comp[i], def.begin() + comp[i] + size[i]);
if (comp[i]) // adjust x component of source address if necessary
src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file),
off[0], mkImm(comp[i] * 4));
else
src[0] = off[0];
if (useLd) {
Instruction *ld =
mkLoad(typeOfSize(size[i] * 4), ldv[0], base, src[0]);
// attach the remaining defs of a multi-component load
for (size_t c = 1; c < ldv.size(); ++c)
ld->setDef(c, ldv[c]);
} else {
mkTex(OP_SULDB, getResourceTarget(code, r), code->resources[r].slot,
0, ldv, src)->dType = typeOfSize(size[i] * 4);
}
}
} else {
// formatted load: all four components get a def, using the destination
// directly only where it exists and the swizzle is the identity
def.resize(4);
for (c = 0; c < 4; ++c) {
if (!dst0[c] || tgsi.getSrc(0).getSwizzle(c) != (TGSI_SWIZZLE_X + c))
def[c] = getScratch();
else
def[c] = dst0[c];
}
mkTex(OP_SULDP, getResourceTarget(code, r), code->resources[r].slot, 0,
def, off);
}
// copy the swizzled component into every enabled destination channel that
// was not written directly by the load
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
if (dst0[c] != def[c])
mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]);
}
// Translate TGSI_OPCODE_STORE.
//
// Destination operand 0 names the resource and carries the write mask,
// source operand 0 supplies the address/coordinates and source operand 1 the
// values to be stored.
//
// For formatted stores, the write mask on OP_SUSTP can be used.
// Raw stores have to be split.
void
Converter::handleSTORE()
{
// resource index taken from destination operand 0
const int r = tgsi.getDst(0).getIndex(0);
int c;
// off: coordinates as fetched, src: coordinates followed by the values to
// store, dummy: empty def list passed to mkTex
std::vector<Value *> off, src, dummy;
getResourceCoords(off, r, 0);
src = off;
// number of coordinate sources; stored values are appended after them
const int s = src.size();
if (isResourceRaw(code, r)) {
uint8_t comp[2] = { 0, 0 };
uint8_t size[2] = { 0, 0 };
// split the write mask into the store ops to emit
int n = partitionLoadStore(comp, size, tgsi.getDst(0).getMask());
Symbol *base = getResourceBase(r);
// special resources, and buffers when nv50styleSurfaces is set, use a plain
// store; everything else uses OP_SUSTB
const bool useSt = isResourceSpecial(r) ||
(info->io.nv50styleSurfaces &&
code->resources[r].target == TGSI_TEXTURE_BUFFER);
for (int i = 0; i < n; ++i) {
if (comp[i]) // adjust x component of source address if necessary
src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file),
off[0], mkImm(comp[i] * 4));
else
src[0] = off[0];
const DataType stTy = typeOfSize(size[i] * 4);
if (useSt) {
Instruction *st =
mkStore(OP_STORE, stTy, base, NULL, fetchSrc(1, comp[i]));
// attach the remaining values of a multi-component store
for (c = 1; c < size[i]; ++c)
st->setSrc(1 + c, fetchSrc(1, comp[i] + c));
// the (possibly adjusted) address becomes the indirect of source 0
st->setIndirect(0, 0, src[0]);
} else {
// attach values to be stored
src.resize(s + size[i]);
for (c = 0; c < size[i]; ++c)
src[s + c] = fetchSrc(1, comp[i] + c);
mkTex(OP_SUSTB, getResourceTarget(code, r), code->resources[r].slot,
0, dummy, src)->setType(stTy);
}
}
} else {
// formatted store: append the value of every enabled channel and pass the
// write mask on the instruction
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
src.push_back(fetchSrc(1, c));
mkTex(OP_SUSTP, getResourceTarget(code, r), code->resources[r].slot, 0,
dummy, src)->tex.mask = tgsi.getDst(0).getMask();
}
}
Converter::Subroutine * Converter::Subroutine *
Converter::getSubroutine(unsigned ip) Converter::getSubroutine(unsigned ip)
{ {
@@ -2072,7 +2327,6 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
handleTEX(dst0, 1, 2, 0x30, 0x30, 0x30, 0x40); handleTEX(dst0, 1, 2, 0x30, 0x30, 0x30, 0x40);
break; break;
case TGSI_OPCODE_TXF: case TGSI_OPCODE_TXF:
case TGSI_OPCODE_LOAD:
handleTXF(dst0, 1); handleTXF(dst0, 1);
break; break;
case TGSI_OPCODE_TXQ: case TGSI_OPCODE_TXQ:
@@ -2257,6 +2511,12 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
ERROR("switch/case opcode encountered, should have been lowered\n"); ERROR("switch/case opcode encountered, should have been lowered\n");
abort(); abort();
break; break;
case TGSI_OPCODE_LOAD:
handleLOAD(dst0);
break;
case TGSI_OPCODE_STORE:
handleSTORE();
break;
default: default:
ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode()); ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode());
assert(0); assert(0);

View File

@@ -594,11 +594,13 @@ NV50LoweringPreSSA::handleTEX(TexInstruction *i)
i->setSrc(arg - 1, src); i->setSrc(arg - 1, src);
if (i->tex.target.isCube()) { if (i->tex.target.isCube()) {
Value *acube[4], *a2d[4]; std::vector<Value *> acube, a2d;
int c; int c;
acube.resize(4);
for (c = 0; c < 4; ++c) for (c = 0; c < 4; ++c)
acube[c] = i->getSrc(c); acube[c] = i->getSrc(c);
a2d.resize(4);
for (c = 0; c < 3; ++c) for (c = 0; c < 3; ++c)
a2d[c] = new_LValue(func, FILE_GPR); a2d[c] = new_LValue(func, FILE_GPR);
a2d[3] = NULL; a2d[3] = NULL;

View File

@@ -532,7 +532,9 @@ void Instruction::print() const
if (perPatch) if (perPatch)
PRINT("patch "); PRINT("patch ");
if (asTex()) if (asTex())
PRINT("%s ", asTex()->tex.target.getName()); PRINT("%s %s$r%u $s%u %s", asTex()->tex.target.getName(),
colour[TXT_MEM], asTex()->tex.r, asTex()->tex.s,
colour[TXT_INSN]);
if (postFactor) if (postFactor)
PRINT("x2^%i ", postFactor); PRINT("x2^%i ", postFactor);
PRINT("%s%s", dnz ? "dnz " : (ftz ? "ftz " : ""), DataTypeStr[dType]); PRINT("%s%s", dnz ? "dnz " : (ftz ? "ftz " : ""), DataTypeStr[dType]);