nv50/ir/tgsi: handle TGSI_OPCODE_LOAD,STORE
Squashed and (heavily) modified original patches by Francisco Jerez: nv50/ir/tgsi: Implement resource LOAD/STORE (wip). nv50/ir/tgsi: Emit SUST/SULD for surface access, and add CB LOAD/STORE support. nv50/ir/tgsi: Fix/clean up the LOAD/STORE handling code. Left out for now: nv50/ir/tgsi: Resource indirect indexing. Treating raw, read-only surfaces as constant buffers (CBs) was removed because CBs are limited to a size of 64 KiB, which isn't desirable, and because this decision should probably be made by the state tracker. If we used a number of CB slots for surfaces, it might find that we cannot accommodate the advertised limit.
This commit is contained in:
@@ -871,9 +871,9 @@ public:
|
||||
struct {
|
||||
Target target;
|
||||
|
||||
uint8_t r;
|
||||
uint16_t r;
|
||||
uint16_t s;
|
||||
int8_t rIndirectSrc;
|
||||
uint8_t s;
|
||||
int8_t sIndirectSrc;
|
||||
|
||||
uint8_t mask;
|
||||
|
@@ -240,15 +240,17 @@ BuildUtil::mkCmp(operation op, CondCode cc, DataType ty, Value *dst,
|
||||
return insn;
|
||||
}
|
||||
|
||||
Instruction *
|
||||
BuildUtil::mkTex(operation op, TexTarget targ, uint8_t tic, uint8_t tsc,
|
||||
Value **def, Value **src)
|
||||
TexInstruction *
|
||||
BuildUtil::mkTex(operation op, TexTarget targ,
|
||||
uint16_t tic, uint16_t tsc,
|
||||
const std::vector<Value *> &def,
|
||||
const std::vector<Value *> &src)
|
||||
{
|
||||
TexInstruction *tex = new_TexInstruction(func, op);
|
||||
|
||||
for (int d = 0; d < 4 && def[d]; ++d)
|
||||
for (size_t d = 0; d < def.size() && def[d]; ++d)
|
||||
tex->setDef(d, def[d]);
|
||||
for (int s = 0; s < 4 && src[s]; ++s)
|
||||
for (size_t s = 0; s < src.size() && src[s]; ++s)
|
||||
tex->setSrc(s, src[s]);
|
||||
|
||||
tex->setTexture(targ, tic, tsc);
|
||||
|
@@ -75,8 +75,10 @@ public:
|
||||
CmpInstruction *mkCmp(operation, CondCode, DataType,
|
||||
Value *,
|
||||
Value *, Value *, Value * = NULL);
|
||||
Instruction *mkTex(operation, TexTarget, uint8_t tic, uint8_t tsc,
|
||||
Value **def, Value **src);
|
||||
TexInstruction *mkTex(operation, TexTarget,
|
||||
uint16_t tic, uint16_t tsc,
|
||||
const std::vector<Value *> &def,
|
||||
const std::vector<Value *> &src);
|
||||
Instruction *mkQuadop(uint8_t qop, Value *, uint8_t l, Value *, Value *);
|
||||
|
||||
FlowInstruction *mkFlow(operation, void *target, CondCode, Value *pred);
|
||||
|
@@ -161,6 +161,10 @@ struct nv50_ir_prog_info
|
||||
boolean separateFragData;
|
||||
boolean usesDiscard;
|
||||
} fp;
|
||||
struct {
|
||||
uint32_t inputOffset; /* base address for user args */
|
||||
uint32_t sharedOffset; /* reserved space in s[] */
|
||||
} cp;
|
||||
} prop;
|
||||
|
||||
struct {
|
||||
@@ -179,6 +183,7 @@ struct nv50_ir_prog_info
|
||||
uint8_t sampleMask; /* output index of SampleMask */
|
||||
uint8_t backFaceColor[2]; /* input/output indices of back face colour */
|
||||
uint8_t globalAccess; /* 1 for read, 2 for wr, 3 for rw */
|
||||
boolean nv50styleSurfaces; /* generate gX[] access for raw buffers */
|
||||
uint8_t resInfoCBSlot; /* cX[] used for tex handles, surface info */
|
||||
uint16_t texBindBase; /* base address for tex handles (nve4) */
|
||||
uint16_t suInfoBase; /* base address for surface info (nve4) */
|
||||
|
@@ -559,7 +559,6 @@ static nv50_ir::operation translateOpcode(uint opcode)
|
||||
NV50_IR_OPCODE_CASE(USLT, SET);
|
||||
NV50_IR_OPCODE_CASE(USNE, SET);
|
||||
|
||||
NV50_IR_OPCODE_CASE(LOAD, TXF);
|
||||
NV50_IR_OPCODE_CASE(SAMPLE, TEX);
|
||||
NV50_IR_OPCODE_CASE(SAMPLE_B, TXB);
|
||||
NV50_IR_OPCODE_CASE(SAMPLE_C, TEX);
|
||||
@@ -620,8 +619,17 @@ public:
|
||||
|
||||
int clipVertexOutput;
|
||||
|
||||
uint8_t *samplerViewTargets; // TGSI_TEXTURE_*
|
||||
unsigned samplerViewCount;
|
||||
// Per-sampler-view information gathered while scanning declarations;
// the target is copied from the TGSI_FILE_SAMPLER_VIEW declaration.
struct TextureView {
|
||||
uint8_t target; // TGSI_TEXTURE_*
|
||||
};
|
||||
std::vector<TextureView> textureViews;
|
||||
|
||||
// Per-resource information gathered while scanning TGSI_FILE_RESOURCE
// declarations: texture target, raw flag and the slot, which the
// declaration scan sets to the resource's own index.
struct Resource {
|
||||
uint8_t target; // TGSI_TEXTURE_*
|
||||
bool raw;
|
||||
uint8_t slot; // $surface index
|
||||
};
|
||||
std::vector<Resource> resources;
|
||||
|
||||
private:
|
||||
int inferSysValDirection(unsigned sn) const;
|
||||
@@ -640,8 +648,6 @@ Source::Source(struct nv50_ir_prog_info *prog) : info(prog)
|
||||
if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
|
||||
tgsi_dump(tokens, 0);
|
||||
|
||||
samplerViewTargets = NULL;
|
||||
|
||||
mainTempsInLMem = FALSE;
|
||||
}
|
||||
|
||||
@@ -654,9 +660,6 @@ Source::~Source()
|
||||
FREE(info->immd.data);
|
||||
if (info->immd.type)
|
||||
FREE(info->immd.type);
|
||||
|
||||
if (samplerViewTargets)
|
||||
delete[] samplerViewTargets;
|
||||
}
|
||||
|
||||
bool Source::scanSource()
|
||||
@@ -673,8 +676,8 @@ bool Source::scanSource()
|
||||
|
||||
clipVertexOutput = -1;
|
||||
|
||||
samplerViewCount = scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
|
||||
samplerViewTargets = new uint8_t[samplerViewCount];
|
||||
textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1);
|
||||
resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1);
|
||||
|
||||
info->immd.bufSize = 0;
|
||||
tempArrayCount = 0;
|
||||
@@ -899,9 +902,16 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
|
||||
info->sv[i].input = inferSysValDirection(sn);
|
||||
}
|
||||
break;
|
||||
case TGSI_FILE_RESOURCE:
|
||||
for (i = first; i <= last; ++i) {
|
||||
resources[i].target = decl->Resource.Resource;
|
||||
resources[i].raw = decl->Resource.Raw;
|
||||
resources[i].slot = i;
|
||||
}
|
||||
break;
|
||||
case TGSI_FILE_SAMPLER_VIEW:
|
||||
for (i = first; i <= last; ++i)
|
||||
samplerViewTargets[i] = decl->SamplerView.Resource;
|
||||
textureViews[i].target = decl->SamplerView.Resource;
|
||||
break;
|
||||
case TGSI_FILE_IMMEDIATE_ARRAY:
|
||||
{
|
||||
@@ -997,9 +1007,15 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
|
||||
|
||||
for (unsigned s = 0; s < insn.srcCount(); ++s) {
|
||||
Instruction::SrcRegister src = insn.getSrc(s);
|
||||
if (src.getFile() == TGSI_FILE_TEMPORARY)
|
||||
if (src.getFile() == TGSI_FILE_TEMPORARY) {
|
||||
if (src.isIndirect(0))
|
||||
mainTempsInLMem = TRUE;
|
||||
} else
|
||||
if (src.getFile() == TGSI_FILE_RESOURCE) {
|
||||
if (src.getIndex(0) == TGSI_RESOURCE_GLOBAL)
|
||||
info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
|
||||
0x1 : 0x2;
|
||||
}
|
||||
if (src.getFile() != TGSI_FILE_INPUT)
|
||||
continue;
|
||||
unsigned mask = insn.srcMask(s);
|
||||
@@ -1025,13 +1041,16 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
|
||||
nv50_ir::TexInstruction::Target
|
||||
Instruction::getTexture(const tgsi::Source *code, int s) const
|
||||
{
|
||||
// XXX: indirect access
|
||||
unsigned int r;
|
||||
|
||||
switch (getSrc(s).getFile()) {
|
||||
case TGSI_FILE_SAMPLER_VIEW: {
|
||||
// XXX: indirect access
|
||||
unsigned int r = getSrc(s).getIndex(0);
|
||||
assert(r < code->samplerViewCount);
|
||||
return translateTexture(code->samplerViewTargets[r]);
|
||||
}
|
||||
case TGSI_FILE_RESOURCE:
|
||||
r = getSrc(s).getIndex(0);
|
||||
return translateTexture(code->resources.at(r).target);
|
||||
case TGSI_FILE_SAMPLER_VIEW:
|
||||
r = getSrc(s).getIndex(0);
|
||||
return translateTexture(code->textureViews.at(r).target);
|
||||
default:
|
||||
return translateTexture(insn->Texture.Texture);
|
||||
}
|
||||
@@ -1091,6 +1110,12 @@ private:
|
||||
void handleLIT(Value *dst0[4]);
|
||||
void handleUserClipPlanes();
|
||||
|
||||
Symbol *getResourceBase(int r);
|
||||
void getResourceCoords(std::vector<Value *>&, int r, int s);
|
||||
|
||||
void handleLOAD(Value *dst0[4]);
|
||||
void handleSTORE();
|
||||
|
||||
Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr);
|
||||
|
||||
void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork);
|
||||
@@ -1710,6 +1735,236 @@ Converter::handleLIT(Value *dst0[4])
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool
|
||||
isResourceSpecial(const int r)
|
||||
{
|
||||
return (r == TGSI_RESOURCE_GLOBAL ||
|
||||
r == TGSI_RESOURCE_LOCAL ||
|
||||
r == TGSI_RESOURCE_PRIVATE ||
|
||||
r == TGSI_RESOURCE_INPUT);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
isResourceRaw(const struct tgsi::Source *code, const int r)
|
||||
{
|
||||
return isResourceSpecial(r) || code->resources[r].raw;
|
||||
}
|
||||
|
||||
// Returns the texture target used to address resource r: the declared target
// for declared resources, a plain buffer for the predefined ones.
static inline nv50_ir::TexTarget
getResourceTarget(const struct tgsi::Source *code, int r)
{
   if (!isResourceSpecial(r))
      return tgsi::translateTexture(code->resources.at(r).target);

   return nv50_ir::TEX_TARGET_BUFFER;
}
|
||||
|
||||
// Builds the symbol used as base for memory accesses to resource r.
//
// The predefined resources map to fixed memory files (global, shared, local,
// shader input); local and input are asserted to occur in compute programs
// only. Any other index is looked up in the declared resources and yields a
// global memory symbol carrying that resource's slot.
Symbol *
Converter::getResourceBase(const int r)
{
   switch (r) {
   case TGSI_RESOURCE_GLOBAL:
      // NOTE(review): 15 is a fixed index chosen by the original code; its
      // meaning is not visible here.
      return new_Symbol(prog, nv50_ir::FILE_MEMORY_GLOBAL, 15);

   case TGSI_RESOURCE_LOCAL:
      assert(prog->getType() == Program::TYPE_COMPUTE);
      return mkSymbol(nv50_ir::FILE_MEMORY_SHARED, 0, TYPE_U32,
                      info->prop.cp.sharedOffset);

   case TGSI_RESOURCE_PRIVATE:
      return mkSymbol(nv50_ir::FILE_MEMORY_LOCAL, 0, TYPE_U32,
                      info->bin.tlsSpace);

   case TGSI_RESOURCE_INPUT:
      assert(prog->getType() == Program::TYPE_COMPUTE);
      return mkSymbol(nv50_ir::FILE_SHADER_INPUT, 0, TYPE_U32,
                      info->prop.cp.inputOffset);

   default:
      break;
   }

   // Declared resource: addressed through its slot.
   return new_Symbol(prog,
                     nv50_ir::FILE_MEMORY_GLOBAL, code->resources.at(r).slot);
}
|
||||
|
||||
void
|
||||
Converter::getResourceCoords(std::vector<Value *> &coords, int r, int s)
|
||||
{
|
||||
const int arg =
|
||||
TexInstruction::Target(getResourceTarget(code, r)).getArgCount();
|
||||
|
||||
for (int c = 0; c < arg; ++c)
|
||||
coords.push_back(fetchSrc(s, c));
|
||||
|
||||
// NOTE: TGSI_RESOURCE_GLOBAL needs FILE_GPR; this is an nv50 quirk
|
||||
if (r == TGSI_RESOURCE_LOCAL ||
|
||||
r == TGSI_RESOURCE_PRIVATE ||
|
||||
r == TGSI_RESOURCE_INPUT)
|
||||
coords[0] = mkOp1v(OP_MOV, TYPE_U32, getScratch(4, FILE_ADDRESS),
|
||||
coords[0]);
|
||||
}
|
||||
|
||||
// Splits a component mask into at most two runs of consecutive components,
// so that a raw LOAD/STORE can be emitted as at most two memory accesses.
//
// comp[i] receives the index of the first component of run i and size[i] the
// number of components in that run. Both arrays are accumulated in place and
// must be zero-initialized by the caller. Only the low 4 bits of mask (x, y,
// z, w) are expected to be used; more than two runs are not supported.
//
// A run of exactly 3 components is split in two (2 + 1, or 1 + 2 when the run
// starts at component 1), so a 3-component access is never requested.
//
// Returns the number of runs (1 or 2). An empty mask yields 1 with
// size[0] == 0.
static inline int
partitionLoadStore(uint8_t comp[2], uint8_t size[2], uint8_t mask)
{
   int n = 0;

   while (mask) {
      if (mask & 1) {
         size[n]++;
      } else {
         if (size[n]) {
            // First gap after a run: the next run starts past this gap. The
            // first run itself may start at a non-zero component (e.g. mask
            // 0b1010), so its start offset has to be part of the sum.
            n = 1;
            comp[1] = comp[0] + size[0] + 1;
         } else {
            // Still skipping unused components in front of the current run.
            comp[n]++;
         }
      }
      mask >>= 1;
   }
   if (size[0] == 3) {
      n = 1;
      size[0] = (comp[0] == 1) ? 1 : 2;
      size[1] = 3 - size[0];
      comp[1] = comp[0] + size[0];
   }
   return n + 1;
}
|
||||
|
||||
// For raw loads, granularity is 4 byte.
|
||||
// Usage of the texture read mask on OP_SULDP is not allowed.
|
||||
//
// Translates a TGSI LOAD. The resource index is taken from source operand 0
// and the coordinates/offset from source operand 1. dst0[] holds the
// destination value of each channel. Raw resources are read with up to two
// loads covering the needed components; all other resources are read with a
// single OP_SULDP that defines all four components.
void
|
||||
Converter::handleLOAD(Value *dst0[4])
|
||||
{
|
||||
const int r = tgsi.getSrc(0).getIndex(0);
|
||||
int c;
|
||||
std::vector<Value *> off, src, ldv, def;
|
||||
|
||||
getResourceCoords(off, r, 1);
|
||||
|
||||
if (isResourceRaw(code, r)) {
|
||||
uint8_t mask = 0;
|
||||
uint8_t comp[2] = { 0, 0 };
|
||||
uint8_t size[2] = { 0, 0 };
|
||||
|
||||
Symbol *base = getResourceBase(r);
|
||||
|
||||
// determine the base and size of the at most 2 load ops
|
||||
// (mask collects the source components selected, through the swizzle of
// source 0, by the destination channels that are written)
for (c = 0; c < 4; ++c)
|
||||
if (!tgsi.getDst(0).isMasked(c))
|
||||
mask |= 1 << (tgsi.getSrc(0).getSwizzle(c) - TGSI_SWIZZLE_X);
|
||||
|
||||
int n = partitionLoadStore(comp, size, mask);
|
||||
|
||||
src = off;
|
||||
|
||||
def.resize(4); // index by component, the ones we need will be non-NULL
|
||||
// Load directly into the destination where the swizzle is the identity for
// that channel; otherwise load into a scratch value that is moved later.
for (c = 0; c < 4; ++c) {
|
||||
if (dst0[c] && tgsi.getSrc(0).getSwizzle(c) == (TGSI_SWIZZLE_X + c))
|
||||
def[c] = dst0[c];
|
||||
else
|
||||
if (mask & (1 << c))
|
||||
def[c] = getScratch();
|
||||
}
|
||||
|
||||
// Plain loads are used for the predefined resources and, with nv50 style
// surfaces, for buffer resources; everything else goes through OP_SULDB.
const bool useLd = isResourceSpecial(r) ||
|
||||
(info->io.nv50styleSurfaces &&
|
||||
code->resources[r].target == TGSI_TEXTURE_BUFFER);
|
||||
|
||||
for (int i = 0; i < n; ++i) {
|
||||
// the values defined by this load: components comp[i] .. comp[i]+size[i]-1
ldv.assign(def.begin() + comp[i], def.begin() + comp[i] + size[i]);
|
||||
|
||||
if (comp[i]) // adjust x component of source address if necessary
|
||||
src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file),
|
||||
off[0], mkImm(comp[i] * 4));
|
||||
else
|
||||
src[0] = off[0];
|
||||
|
||||
if (useLd) {
|
||||
Instruction *ld =
|
||||
mkLoad(typeOfSize(size[i] * 4), ldv[0], base, src[0]);
|
||||
for (size_t c = 1; c < ldv.size(); ++c)
|
||||
ld->setDef(c, ldv[c]);
|
||||
} else {
|
||||
mkTex(OP_SULDB, getResourceTarget(code, r), code->resources[r].slot,
|
||||
0, ldv, src)->dType = typeOfSize(size[i] * 4);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Formatted load: all four components are always defined, using scratch
// values for channels that are not written directly.
def.resize(4);
|
||||
for (c = 0; c < 4; ++c) {
|
||||
if (!dst0[c] || tgsi.getSrc(0).getSwizzle(c) != (TGSI_SWIZZLE_X + c))
|
||||
def[c] = getScratch();
|
||||
else
|
||||
def[c] = dst0[c];
|
||||
}
|
||||
|
||||
mkTex(OP_SULDP, getResourceTarget(code, r), code->resources[r].slot, 0,
|
||||
def, off);
|
||||
}
|
||||
// Apply the swizzle: copy the loaded component into every enabled channel
// that was not loaded into directly.
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
|
||||
if (dst0[c] != def[c])
|
||||
mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]);
|
||||
}
|
||||
|
||||
// For formatted stores, the write mask on OP_SUSTP can be used.
|
||||
// Raw stores have to be split.
|
||||
//
// Translates a TGSI STORE. The resource index is taken from destination
// operand 0, the coordinates/offset from source operand 0 and the values to
// store from source operand 1. Raw resources are written with up to two
// stores covering the enabled components; all other resources are written
// with a single OP_SUSTP carrying the destination write mask.
void
|
||||
Converter::handleSTORE()
|
||||
{
|
||||
const int r = tgsi.getDst(0).getIndex(0);
|
||||
int c;
|
||||
std::vector<Value *> off, src, dummy;
|
||||
|
||||
getResourceCoords(off, r, 0);
|
||||
src = off;
|
||||
// number of coordinate sources; stored values are appended after them
const int s = src.size();
|
||||
|
||||
if (isResourceRaw(code, r)) {
|
||||
uint8_t comp[2] = { 0, 0 };
|
||||
uint8_t size[2] = { 0, 0 };
|
||||
|
||||
int n = partitionLoadStore(comp, size, tgsi.getDst(0).getMask());
|
||||
|
||||
Symbol *base = getResourceBase(r);
|
||||
|
||||
// Plain stores are used for the predefined resources and, with nv50 style
// surfaces, for buffer resources; everything else goes through OP_SUSTB.
const bool useSt = isResourceSpecial(r) ||
|
||||
(info->io.nv50styleSurfaces &&
|
||||
code->resources[r].target == TGSI_TEXTURE_BUFFER);
|
||||
|
||||
for (int i = 0; i < n; ++i) {
|
||||
if (comp[i]) // adjust x component of source address if necessary
|
||||
src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file),
|
||||
off[0], mkImm(comp[i] * 4));
|
||||
else
|
||||
src[0] = off[0];
|
||||
|
||||
const DataType stTy = typeOfSize(size[i] * 4);
|
||||
|
||||
if (useSt) {
|
||||
Instruction *st =
|
||||
mkStore(OP_STORE, stTy, base, NULL, fetchSrc(1, comp[i]));
|
||||
for (c = 1; c < size[i]; ++c)
|
||||
st->setSrc(1 + c, fetchSrc(1, comp[i] + c));
|
||||
// the (possibly adjusted) offset is attached as indirect address
st->setIndirect(0, 0, src[0]);
|
||||
} else {
|
||||
// attach values to be stored
|
||||
src.resize(s + size[i]);
|
||||
for (c = 0; c < size[i]; ++c)
|
||||
src[s + c] = fetchSrc(1, comp[i] + c);
|
||||
mkTex(OP_SUSTB, getResourceTarget(code, r), code->resources[r].slot,
|
||||
0, dummy, src)->setType(stTy);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Formatted store: append the value of every enabled channel after the
// coordinates and let the write mask select the components.
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
|
||||
src.push_back(fetchSrc(1, c));
|
||||
|
||||
mkTex(OP_SUSTP, getResourceTarget(code, r), code->resources[r].slot, 0,
|
||||
dummy, src)->tex.mask = tgsi.getDst(0).getMask();
|
||||
}
|
||||
}
|
||||
|
||||
Converter::Subroutine *
|
||||
Converter::getSubroutine(unsigned ip)
|
||||
{
|
||||
@@ -2072,7 +2327,6 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
|
||||
handleTEX(dst0, 1, 2, 0x30, 0x30, 0x30, 0x40);
|
||||
break;
|
||||
case TGSI_OPCODE_TXF:
|
||||
case TGSI_OPCODE_LOAD:
|
||||
handleTXF(dst0, 1);
|
||||
break;
|
||||
case TGSI_OPCODE_TXQ:
|
||||
@@ -2257,6 +2511,12 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
|
||||
ERROR("switch/case opcode encountered, should have been lowered\n");
|
||||
abort();
|
||||
break;
|
||||
case TGSI_OPCODE_LOAD:
|
||||
handleLOAD(dst0);
|
||||
break;
|
||||
case TGSI_OPCODE_STORE:
|
||||
handleSTORE();
|
||||
break;
|
||||
default:
|
||||
ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode());
|
||||
assert(0);
|
||||
|
@@ -594,11 +594,13 @@ NV50LoweringPreSSA::handleTEX(TexInstruction *i)
|
||||
i->setSrc(arg - 1, src);
|
||||
|
||||
if (i->tex.target.isCube()) {
|
||||
Value *acube[4], *a2d[4];
|
||||
std::vector<Value *> acube, a2d;
|
||||
int c;
|
||||
|
||||
acube.resize(4);
|
||||
for (c = 0; c < 4; ++c)
|
||||
acube[c] = i->getSrc(c);
|
||||
a2d.resize(4);
|
||||
for (c = 0; c < 3; ++c)
|
||||
a2d[c] = new_LValue(func, FILE_GPR);
|
||||
a2d[3] = NULL;
|
||||
|
@@ -532,7 +532,9 @@ void Instruction::print() const
|
||||
if (perPatch)
|
||||
PRINT("patch ");
|
||||
if (asTex())
|
||||
PRINT("%s ", asTex()->tex.target.getName());
|
||||
PRINT("%s %s$r%u $s%u %s", asTex()->tex.target.getName(),
|
||||
colour[TXT_MEM], asTex()->tex.r, asTex()->tex.s,
|
||||
colour[TXT_INSN]);
|
||||
if (postFactor)
|
||||
PRINT("x2^%i ", postFactor);
|
||||
PRINT("%s%s", dnz ? "dnz " : (ftz ? "ftz " : ""), DataTypeStr[dType]);
|
||||
|
Reference in New Issue
Block a user