nvc0: add ARB_shader_draw_parameters support

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
This commit is contained in:
Ilia Mirkin
2015-12-30 14:50:02 -05:00
parent 89bda9772d
commit 517a93b346
14 changed files with 74 additions and 15 deletions

View File

@@ -47,6 +47,7 @@ Note: some of the new features are only available with certain drivers.
<li>GL_ARB_base_instance on freedreno/a4xx</li>
<li>GL_ARB_compute_shader on i965</li>
<li>GL_ARB_copy_image on r600</li>
<li>GL_ARB_shader_draw_parameters on i965, nvc0</li>
<li>GL_ARB_tessellation_shader on i965 and r600 (evergreen/cayman only)</li>
<li>GL_ARB_texture_buffer_object_rgb32 on freedreno/a4xx</li>
<li>GL_ARB_texture_buffer_range on freedreno/a4xx</li>

View File

@@ -390,6 +390,9 @@ enum SVSemantic
SV_VERTEX_STRIDE,
SV_INVOCATION_INFO,
SV_THREAD_KILL,
SV_BASEVERTEX,
SV_BASEINSTANCE,
SV_DRAWID,
SV_UNDEFINED,
SV_LAST
};

View File

@@ -124,6 +124,7 @@ struct nv50_ir_prog_info
union {
struct {
uint32_t inputMask[4]; /* mask of attributes read (1 bit per scalar) */
bool usesDrawParameters;
} vp;
struct {
uint8_t inputPatchSize;
@@ -160,8 +161,9 @@ struct nv50_ir_prog_info
uint8_t clipDistances; /* number of clip distance outputs */
uint8_t cullDistances; /* number of cull distance outputs */
int8_t genUserClip; /* request user clip planes for ClipVertex */
uint8_t auxCBSlot; /* constant buffer index of UCP/draw data */
uint16_t ucpBase; /* base address for UCPs */
uint8_t ucpCBSlot; /* constant buffer index of UCP data */
uint16_t drawInfoBase; /* base address for draw parameters */
uint8_t pointSize; /* output index for PointSize */
uint8_t instanceId; /* system value index of InstanceID */
uint8_t vertexId; /* system value index of VertexID */

View File

@@ -377,6 +377,9 @@ static nv50_ir::SVSemantic translateSysVal(uint sysval)
case TGSI_SEMANTIC_TESSINNER: return nv50_ir::SV_TESS_INNER;
case TGSI_SEMANTIC_VERTICESIN: return nv50_ir::SV_VERTEX_COUNT;
case TGSI_SEMANTIC_HELPER_INVOCATION: return nv50_ir::SV_THREAD_KILL;
case TGSI_SEMANTIC_BASEVERTEX: return nv50_ir::SV_BASEVERTEX;
case TGSI_SEMANTIC_BASEINSTANCE: return nv50_ir::SV_BASEINSTANCE;
case TGSI_SEMANTIC_DRAWID: return nv50_ir::SV_DRAWID;
default:
assert(0);
return nv50_ir::SV_CLOCK;
@@ -1128,6 +1131,11 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
case TGSI_SEMANTIC_SAMPLEPOS:
info->prop.fp.sampleInterp = 1;
break;
/* Declaring any of the three draw-parameter system values flags the
 * vertex program as a draw-parameter user; nvc0_program_translate later
 * copies this flag into prog->vp.need_draw_parameters.
 */
case TGSI_SEMANTIC_BASEVERTEX:
case TGSI_SEMANTIC_BASEINSTANCE:
case TGSI_SEMANTIC_DRAWID:
info->prop.vp.usesDrawParameters = true;
break;
default:
break;
}
@@ -3252,7 +3260,7 @@ Converter::handleUserClipPlanes()
for (c = 0; c < 4; ++c) {
for (i = 0; i < info->io.genUserClip; ++i) {
Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.ucpCBSlot,
Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.auxCBSlot,
TYPE_F32, info->io.ucpBase + i * 16 + c * 4);
Value *ucp = mkLoadv(TYPE_F32, sym, NULL);
if (c == 0)

View File

@@ -1576,6 +1576,17 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0));
ld->subOp = NV50_IR_SUBOP_PIXLD_COVMASK;
break;
/* Draw parameters are read as 32-bit words from the auxiliary constant
 * buffer (io.auxCBSlot), starting at byte offset io.drawInfoBase.
 * The word index is (sv - SV_BASEVERTEX), so this depends on
 * SV_BASEVERTEX, SV_BASEINSTANCE and SV_DRAWID being declared
 * consecutively, in that order, in enum SVSemantic. The resulting layout
 * (base vertex, base instance, draw id) matches the order in which
 * nvc0_draw_vbo uploads index_bias, start_instance and drawid.
 */
case SV_BASEVERTEX:
case SV_BASEINSTANCE:
case SV_DRAWID:
ld = bld.mkLoad(TYPE_U32, i->getDef(0),
bld.mkSymbol(FILE_MEMORY_CONST,
prog->driver->io.auxCBSlot,
TYPE_U32,
prog->driver->io.drawInfoBase +
4 * (sv - SV_BASEVERTEX)),
NULL);
break;
default:
if (prog->getType() == Program::TYPE_TESSELLATION_EVAL && !i->perPatch)
vtx = bld.mkOp1v(OP_PFETCH, TYPE_U32, bld.getSSA(), bld.mkImm(0));

View File

@@ -295,6 +295,9 @@ TargetNVC0::getSVAddress(DataFile shaderFile, const Symbol *sym) const
case SV_SAMPLE_INDEX: return 0;
case SV_SAMPLE_POS: return 0;
case SV_SAMPLE_MASK: return 0;
case SV_BASEVERTEX: return 0;
case SV_BASEINSTANCE: return 0;
case SV_DRAWID: return 0;
default:
return 0xffffffff;
}

View File

@@ -112,7 +112,7 @@ nouveau_codegen(int chipset, int type, struct tgsi_token tokens[],
info.bin.sourceRep = NV50_PROGRAM_IR_TGSI;
info.bin.source = tokens;
info.io.ucpCBSlot = 15;
info.io.auxCBSlot = 15;
info.io.ucpBase = NV50_CB_AUX_UCP_OFFSET;
info.io.resInfoCBSlot = 15;

View File

@@ -335,7 +335,7 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
info->bin.sourceRep = NV50_PROGRAM_IR_TGSI;
info->bin.source = (void *)prog->pipe.tokens;
info->io.ucpCBSlot = 15;
info->io.auxCBSlot = 15;
info->io.ucpBase = NV50_CB_AUX_UCP_OFFSET;
info->io.genUserClip = prog->vp.clpd_nr;

View File

@@ -241,8 +241,10 @@ locn_0f_ts:
parm $r2 /* instance_count */
parm $r4 maddr 0x5f7 /* INDEX_BATCH_FIRST, start */
parm $r4 send $r4 /* index_bias, send start */
maddr 0x8e4 /* CB_DATA */
braz $r2 #dei_end
parm $r5 /* start_instance */
parm $r5 send $r4 /* start_instance, send index_bias */
send $r5 /* send start_instance */
read $r6 0x50d /* VB_ELEMENT_BASE */
read $r7 0x50e /* VB_INSTANCE_BASE */
maddr 0x150d /* VB_ELEMENT,INSTANCE_BASE */
@@ -283,8 +285,10 @@ dei_end:
parm $r2 /* count */
parm $r3 /* instance_count */
parm $r4 maddr 0x35d /* VERTEX_BUFFER_FIRST, start */
parm $r4 send $r4 /* start_instance */
braz $r3 #dai_end
parm $r4 send $r4 /* start_instance */
maddrsend 0x8e4 /* CB_DATA, send 0 as base_vertex */
send $r4 /* send start_instance */
read $r6 0x50e /* VB_INSTANCE_BASE */
maddr 0x50e /* VB_INSTANCE_BASE */
mov $r5 0x1

View File

@@ -128,11 +128,13 @@ uint32_t mme9097_draw_elts_indirect[] = {
0x00000301,
0x00000201,
0x017dc451,
/* 0x000e: dei_again */
0x00002431,
0x0005d007,
0x00000501,
/* 0x001b: dei_end */
/* 0x0010: dei_again */
0x02390021,
0x00061007,
0x00002531,
/* 0x001d: dei_end */
0x00002841,
0x01434615,
0x01438715,
0x05434021,
@@ -161,11 +163,13 @@ uint32_t mme9097_draw_elts_indirect[] = {
uint32_t mme9097_draw_arrays_indirect[] = {
0x00000201,
0x00000301,
/* 0x0009: dai_again */
/* 0x000b: dai_again */
0x00d74451,
0x00049807,
0x00002431,
/* 0x0013: dai_end */
0x0003d807,
/* 0x0015: dai_end */
0x02390071,
0x00002041,
0x01438615,
0x01438021,
0x00004511,

View File

@@ -533,8 +533,9 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
info->bin.source = (void *)prog->pipe.tokens;
info->io.genUserClip = prog->vp.num_ucps;
info->io.auxCBSlot = 15;
info->io.ucpBase = 256;
info->io.ucpCBSlot = 15;
info->io.drawInfoBase = 256 + 128;
if (prog->type == PIPE_SHADER_COMPUTE) {
if (chipset >= NVISA_GK104_CHIPSET) {
@@ -583,6 +584,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
prog->num_barriers = info->numBarriers;
prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS;
prog->vp.need_draw_parameters = info->prop.vp.usesDrawParameters;
if (info->io.edgeFlagOut < PIPE_MAX_ATTRIBS)
info->out[info->io.edgeFlagOut].mask = 0; /* for headergen */

View File

@@ -42,6 +42,7 @@ struct nvc0_program {
uint8_t num_ucps; /* also set to max if ClipDistance is used */
uint8_t edgeflag; /* attribute index of edgeflag input */
bool need_vertex_id;
bool need_draw_parameters;
} vp;
struct {
uint8_t early_z;

View File

@@ -184,6 +184,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_CLEAR_TEXTURE:
case PIPE_CAP_DRAW_PARAMETERS:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
@@ -206,7 +207,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_DRAW_PARAMETERS:
return 0;
case PIPE_CAP_VENDOR_ID:

View File

@@ -814,6 +814,14 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
if (buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr))
IMMED_NVC0(push, SUBC_3D(NV10_SUBCHAN_REF_CNT), 0);
/* Queue things up to let the macros write params to the driver constbuf.
 *
 * CB_SIZE/address select a 512-byte window inside uniform_bo and CB_POS
 * is set to 256 + 128, the same value nvc0_program_translate assigns to
 * io.drawInfoBase, so the words the indirect-draw macros send to CB_DATA
 * are expected to land where the shader reads its draw parameters.
 *
 * NOTE(review): in the visible code this setup is not gated on
 * vp.need_draw_parameters, unlike the direct path in nvc0_draw_vbo;
 * presumably because the macros always send to CB_DATA - confirm.
 * NOTE(review): these words are emitted before the PUSH_SPACE call that
 * follows - confirm pushbuf space is already reserved at this point.
 */
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 512);
PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (0 << 9));
PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (0 << 9));
BEGIN_NVC0(push, NVC0_3D(CB_POS), 1);
PUSH_DATA (push, 256 + 128);
PUSH_SPACE(push, 8);
if (info->indexed) {
assert(nvc0->idxbuf.buffer);
@@ -901,6 +909,18 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
/* 8 as minimum to avoid immediate double validation of new buffers */
nvc0_state_validate(nvc0, ~0, 8);
/* Upload the draw parameters only when the bound vertex program reads
 * them. Three consecutive 32-bit words are written starting at offset
 * 256 + 128 (the value nvc0_program_translate assigns to io.drawInfoBase):
 * index_bias, start_instance, drawid. That order must match the
 * SV_BASEVERTEX / SV_BASEINSTANCE / SV_DRAWID indexing used when the
 * shader loads them.
 *
 * NOTE(review): assuming each BEGIN_* emits one header word, this block
 * writes 4 + 5 = 9 words while the nvc0_state_validate call above passes
 * 8 - confirm enough pushbuf space is reserved here.
 * TODO confirm which uniform_bo region (5 << 16) + (0 << 9) selects.
 */
if (nvc0->vertprog->vp.need_draw_parameters) {
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 512);
PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (0 << 9));
PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (0 << 9));
/* CB_POS followed by the three data words in a single packet */
BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 3);
PUSH_DATA (push, 256 + 128);
PUSH_DATA (push, info->index_bias);
PUSH_DATA (push, info->start_instance);
PUSH_DATA (push, info->drawid);
}
push->kick_notify = nvc0_draw_vbo_kick_notify;
/* TODO: Instead of iterating over all the buffer resources looking for