nvc0: enable FBFETCH with a special slot for color buffer 0

We don't need to support all the color buffers for advanced blend, just
cb0. For Fermi, we use the special binding slots so that we don't
overlap with user textures, while Kepler+ gets a dedicated position for
the fb handle in the driver constbuf.

This logic is only triggered when an FBFETCH is actually present, so it
should be a no-op most of the time.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
This commit is contained in:
Ilia Mirkin
2017-01-02 00:48:51 -05:00
parent 6b7511c2f1
commit 5ba380c226
11 changed files with 174 additions and 7 deletions

View File

@@ -253,7 +253,7 @@ GLES3.1, GLSL ES 3.1 -- all DONE: i965/hsw+, nvc0, radeonsi
GLES3.2, GLSL ES 3.2 -- all DONE: i965/gen9+
GL_EXT_color_buffer_float DONE (all drivers)
GL_KHR_blend_equation_advanced DONE (i965)
GL_KHR_blend_equation_advanced DONE (i965, nvc0)
GL_KHR_debug DONE (all drivers)
GL_KHR_robustness DONE (i965, nvc0, radeonsi)
GL_KHR_texture_compression_astc_ldr DONE (i965/gen9+)

View File

@@ -45,6 +45,7 @@ Note: some of the new features are only available with certain drivers.
<ul>
<li>GL_ARB_post_depth_coverage on i965/gen9+</li>
<li>GL_KHR_blend_equation_advanced on nvc0</li>
<li>GL_INTEL_conservative_rasterization on i965/gen9+</li>
<li>GL_NV_image_formats on any driver supporting GL_ARB_shader_image_load_store (i965, nvc0, radeonsi, softpipe)</li>
<li>GL_ARB_gpu_shader_fp64 in i965/haswell</li>

View File

@@ -146,6 +146,7 @@ struct nv50_ir_prog_info
bool usesDiscard;
bool persampleInvocation;
bool usesSampleMaskIn;
bool readsFramebuffer;
} fp;
struct {
uint32_t inputOffset; /* base address for user args */
@@ -178,6 +179,7 @@ struct nv50_ir_prog_info
bool fp64; /* program uses fp64 math */
bool nv50styleSurfaces; /* generate gX[] access for raw buffers */
uint16_t texBindBase; /* base address for tex handles (nve4) */
uint16_t fbtexBindBase; /* base address for fbtex handle (nve4) */
uint16_t suInfoBase; /* base address for surface info (nve4) */
uint16_t bufInfoBase; /* base address for buffer info */
uint16_t sampleInfoBase; /* base address for sample positions */

View File

@@ -1459,6 +1459,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
if (insn.getOpcode() == TGSI_OPCODE_BARRIER)
info->numBarriers = 1;
if (insn.getOpcode() == TGSI_OPCODE_FBFETCH)
info->prop.fp.readsFramebuffer = true;
if (insn.dstCount()) {
Instruction::DstRegister dst = insn.getDst(0);
@@ -1574,6 +1577,7 @@ private:
void handleTEX(Value *dst0[4], int R, int S, int L, int C, int Dx, int Dy);
void handleTXF(Value *dst0[4], int R, int L_M);
void handleTXQ(Value *dst0[4], enum TexQuery, int R);
void handleFBFETCH(Value *dst0[4]);
void handleLIT(Value *dst0[4]);
void handleUserClipPlanes();
@@ -2282,6 +2286,40 @@ Converter::handleTXF(Value *dst[4], int R, int L_M)
bb->insertTail(texi);
}
// Translate a TGSI FBFETCH into a texel fetch (OP_TXF) of the current
// fragment from the framebuffer.
//
// dst: the four destination channel values; a null entry means that channel
//      is not written and is left out of both the defs and the tex mask.
//
// The fetch is addressed with the fragment's own window position (x, y),
// layer and sample index, all read as system values.
void
Converter::handleFBFETCH(Value *dst[4])
{
   TexInstruction *fetch = new_TexInstruction(func, OP_TXF);

   fetch->tex.target = TEX_TARGET_2D_MS_ARRAY;
   fetch->tex.levelZero = 1;
   fetch->tex.useOffsets = 0;

   // Enabled channels are packed into consecutive defs; the mask records
   // which of the four components they correspond to.
   unsigned int nextDef = 0;
   for (unsigned int chan = 0; chan < 4; ++chan) {
      if (!dst[chan])
         continue;
      fetch->setDef(nextDef++, dst[chan]);
      fetch->tex.mask |= 1 << chan;
   }

   // Read the addressing inputs. Position is read as float, layer and
   // sample index as unsigned integers.
   Value *posX = mkOp1v(OP_RDSV, TYPE_F32, getScratch(), mkSysVal(SV_POSITION, 0));
   Value *posY = mkOp1v(OP_RDSV, TYPE_F32, getScratch(), mkSysVal(SV_POSITION, 1));
   Value *layer = mkOp1v(OP_RDSV, TYPE_U32, getScratch(), mkSysVal(SV_LAYER, 0));
   Value *sample = mkOp1v(OP_RDSV, TYPE_U32, getScratch(), mkSysVal(SV_SAMPLE_INDEX, 0));

   // Convert the float position to integer texel coordinates in place,
   // rounding toward zero.
   mkCvt(OP_CVT, TYPE_U32, posX, TYPE_F32, posX)->rnd = ROUND_Z;
   mkCvt(OP_CVT, TYPE_U32, posY, TYPE_F32, posY)->rnd = ROUND_Z;

   Value *const coords[4] = { posX, posY, layer, sample };
   for (int s = 0; s < 4; ++s)
      fetch->setSrc(s, coords[s]);

   // -1 in both binding slots tags this as the framebuffer fetch;
   // NVC0LoweringPass::handleTEX special-cases tex.r == 0xffff (the value -1
   // takes if tex.r is a 16-bit unsigned field -- TODO confirm field width).
   fetch->tex.r = fetch->tex.s = -1;

   bb->insertTail(fetch);
}
void
Converter::handleLIT(Value *dst0[4])
{
@@ -3323,6 +3361,9 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
handleTXQ(dst0, TXQ_TYPE, 0);
std::swap(dst0[0], dst0[2]);
break;
case TGSI_OPCODE_FBFETCH:
handleFBFETCH(dst0);
break;
case TGSI_OPCODE_F2I:
case TGSI_OPCODE_F2U:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)

View File

@@ -749,7 +749,10 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
i->setIndirectR(hnd);
i->setIndirectS(NULL);
} else if (i->tex.r == i->tex.s || i->op == OP_TXF) {
i->tex.r += prog->driver->io.texBindBase / 4;
if (i->tex.r == 0xffff)
i->tex.r = prog->driver->io.fbtexBindBase / 4;
else
i->tex.r += prog->driver->io.texBindBase / 4;
i->tex.s = 0; // only a single cX[] value possible here
} else {
Value *hnd = bld.getScratch();
@@ -805,6 +808,11 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
Value *ticRel = i->getIndirectR();
Value *tscRel = i->getIndirectS();
if (i->tex.r == 0xffff) {
i->tex.r = 0x20;
i->tex.s = 0x10;
}
if (ticRel) {
i->setSrc(i->tex.rIndirectSrc, NULL);
if (i->tex.r)
@@ -2507,9 +2515,13 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
default:
if (prog->getType() == Program::TYPE_TESSELLATION_EVAL && !i->perPatch)
vtx = bld.mkOp1v(OP_PFETCH, TYPE_U32, bld.getSSA(), bld.mkImm(0));
ld = bld.mkFetch(i->getDef(0), i->dType,
FILE_SHADER_INPUT, addr, i->getIndirect(0, 0), vtx);
ld->perPatch = i->perPatch;
if (prog->getType() == Program::TYPE_FRAGMENT) {
bld.mkInterp(NV50_IR_INTERP_FLAT, i->getDef(0), addr, NULL);
} else {
ld = bld.mkFetch(i->getDef(0), i->dType,
FILE_SHADER_INPUT, addr, i->getIndirect(0, 0), vtx);
ld->perPatch = i->perPatch;
}
break;
}
bld.getBB()->remove(i);

View File

@@ -120,6 +120,9 @@
/* block/grid size, at 3 32-bits integers each, gridid and work_dim.
 * The replacement list is fully parenthesized so that the macro keeps its
 * value when used inside a larger expression (e.g. multiplied or subtracted).
 */
#define NVC0_CB_AUX_GRID_INFO(i) (0x100 + (i) * 4) /* CP */
#define NVC0_CB_AUX_GRID_SIZE (8 * 4)
/* FB texture handle. Uses the same 0x100 offset as the grid info above; the
 * two are tagged for different stages (CP vs FP), so presumably they never
 * live in the same stage's aux constbuf -- confirm before adding new users.
 */
#define NVC0_CB_AUX_FB_TEX_INFO 0x100 /* FP */
#define NVC0_CB_AUX_FB_TEX_SIZE (4)
/* 8 user clip planes, at 4 32-bits floats each */
#define NVC0_CB_AUX_UCP_INFO 0x120
#define NVC0_CB_AUX_UCP_SIZE (PIPE_MAX_CLIP_PLANES * 4 * 4)
@@ -206,6 +209,7 @@ struct nvc0_context {
unsigned num_samplers[6];
uint32_t samplers_dirty[6];
bool seamless_cube_map;
struct pipe_sampler_view *fbtexture;
uint32_t tex_handles[6][PIPE_MAX_SAMPLERS]; /* for nve4 */

View File

@@ -486,6 +486,11 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
fp->fp.early_z = info->prop.fp.earlyFragTests;
fp->fp.sample_mask_in = info->prop.fp.usesSampleMaskIn;
fp->fp.reads_framebuffer = info->prop.fp.readsFramebuffer;
/* Mark position xy and layer as read */
if (fp->fp.reads_framebuffer)
fp->hdr[5] |= 0x32000000;
return 0;
}
@@ -583,6 +588,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
info->io.suInfoBase = NVC0_CB_AUX_SU_INFO(0);
if (info->target >= NVISA_GK104_CHIPSET) {
info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
info->io.fbtexBindBase = NVC0_CB_AUX_FB_TEX_INFO;
}
if (prog->type == PIPE_SHADER_COMPUTE) {

View File

@@ -49,6 +49,7 @@ struct nvc0_program {
bool sample_mask_in;
bool force_persample_interp;
bool flatshade;
bool reads_framebuffer;
} fp;
struct {
uint32_t tess_mode; /* ~0 if defined by the other stage */

View File

@@ -38,6 +38,8 @@
#include "nvc0/mme/com9097.mme.h"
#include "nvc0/mme/com90c0.mme.h"
#include "nv50/g80_texture.xml.h"
static boolean
nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
enum pipe_format format,
@@ -247,6 +249,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
return nouveau_screen(pscreen)->vram_domain & NOUVEAU_BO_VRAM ? 1 : 0;
case PIPE_CAP_TGSI_FS_FBFETCH:
return class_3d >= NVE4_3D_CLASS; /* needs testing on fermi */
/* unsupported caps */
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
@@ -275,7 +279,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_CAN_READ_OUTPUTS:
case PIPE_CAP_NATIVE_FENCE_FD:
case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
case PIPE_CAP_TGSI_FS_FBFETCH:
return 0;
case PIPE_CAP_VENDOR_ID:
@@ -535,6 +538,7 @@ nvc0_screen_destroy(struct pipe_screen *pscreen)
nouveau_heap_destroy(&screen->lib_code);
nouveau_heap_destroy(&screen->text_heap);
FREE(screen->default_tsc);
FREE(screen->tic.entries);
nouveau_object_del(&screen->eng3d);
@@ -1226,6 +1230,9 @@ nvc0_screen_create(struct nouveau_device *dev)
if (!nvc0_blitter_create(screen))
goto fail;
screen->default_tsc = CALLOC_STRUCT(nv50_tsc_entry);
screen->default_tsc->tsc[0] = G80_TSC_0_SRGB_CONVERSION;
nouveau_fence_new(&screen->base, &screen->base.fence.current, false);
return &screen->base;

View File

@@ -81,6 +81,8 @@ struct nvc0_screen {
struct nvc0_blitter *blitter;
struct nv50_tsc_entry *default_tsc;
struct {
void **entries;
int next;

View File

@@ -604,7 +604,9 @@ nvc0_validate_min_samples(struct nvc0_context *nvc0)
// If we're using the incoming sample mask and doing sample shading, we
// have to do sample shading "to the max", otherwise there's no way to
// tell which sets of samples are covered by the current invocation.
if (nvc0->fragprog->fp.sample_mask_in)
// Similarly for reading the framebuffer.
if (nvc0->fragprog->fp.sample_mask_in ||
nvc0->fragprog->fp.reads_framebuffer)
samples = util_framebuffer_get_num_samples(&nvc0->framebuffer);
samples |= NVC0_3D_SAMPLE_SHADING_ENABLE;
}
@@ -700,6 +702,93 @@ nvc0_validate_tess_state(struct nvc0_context *nvc0)
PUSH_DATAp(push, nvc0->default_tess_inner, 2);
}
/* If we have a frag shader bound which tries to read from the framebuffer, we
* have to make sure that the fb is bound as a texture in the expected
* location. For Fermi, that's in the special driver slot 16, while for Kepler
* it's a regular binding stored in the driver constbuf.
*
* The sampler view wrapping color buffer 0 is cached in nvc0->fbtexture. It is
* rebuilt only when the surface parameters differ from the cached view, and
* dropped when the bound fragment program no longer reads the framebuffer or
* there is no color buffer 0.
*
* nvc0: context whose fragment program / framebuffer state is being validated.
*/
static void
nvc0_validate_fbread(struct nvc0_context *nvc0)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nvc0_screen *screen = nvc0->screen;
struct pipe_context *pipe = &nvc0->base.pipe;
struct pipe_sampler_view *old_view = nvc0->fbtexture;
struct pipe_sampler_view *new_view = NULL;
/* A view is only needed when an FB-reading fragment program is bound and
 * color buffer 0 exists. */
if (nvc0->fragprog &&
nvc0->fragprog->fp.reads_framebuffer &&
nvc0->framebuffer.nr_cbufs &&
nvc0->framebuffer.cbufs[0]) {
/* NOTE(review): tmpl is filled field by field without being zeroed first;
 * any pipe_sampler_view member not assigned below reaches
 * create_sampler_view uninitialised -- confirm none of those is read. */
struct pipe_sampler_view tmpl;
struct pipe_surface *sf = nvc0->framebuffer.cbufs[0];
/* Describe exactly the level and layer range of the bound surface, with an
 * identity swizzle. */
tmpl.target = PIPE_TEXTURE_2D_ARRAY;
tmpl.format = sf->format;
tmpl.u.tex.first_level = tmpl.u.tex.last_level = sf->u.tex.level;
tmpl.u.tex.first_layer = sf->u.tex.first_layer;
tmpl.u.tex.last_layer = sf->u.tex.last_layer;
tmpl.swizzle_r = PIPE_SWIZZLE_X;
tmpl.swizzle_g = PIPE_SWIZZLE_Y;
tmpl.swizzle_b = PIPE_SWIZZLE_Z;
tmpl.swizzle_a = PIPE_SWIZZLE_W;
/* Bail if it's the same parameters */
if (old_view && old_view->texture == sf->texture &&
old_view->format == sf->format &&
old_view->u.tex.first_level == sf->u.tex.level &&
old_view->u.tex.first_layer == sf->u.tex.first_layer &&
old_view->u.tex.last_layer == sf->u.tex.last_layer)
return;
new_view = pipe->create_sampler_view(pipe, sf->texture, &tmpl);
} else if (old_view == NULL) {
/* Nothing is needed and nothing is cached. */
return;
}
/* Release the cached view (if any) and take over the new one; new_view is
 * NULL when the view is merely being dropped. Presumably
 * create_sampler_view returns the view with a reference already held --
 * TODO confirm. */
if (old_view)
pipe_sampler_view_reference(&nvc0->fbtexture, NULL);
nvc0->fbtexture = new_view;
/* Upload the screen-wide default sampler (TSC) entry the first time it is
 * needed.
 * NOTE(review): nvc0_screen_create allocates default_tsc with CALLOC_STRUCT
 * and only sets tsc[0]; if that allocation zero-fills, id starts at 0 and
 * this branch never runs -- confirm that id is initialised to a negative
 * value somewhere. */
if (screen->default_tsc->id < 0) {
struct nv50_tsc_entry *tsc = nv50_tsc_entry(screen->default_tsc);
tsc->id = nvc0_screen_tsc_alloc(screen, tsc);
/* TSC entries are written at byte offset 65536 of screen->txc, 32 bytes
 * per entry. */
nvc0->base.push_data(&nvc0->base, screen->txc, 65536 + tsc->id * 32,
NV_VRAM_DOMAIN(&screen->base), 32, tsc->tsc);
/* Set this entry's bit in the TSC lock bitmap (presumably so the slot is
 * not reused -- verify against the allocator). */
screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
IMMED_NVC0(push, NVC0_3D(TSC_FLUSH), 0);
/* Pre-NVE4 classes bind the sampler through BIND_TSC2(0); the low bit is
 * presumably the enable flag -- TODO confirm. */
if (screen->base.class_3d < NVE4_3D_CLASS) {
BEGIN_NVC0(push, NVC0_3D(BIND_TSC2(0)), 1);
PUSH_DATA (push, (tsc->id << 12) | 1);
}
}
if (new_view) {
struct nv50_tic_entry *tic = nv50_tic_entry(new_view);
/* A freshly created view is expected not to own a TIC slot yet. */
assert(tic->id < 0);
tic->id = nvc0_screen_tic_alloc(screen, tic);
/* TIC entries are written from byte offset 0 of screen->txc, 32 bytes per
 * entry. */
nvc0->base.push_data(&nvc0->base, screen->txc, tic->id * 32,
NV_VRAM_DOMAIN(&screen->base), 32, tic->tic);
screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
if (screen->base.class_3d >= NVE4_3D_CLASS) {
/* NVE4+: write the combined handle (TSC id shifted left by 20, TIC id in
 * the low bits) into the aux constbuf at NVC0_CB_AUX_FB_TEX_INFO.
 * Index 4 of NVC0_CB_AUX_INFO is presumably the fragment stage -- TODO
 * confirm. */
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, NVC0_CB_AUX_SIZE);
PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 1);
PUSH_DATA (push, NVC0_CB_AUX_FB_TEX_INFO);
PUSH_DATA (push, (screen->default_tsc->id << 20) | tic->id);
} else {
/* Older classes bind the texture through BIND_TIC2(0) instead. */
BEGIN_NVC0(push, NVC0_3D(BIND_TIC2(0)), 1);
PUSH_DATA (push, (tic->id << 9) | 1);
}
IMMED_NVC0(push, NVC0_3D(TIC_FLUSH), 0);
}
}
static void
nvc0_switch_pipe_context(struct nvc0_context *ctx_to)
{
@@ -781,6 +870,8 @@ validate_list_3d[] = {
{ nvc0_validate_textures, NVC0_NEW_3D_TEXTURES },
{ nvc0_validate_samplers, NVC0_NEW_3D_SAMPLERS },
{ nve4_set_tex_handles, NVC0_NEW_3D_TEXTURES | NVC0_NEW_3D_SAMPLERS },
{ nvc0_validate_fbread, NVC0_NEW_3D_FRAGPROG |
NVC0_NEW_3D_FRAMEBUFFER },
{ nvc0_vertex_arrays_validate, NVC0_NEW_3D_VERTEX | NVC0_NEW_3D_ARRAYS },
{ nvc0_validate_surfaces, NVC0_NEW_3D_SURFACES },
{ nvc0_validate_buffers, NVC0_NEW_3D_BUFFERS },