nvc0: enable FBFETCH with a special slot for color buffer 0
We don't need to support all the color buffers for advanced blend, just cb0. For Fermi, we use the special binding slots so that we don't overlap with user textures, while Kepler+ gets a dedicated position for the fb handle in the driver constbuf. This logic is only triggered when an FBFETCH is actually present, so it should be a no-op most of the time. Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
This commit is contained in:
@@ -253,7 +253,7 @@ GLES3.1, GLSL ES 3.1 -- all DONE: i965/hsw+, nvc0, radeonsi
|
||||
GLES3.2, GLSL ES 3.2 -- all DONE: i965/gen9+
|
||||
|
||||
GL_EXT_color_buffer_float DONE (all drivers)
|
||||
GL_KHR_blend_equation_advanced DONE (i965)
|
||||
GL_KHR_blend_equation_advanced DONE (i965, nvc0)
|
||||
GL_KHR_debug DONE (all drivers)
|
||||
GL_KHR_robustness DONE (i965, nvc0, radeonsi)
|
||||
GL_KHR_texture_compression_astc_ldr DONE (i965/gen9+)
|
||||
|
@@ -45,6 +45,7 @@ Note: some of the new features are only available with certain drivers.
|
||||
|
||||
<ul>
|
||||
<li>GL_ARB_post_depth_coverage on i965/gen9+</li>
|
||||
<li>GL_KHR_blend_equation_advanced on nvc0</li>
|
||||
<li>GL_INTEL_conservative_rasterization on i965/gen9+</li>
|
||||
<li>GL_NV_image_formats on any driver supporting GL_ARB_shader_image_load_store (i965, nvc0, radeonsi, softpipe)</li>
|
||||
<li>GL_ARB_gpu_shader_fp64 in i965/haswell</li>
|
||||
|
@@ -146,6 +146,7 @@ struct nv50_ir_prog_info
|
||||
bool usesDiscard;
|
||||
bool persampleInvocation;
|
||||
bool usesSampleMaskIn;
|
||||
bool readsFramebuffer;
|
||||
} fp;
|
||||
struct {
|
||||
uint32_t inputOffset; /* base address for user args */
|
||||
@@ -178,6 +179,7 @@ struct nv50_ir_prog_info
|
||||
bool fp64; /* program uses fp64 math */
|
||||
bool nv50styleSurfaces; /* generate gX[] access for raw buffers */
|
||||
uint16_t texBindBase; /* base address for tex handles (nve4) */
|
||||
uint16_t fbtexBindBase; /* base address for fbtex handle (nve4) */
|
||||
uint16_t suInfoBase; /* base address for surface info (nve4) */
|
||||
uint16_t bufInfoBase; /* base address for buffer info */
|
||||
uint16_t sampleInfoBase; /* base address for sample positions */
|
||||
|
@@ -1459,6 +1459,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
|
||||
if (insn.getOpcode() == TGSI_OPCODE_BARRIER)
|
||||
info->numBarriers = 1;
|
||||
|
||||
if (insn.getOpcode() == TGSI_OPCODE_FBFETCH)
|
||||
info->prop.fp.readsFramebuffer = true;
|
||||
|
||||
if (insn.dstCount()) {
|
||||
Instruction::DstRegister dst = insn.getDst(0);
|
||||
|
||||
@@ -1574,6 +1577,7 @@ private:
|
||||
void handleTEX(Value *dst0[4], int R, int S, int L, int C, int Dx, int Dy);
|
||||
void handleTXF(Value *dst0[4], int R, int L_M);
|
||||
void handleTXQ(Value *dst0[4], enum TexQuery, int R);
|
||||
void handleFBFETCH(Value *dst0[4]);
|
||||
void handleLIT(Value *dst0[4]);
|
||||
void handleUserClipPlanes();
|
||||
|
||||
@@ -2282,6 +2286,40 @@ Converter::handleTXF(Value *dst[4], int R, int L_M)
|
||||
bb->insertTail(texi);
|
||||
}
|
||||
|
||||
void
|
||||
Converter::handleFBFETCH(Value *dst[4])
|
||||
{
|
||||
TexInstruction *texi = new_TexInstruction(func, OP_TXF);
|
||||
unsigned int c, d;
|
||||
|
||||
texi->tex.target = TEX_TARGET_2D_MS_ARRAY;
|
||||
texi->tex.levelZero = 1;
|
||||
texi->tex.useOffsets = 0;
|
||||
|
||||
for (c = 0, d = 0; c < 4; ++c) {
|
||||
if (dst[c]) {
|
||||
texi->setDef(d++, dst[c]);
|
||||
texi->tex.mask |= 1 << c;
|
||||
}
|
||||
}
|
||||
|
||||
Value *x = mkOp1v(OP_RDSV, TYPE_F32, getScratch(), mkSysVal(SV_POSITION, 0));
|
||||
Value *y = mkOp1v(OP_RDSV, TYPE_F32, getScratch(), mkSysVal(SV_POSITION, 1));
|
||||
Value *z = mkOp1v(OP_RDSV, TYPE_U32, getScratch(), mkSysVal(SV_LAYER, 0));
|
||||
Value *ms = mkOp1v(OP_RDSV, TYPE_U32, getScratch(), mkSysVal(SV_SAMPLE_INDEX, 0));
|
||||
|
||||
mkCvt(OP_CVT, TYPE_U32, x, TYPE_F32, x)->rnd = ROUND_Z;
|
||||
mkCvt(OP_CVT, TYPE_U32, y, TYPE_F32, y)->rnd = ROUND_Z;
|
||||
texi->setSrc(0, x);
|
||||
texi->setSrc(1, y);
|
||||
texi->setSrc(2, z);
|
||||
texi->setSrc(3, ms);
|
||||
|
||||
texi->tex.r = texi->tex.s = -1;
|
||||
|
||||
bb->insertTail(texi);
|
||||
}
|
||||
|
||||
void
|
||||
Converter::handleLIT(Value *dst0[4])
|
||||
{
|
||||
@@ -3323,6 +3361,9 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
|
||||
handleTXQ(dst0, TXQ_TYPE, 0);
|
||||
std::swap(dst0[0], dst0[2]);
|
||||
break;
|
||||
case TGSI_OPCODE_FBFETCH:
|
||||
handleFBFETCH(dst0);
|
||||
break;
|
||||
case TGSI_OPCODE_F2I:
|
||||
case TGSI_OPCODE_F2U:
|
||||
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
|
||||
|
@@ -749,7 +749,10 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
|
||||
i->setIndirectR(hnd);
|
||||
i->setIndirectS(NULL);
|
||||
} else if (i->tex.r == i->tex.s || i->op == OP_TXF) {
|
||||
i->tex.r += prog->driver->io.texBindBase / 4;
|
||||
if (i->tex.r == 0xffff)
|
||||
i->tex.r = prog->driver->io.fbtexBindBase / 4;
|
||||
else
|
||||
i->tex.r += prog->driver->io.texBindBase / 4;
|
||||
i->tex.s = 0; // only a single cX[] value possible here
|
||||
} else {
|
||||
Value *hnd = bld.getScratch();
|
||||
@@ -805,6 +808,11 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
|
||||
Value *ticRel = i->getIndirectR();
|
||||
Value *tscRel = i->getIndirectS();
|
||||
|
||||
if (i->tex.r == 0xffff) {
|
||||
i->tex.r = 0x20;
|
||||
i->tex.s = 0x10;
|
||||
}
|
||||
|
||||
if (ticRel) {
|
||||
i->setSrc(i->tex.rIndirectSrc, NULL);
|
||||
if (i->tex.r)
|
||||
@@ -2507,9 +2515,13 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
|
||||
default:
|
||||
if (prog->getType() == Program::TYPE_TESSELLATION_EVAL && !i->perPatch)
|
||||
vtx = bld.mkOp1v(OP_PFETCH, TYPE_U32, bld.getSSA(), bld.mkImm(0));
|
||||
ld = bld.mkFetch(i->getDef(0), i->dType,
|
||||
FILE_SHADER_INPUT, addr, i->getIndirect(0, 0), vtx);
|
||||
ld->perPatch = i->perPatch;
|
||||
if (prog->getType() == Program::TYPE_FRAGMENT) {
|
||||
bld.mkInterp(NV50_IR_INTERP_FLAT, i->getDef(0), addr, NULL);
|
||||
} else {
|
||||
ld = bld.mkFetch(i->getDef(0), i->dType,
|
||||
FILE_SHADER_INPUT, addr, i->getIndirect(0, 0), vtx);
|
||||
ld->perPatch = i->perPatch;
|
||||
}
|
||||
break;
|
||||
}
|
||||
bld.getBB()->remove(i);
|
||||
|
@@ -120,6 +120,9 @@
|
||||
/* block/grid size, at 3 32-bits integers each, gridid and work_dim */
/* The expansion is fully parenthesized so that it stays a single operand when
 * used inside a larger expression (e.g. multiplied or subtracted).
 */
#define NVC0_CB_AUX_GRID_INFO(i)    (0x100 + (i) * 4) /* CP */
#define NVC0_CB_AUX_GRID_SIZE       (8 * 4)
/* FB texture handle */
/* NOTE(review): shares offset 0x100 with NVC0_CB_AUX_GRID_INFO(0); the CP/FP
 * tags suggest the two are used by different shader stages -- confirm.
 */
#define NVC0_CB_AUX_FB_TEX_INFO     0x100 /* FP */
#define NVC0_CB_AUX_FB_TEX_SIZE     (4)
/* 8 user clip planes, at 4 32-bits floats each */
#define NVC0_CB_AUX_UCP_INFO        0x120
#define NVC0_CB_AUX_UCP_SIZE        (PIPE_MAX_CLIP_PLANES * 4 * 4)
|
||||
@@ -206,6 +209,7 @@ struct nvc0_context {
|
||||
unsigned num_samplers[6];
|
||||
uint32_t samplers_dirty[6];
|
||||
bool seamless_cube_map;
|
||||
struct pipe_sampler_view *fbtexture;
|
||||
|
||||
uint32_t tex_handles[6][PIPE_MAX_SAMPLERS]; /* for nve4 */
|
||||
|
||||
|
@@ -486,6 +486,11 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
|
||||
|
||||
fp->fp.early_z = info->prop.fp.earlyFragTests;
|
||||
fp->fp.sample_mask_in = info->prop.fp.usesSampleMaskIn;
|
||||
fp->fp.reads_framebuffer = info->prop.fp.readsFramebuffer;
|
||||
|
||||
/* Mark position xy and layer as read */
|
||||
if (fp->fp.reads_framebuffer)
|
||||
fp->hdr[5] |= 0x32000000;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -583,6 +588,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
|
||||
info->io.suInfoBase = NVC0_CB_AUX_SU_INFO(0);
|
||||
if (info->target >= NVISA_GK104_CHIPSET) {
|
||||
info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
|
||||
info->io.fbtexBindBase = NVC0_CB_AUX_FB_TEX_INFO;
|
||||
}
|
||||
|
||||
if (prog->type == PIPE_SHADER_COMPUTE) {
|
||||
|
@@ -49,6 +49,7 @@ struct nvc0_program {
|
||||
bool sample_mask_in;
|
||||
bool force_persample_interp;
|
||||
bool flatshade;
|
||||
bool reads_framebuffer;
|
||||
} fp;
|
||||
struct {
|
||||
uint32_t tess_mode; /* ~0 if defined by the other stage */
|
||||
|
@@ -38,6 +38,8 @@
|
||||
#include "nvc0/mme/com9097.mme.h"
|
||||
#include "nvc0/mme/com90c0.mme.h"
|
||||
|
||||
#include "nv50/g80_texture.xml.h"
|
||||
|
||||
static boolean
|
||||
nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
|
||||
enum pipe_format format,
|
||||
@@ -247,6 +249,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
||||
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
|
||||
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
|
||||
return nouveau_screen(pscreen)->vram_domain & NOUVEAU_BO_VRAM ? 1 : 0;
|
||||
case PIPE_CAP_TGSI_FS_FBFETCH:
|
||||
return class_3d >= NVE4_3D_CLASS; /* needs testing on fermi */
|
||||
|
||||
/* unsupported caps */
|
||||
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
|
||||
@@ -275,7 +279,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
||||
case PIPE_CAP_TGSI_CAN_READ_OUTPUTS:
|
||||
case PIPE_CAP_NATIVE_FENCE_FD:
|
||||
case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
|
||||
case PIPE_CAP_TGSI_FS_FBFETCH:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_VENDOR_ID:
|
||||
@@ -535,6 +538,7 @@ nvc0_screen_destroy(struct pipe_screen *pscreen)
|
||||
nouveau_heap_destroy(&screen->lib_code);
|
||||
nouveau_heap_destroy(&screen->text_heap);
|
||||
|
||||
FREE(screen->default_tsc);
|
||||
FREE(screen->tic.entries);
|
||||
|
||||
nouveau_object_del(&screen->eng3d);
|
||||
@@ -1226,6 +1230,9 @@ nvc0_screen_create(struct nouveau_device *dev)
|
||||
if (!nvc0_blitter_create(screen))
|
||||
goto fail;
|
||||
|
||||
screen->default_tsc = CALLOC_STRUCT(nv50_tsc_entry);
|
||||
screen->default_tsc->tsc[0] = G80_TSC_0_SRGB_CONVERSION;
|
||||
|
||||
nouveau_fence_new(&screen->base, &screen->base.fence.current, false);
|
||||
|
||||
return &screen->base;
|
||||
|
@@ -81,6 +81,8 @@ struct nvc0_screen {
|
||||
|
||||
struct nvc0_blitter *blitter;
|
||||
|
||||
struct nv50_tsc_entry *default_tsc;
|
||||
|
||||
struct {
|
||||
void **entries;
|
||||
int next;
|
||||
|
@@ -604,7 +604,9 @@ nvc0_validate_min_samples(struct nvc0_context *nvc0)
|
||||
// If we're using the incoming sample mask and doing sample shading, we
|
||||
// have to do sample shading "to the max", otherwise there's no way to
|
||||
// tell which sets of samples are covered by the current invocation.
|
||||
if (nvc0->fragprog->fp.sample_mask_in)
|
||||
// Similarly for reading the framebuffer.
|
||||
if (nvc0->fragprog->fp.sample_mask_in ||
|
||||
nvc0->fragprog->fp.reads_framebuffer)
|
||||
samples = util_framebuffer_get_num_samples(&nvc0->framebuffer);
|
||||
samples |= NVC0_3D_SAMPLE_SHADING_ENABLE;
|
||||
}
|
||||
@@ -700,6 +702,93 @@ nvc0_validate_tess_state(struct nvc0_context *nvc0)
|
||||
PUSH_DATAp(push, nvc0->default_tess_inner, 2);
|
||||
}
|
||||
|
||||
/* If we have a frag shader bound which tries to read from the framebuffer, we
|
||||
* have to make sure that the fb is bound as a texture in the expected
|
||||
* location. For Fermi, that's in the special driver slot 16, while for Kepler
|
||||
* it's a regular binding stored in the driver constbuf.
|
||||
*/
|
||||
static void
|
||||
nvc0_validate_fbread(struct nvc0_context *nvc0)
|
||||
{
|
||||
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
|
||||
struct nvc0_screen *screen = nvc0->screen;
|
||||
struct pipe_context *pipe = &nvc0->base.pipe;
|
||||
struct pipe_sampler_view *old_view = nvc0->fbtexture;
|
||||
struct pipe_sampler_view *new_view = NULL;
|
||||
|
||||
if (nvc0->fragprog &&
|
||||
nvc0->fragprog->fp.reads_framebuffer &&
|
||||
nvc0->framebuffer.nr_cbufs &&
|
||||
nvc0->framebuffer.cbufs[0]) {
|
||||
struct pipe_sampler_view tmpl;
|
||||
struct pipe_surface *sf = nvc0->framebuffer.cbufs[0];
|
||||
|
||||
tmpl.target = PIPE_TEXTURE_2D_ARRAY;
|
||||
tmpl.format = sf->format;
|
||||
tmpl.u.tex.first_level = tmpl.u.tex.last_level = sf->u.tex.level;
|
||||
tmpl.u.tex.first_layer = sf->u.tex.first_layer;
|
||||
tmpl.u.tex.last_layer = sf->u.tex.last_layer;
|
||||
tmpl.swizzle_r = PIPE_SWIZZLE_X;
|
||||
tmpl.swizzle_g = PIPE_SWIZZLE_Y;
|
||||
tmpl.swizzle_b = PIPE_SWIZZLE_Z;
|
||||
tmpl.swizzle_a = PIPE_SWIZZLE_W;
|
||||
|
||||
/* Bail if it's the same parameters */
|
||||
if (old_view && old_view->texture == sf->texture &&
|
||||
old_view->format == sf->format &&
|
||||
old_view->u.tex.first_level == sf->u.tex.level &&
|
||||
old_view->u.tex.first_layer == sf->u.tex.first_layer &&
|
||||
old_view->u.tex.last_layer == sf->u.tex.last_layer)
|
||||
return;
|
||||
|
||||
new_view = pipe->create_sampler_view(pipe, sf->texture, &tmpl);
|
||||
} else if (old_view == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (old_view)
|
||||
pipe_sampler_view_reference(&nvc0->fbtexture, NULL);
|
||||
nvc0->fbtexture = new_view;
|
||||
|
||||
if (screen->default_tsc->id < 0) {
|
||||
struct nv50_tsc_entry *tsc = nv50_tsc_entry(screen->default_tsc);
|
||||
tsc->id = nvc0_screen_tsc_alloc(screen, tsc);
|
||||
nvc0->base.push_data(&nvc0->base, screen->txc, 65536 + tsc->id * 32,
|
||||
NV_VRAM_DOMAIN(&screen->base), 32, tsc->tsc);
|
||||
screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
|
||||
|
||||
IMMED_NVC0(push, NVC0_3D(TSC_FLUSH), 0);
|
||||
if (screen->base.class_3d < NVE4_3D_CLASS) {
|
||||
BEGIN_NVC0(push, NVC0_3D(BIND_TSC2(0)), 1);
|
||||
PUSH_DATA (push, (tsc->id << 12) | 1);
|
||||
}
|
||||
}
|
||||
|
||||
if (new_view) {
|
||||
struct nv50_tic_entry *tic = nv50_tic_entry(new_view);
|
||||
assert(tic->id < 0);
|
||||
tic->id = nvc0_screen_tic_alloc(screen, tic);
|
||||
nvc0->base.push_data(&nvc0->base, screen->txc, tic->id * 32,
|
||||
NV_VRAM_DOMAIN(&screen->base), 32, tic->tic);
|
||||
screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
|
||||
|
||||
if (screen->base.class_3d >= NVE4_3D_CLASS) {
|
||||
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
|
||||
PUSH_DATA (push, NVC0_CB_AUX_SIZE);
|
||||
PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
|
||||
PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
|
||||
BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 1);
|
||||
PUSH_DATA (push, NVC0_CB_AUX_FB_TEX_INFO);
|
||||
PUSH_DATA (push, (screen->default_tsc->id << 20) | tic->id);
|
||||
} else {
|
||||
BEGIN_NVC0(push, NVC0_3D(BIND_TIC2(0)), 1);
|
||||
PUSH_DATA (push, (tic->id << 9) | 1);
|
||||
}
|
||||
|
||||
IMMED_NVC0(push, NVC0_3D(TIC_FLUSH), 0);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
nvc0_switch_pipe_context(struct nvc0_context *ctx_to)
|
||||
{
|
||||
@@ -781,6 +870,8 @@ validate_list_3d[] = {
|
||||
{ nvc0_validate_textures, NVC0_NEW_3D_TEXTURES },
|
||||
{ nvc0_validate_samplers, NVC0_NEW_3D_SAMPLERS },
|
||||
{ nve4_set_tex_handles, NVC0_NEW_3D_TEXTURES | NVC0_NEW_3D_SAMPLERS },
|
||||
{ nvc0_validate_fbread, NVC0_NEW_3D_FRAGPROG |
|
||||
NVC0_NEW_3D_FRAMEBUFFER },
|
||||
{ nvc0_vertex_arrays_validate, NVC0_NEW_3D_VERTEX | NVC0_NEW_3D_ARRAYS },
|
||||
{ nvc0_validate_surfaces, NVC0_NEW_3D_SURFACES },
|
||||
{ nvc0_validate_buffers, NVC0_NEW_3D_BUFFERS },
|
||||
|
Reference in New Issue
Block a user