Files
third_party_mesa3d/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
Ilia Mirkin 8eb1214755 nvc0: add support for bindless textures on kepler+
This keeps a list of resident textures (per context), and dumps that
list into the active buffer list when submitting. We also treat bindless
texture fetches slightly differently, wrt the meaning of indirect, and
not requiring the SAMPLER file to be used.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
2018-01-07 11:15:23 -05:00

1469 lines
48 KiB
C

/*
* Copyright 2008 Ben Skeggs
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "nvc0/nvc0_context.h"
#include "nvc0/nvc0_resource.h"
#include "nvc0/gm107_texture.xml.h"
#include "nvc0/nvc0_compute.xml.h"
#include "nv50/g80_texture.xml.h"
#include "nv50/g80_defs.xml.h"
#include "util/u_format.h"
#define NVE4_TIC_ENTRY_INVALID 0x000fffff
#define NVE4_TSC_ENTRY_INVALID 0xfff00000
/* Translate a gallium swizzle selector into the hardware TIC source code
 * for the given format. Pure-integer textures need the integer encoding
 * of "one"; "zero" is shared between the two.
 */
static inline uint32_t
nv50_tic_swizzle(const struct nvc0_format *fmt, unsigned swz, bool tex_int)
{
   switch (swz) {
   case PIPE_SWIZZLE_X:
      return fmt->tic.src_x;
   case PIPE_SWIZZLE_Y:
      return fmt->tic.src_y;
   case PIPE_SWIZZLE_Z:
      return fmt->tic.src_z;
   case PIPE_SWIZZLE_W:
      return fmt->tic.src_w;
   case PIPE_SWIZZLE_1:
      if (tex_int)
         return G80_TIC_SOURCE_ONE_INT;
      return G80_TIC_SOURCE_ONE_FLOAT;
   case PIPE_SWIZZLE_0:
   default:
      return G80_TIC_SOURCE_ZERO;
   }
}
struct pipe_sampler_view *
nvc0_create_sampler_view(struct pipe_context *pipe,
struct pipe_resource *res,
const struct pipe_sampler_view *templ)
{
uint32_t flags = 0;
if (templ->target == PIPE_TEXTURE_RECT || templ->target == PIPE_BUFFER)
flags |= NV50_TEXVIEW_SCALED_COORDS;
return nvc0_create_texture_view(pipe, res, templ, flags, templ->target);
}
/* Build a Maxwell (GM107+) texture image control (TIC) descriptor for the
 * given resource and view template. The 8-dword TIC is filled in
 * view->tic[] and uploaded later by the validation code.
 * Returns NULL on allocation failure.
 */
static struct pipe_sampler_view *
gm107_create_texture_view(struct pipe_context *pipe,
                          struct pipe_resource *texture,
                          const struct pipe_sampler_view *templ,
                          uint32_t flags,
                          enum pipe_texture_target target)
{
   const struct util_format_description *desc;
   const struct nvc0_format *fmt;
   uint64_t address;
   uint32_t *tic;
   uint32_t swz[4];
   uint32_t width, height;
   uint32_t depth;
   struct nv50_tic_entry *view;
   struct nv50_miptree *mt;
   bool tex_int;

   view = MALLOC_STRUCT(nv50_tic_entry);
   if (!view)
      return NULL;
   mt = nv50_miptree(texture);

   view->pipe = *templ;
   view->pipe.reference.count = 1;
   view->pipe.texture = NULL;
   view->pipe.context = pipe;
   /* id < 0 means the entry is not resident in the global TIC area yet. */
   view->id = -1;
   view->bindless = 0;

   pipe_resource_reference(&view->pipe.texture, texture);

   tic = &view->tic[0];

   desc = util_format_description(view->pipe.format);
   tex_int = util_format_is_pure_integer(view->pipe.format);

   /* Component sizes, per-component data types and swizzle sources. */
   fmt = &nvc0_format_table[view->pipe.format];
   swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int);
   swz[1] = nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int);
   swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int);
   swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int);
   tic[0] = fmt->tic.format << GM107_TIC2_0_COMPONENTS_SIZES__SHIFT;
   tic[0] |= fmt->tic.type_r << GM107_TIC2_0_R_DATA_TYPE__SHIFT;
   tic[0] |= fmt->tic.type_g << GM107_TIC2_0_G_DATA_TYPE__SHIFT;
   tic[0] |= fmt->tic.type_b << GM107_TIC2_0_B_DATA_TYPE__SHIFT;
   tic[0] |= fmt->tic.type_a << GM107_TIC2_0_A_DATA_TYPE__SHIFT;
   tic[0] |= swz[0] << GM107_TIC2_0_X_SOURCE__SHIFT;
   tic[0] |= swz[1] << GM107_TIC2_0_Y_SOURCE__SHIFT;
   tic[0] |= swz[2] << GM107_TIC2_0_Z_SOURCE__SHIFT;
   tic[0] |= swz[3] << GM107_TIC2_0_W_SOURCE__SHIFT;

   address = mt->base.address;

   tic[3] = GM107_TIC2_3_LOD_ANISO_QUALITY_2;
   tic[4] = GM107_TIC2_4_SECTOR_PROMOTION_PROMOTE_TO_2_V;
   tic[4] |= GM107_TIC2_4_BORDER_SIZE_SAMPLER_COLOR;

   if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
      tic[4] |= GM107_TIC2_4_SRGB_CONVERSION;

   if (!(flags & NV50_TEXVIEW_SCALED_COORDS))
      tic[5] = GM107_TIC2_5_NORMALIZED_COORDS;
   else
      tic[5] = 0;

   /* check for linear storage type */
   if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) {
      if (texture->target == PIPE_BUFFER) {
         /* Buffer texture: element count split across tic[4] (low 16 bits)
          * and tic[3] (high bits). */
         assert(!(tic[5] & GM107_TIC2_5_NORMALIZED_COORDS));
         width = view->pipe.u.buf.size / (desc->block.bits / 8) - 1;
         address +=
            view->pipe.u.buf.offset;
         tic[2] = GM107_TIC2_2_HEADER_VERSION_ONE_D_BUFFER;
         tic[3] |= width >> 16;
         tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D_BUFFER;
         tic[4] |= width & 0xffff;
      } else {
         /* Pitch-linear layout requires 32-byte-aligned pitch. */
         assert(!(mt->level[0].pitch & 0x1f));
         /* must be 2D texture without mip maps */
         tic[2] = GM107_TIC2_2_HEADER_VERSION_PITCH;
         tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D_NO_MIPMAP;
         tic[3] |= mt->level[0].pitch >> 5;
         tic[4] |= mt->base.base.width0 - 1;
         tic[5] |= 0 << GM107_TIC2_5_DEPTH_MINUS_ONE__SHIFT;
         tic[5] |= mt->base.base.height0 - 1;
      }
      tic[1] = address;
      tic[2] |= address >> 32;
      tic[6] = 0;
      tic[7] = 0;
      return &view->pipe;
   }

   /* Blocklinear (tiled) layout: encode the tile dimensions from level 0. */
   tic[2] = GM107_TIC2_2_HEADER_VERSION_BLOCKLINEAR;
   tic[3] |=
      ((mt->level[0].tile_mode & 0x0f0) >> 4 << 3) |
      ((mt->level[0].tile_mode & 0xf00) >> 8 << 6);

   depth = MAX2(mt->base.base.array_size, mt->base.base.depth0);

   if (mt->base.base.array_size > 1) {
      /* there doesn't seem to be a base layer field in TIC */
      address += view->pipe.u.tex.first_layer * mt->layer_stride;
      depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1;
   }
   tic[1] = address;
   tic[2] |= address >> 32;

   switch (target) {
   case PIPE_TEXTURE_1D:
      tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D;
      break;
   case PIPE_TEXTURE_2D:
      tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D;
      break;
   case PIPE_TEXTURE_RECT:
      tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D;
      break;
   case PIPE_TEXTURE_3D:
      tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_THREE_D;
      break;
   case PIPE_TEXTURE_CUBE:
      /* The TIC depth field counts cube faces in groups of 6. */
      depth /= 6;
      tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_CUBEMAP;
      break;
   case PIPE_TEXTURE_1D_ARRAY:
      tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D_ARRAY;
      break;
   case PIPE_TEXTURE_2D_ARRAY:
      tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D_ARRAY;
      break;
   case PIPE_TEXTURE_CUBE_ARRAY:
      depth /= 6;
      tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_CUBE_ARRAY;
      break;
   default:
      unreachable("unexpected/invalid texture target");
   }

   tic[3] |= (flags & NV50_TEXVIEW_FILTER_MSAA8) ?
             GM107_TIC2_3_USE_HEADER_OPT_CONTROL :
             GM107_TIC2_3_LOD_ANISO_QUALITY_HIGH |
             GM107_TIC2_3_LOD_ISO_QUALITY_HIGH;

   if (flags & NV50_TEXVIEW_ACCESS_RESOLVE) {
      /* Resolve views address individual samples: scale dims by ms factors. */
      width = mt->base.base.width0 << mt->ms_x;
      height = mt->base.base.height0 << mt->ms_y;
   } else {
      width = mt->base.base.width0;
      height = mt->base.base.height0;
   }

   tic[4] |= width - 1;

   tic[5] |= (height - 1) & 0xffff;
   tic[5] |= (depth - 1) << GM107_TIC2_5_DEPTH_MINUS_ONE__SHIFT;
   tic[3] |= mt->base.base.last_level << GM107_TIC2_3_MAX_MIP_LEVEL__SHIFT;

   /* sampling points: (?) */
   if ((flags & NV50_TEXVIEW_ACCESS_RESOLVE) && mt->ms_x > 1) {
      tic[6] = GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER_CONST_TWO;
      tic[6] |= GM107_TIC2_6_MAX_ANISOTROPY_2_TO_1;
   } else {
      tic[6] = GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC_TWO;
      tic[6] |= GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC_ONE;
   }

   tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
   tic[7] |= mt->ms_mode << GM107_TIC2_7_MULTI_SAMPLE_COUNT__SHIFT;

   return &view->pipe;
}
struct pipe_sampler_view *
gm107_create_texture_view_from_image(struct pipe_context *pipe,
const struct pipe_image_view *view)
{
struct nv04_resource *res = nv04_resource(view->resource);
struct pipe_sampler_view templ = {};
enum pipe_texture_target target;
uint32_t flags = 0;
if (!res)
return NULL;
target = res->base.target;
if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY)
target = PIPE_TEXTURE_2D_ARRAY;
templ.format = view->format;
templ.swizzle_r = PIPE_SWIZZLE_X;
templ.swizzle_g = PIPE_SWIZZLE_Y;
templ.swizzle_b = PIPE_SWIZZLE_Z;
templ.swizzle_a = PIPE_SWIZZLE_W;
if (target == PIPE_BUFFER) {
templ.u.buf.offset = view->u.buf.offset;
templ.u.buf.size = view->u.buf.size;
} else {
templ.u.tex.first_layer = view->u.tex.first_layer;
templ.u.tex.last_layer = view->u.tex.last_layer;
templ.u.tex.first_level = templ.u.tex.last_level = view->u.tex.level;
}
flags = NV50_TEXVIEW_SCALED_COORDS;
return nvc0_create_texture_view(pipe, &res->base, &templ, flags, target);
}
/* Build a Fermi/Kepler (GF100..GK110) texture image control (TIC) descriptor
 * for the given resource and view template. The 8-dword TIC is filled in
 * view->tic[] and uploaded later by the validation code.
 * Returns NULL on allocation failure.
 */
static struct pipe_sampler_view *
gf100_create_texture_view(struct pipe_context *pipe,
                          struct pipe_resource *texture,
                          const struct pipe_sampler_view *templ,
                          uint32_t flags,
                          enum pipe_texture_target target)
{
   const struct util_format_description *desc;
   const struct nvc0_format *fmt;
   uint64_t address;
   uint32_t *tic;
   uint32_t swz[4];
   uint32_t width, height;
   uint32_t depth;
   uint32_t tex_fmt;
   struct nv50_tic_entry *view;
   struct nv50_miptree *mt;
   bool tex_int;

   view = MALLOC_STRUCT(nv50_tic_entry);
   if (!view)
      return NULL;
   mt = nv50_miptree(texture);

   view->pipe = *templ;
   view->pipe.reference.count = 1;
   view->pipe.texture = NULL;
   view->pipe.context = pipe;
   /* id < 0 means the entry is not resident in the global TIC area yet. */
   view->id = -1;
   view->bindless = 0;

   pipe_resource_reference(&view->pipe.texture, texture);

   tic = &view->tic[0];

   desc = util_format_description(view->pipe.format);

   fmt = &nvc0_format_table[view->pipe.format];

   tex_int = util_format_is_pure_integer(view->pipe.format);

   /* Low 6 bits are the component-sizes code; bit 6 selects the GK20A
    * extended component-sizes encoding (moved into tic[0] below). */
   tex_fmt = fmt->tic.format & 0x3f;

   swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int);
   swz[1] = nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int);
   swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int);
   swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int);
   tic[0] = (tex_fmt << G80_TIC_0_COMPONENTS_SIZES__SHIFT) |
            (fmt->tic.type_r << G80_TIC_0_R_DATA_TYPE__SHIFT) |
            (fmt->tic.type_g << G80_TIC_0_G_DATA_TYPE__SHIFT) |
            (fmt->tic.type_b << G80_TIC_0_B_DATA_TYPE__SHIFT) |
            (fmt->tic.type_a << G80_TIC_0_A_DATA_TYPE__SHIFT) |
            (swz[0] << G80_TIC_0_X_SOURCE__SHIFT) |
            (swz[1] << G80_TIC_0_Y_SOURCE__SHIFT) |
            (swz[2] << G80_TIC_0_Z_SOURCE__SHIFT) |
            (swz[3] << G80_TIC_0_W_SOURCE__SHIFT) |
            ((fmt->tic.format & 0x40) << (GK20A_TIC_0_USE_COMPONENT_SIZES_EXTENDED__SHIFT - 6));

   address = mt->base.address;

   /* NOTE(review): 0x10001000 looks like default LOD/aniso quality bits —
    * not named in the headers; confirm against envytools before changing. */
   tic[2] = 0x10001000 | G80_TIC_2_BORDER_SOURCE_COLOR;

   if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
      tic[2] |= G80_TIC_2_SRGB_CONVERSION;

   if (!(flags & NV50_TEXVIEW_SCALED_COORDS))
      tic[2] |= G80_TIC_2_NORMALIZED_COORDS;

   /* check for linear storage type */
   if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) {
      if (texture->target == PIPE_BUFFER) {
         assert(!(tic[2] & G80_TIC_2_NORMALIZED_COORDS));
         address +=
            view->pipe.u.buf.offset;
         tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_ONE_D_BUFFER;
         tic[3] = 0;
         tic[4] = /* width */
            view->pipe.u.buf.size / (desc->block.bits / 8);
         tic[5] = 0;
      } else {
         /* must be 2D texture without mip maps */
         tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_TWO_D_NO_MIPMAP;
         tic[3] = mt->level[0].pitch;
         tic[4] = mt->base.base.width0;
         tic[5] = (1 << 16) | mt->base.base.height0;
      }
      tic[6] =
      tic[7] = 0;
      tic[1] = address;
      tic[2] |= address >> 32;
      return &view->pipe;
   }

   /* Blocklinear (tiled) layout: encode tile height/depth from level 0. */
   tic[2] |=
      ((mt->level[0].tile_mode & 0x0f0) << (22 - 4)) |
      ((mt->level[0].tile_mode & 0xf00) << (25 - 8));

   depth = MAX2(mt->base.base.array_size, mt->base.base.depth0);

   if (mt->base.base.array_size > 1) {
      /* there doesn't seem to be a base layer field in TIC */
      address += view->pipe.u.tex.first_layer * mt->layer_stride;
      depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1;
   }
   tic[1] = address;
   tic[2] |= address >> 32;

   switch (target) {
   case PIPE_TEXTURE_1D:
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D;
      break;
   case PIPE_TEXTURE_2D:
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D;
      break;
   case PIPE_TEXTURE_RECT:
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D;
      break;
   case PIPE_TEXTURE_3D:
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_THREE_D;
      break;
   case PIPE_TEXTURE_CUBE:
      /* The TIC depth field counts cube faces in groups of 6. */
      depth /= 6;
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBEMAP;
      break;
   case PIPE_TEXTURE_1D_ARRAY:
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D_ARRAY;
      break;
   case PIPE_TEXTURE_2D_ARRAY:
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D_ARRAY;
      break;
   case PIPE_TEXTURE_CUBE_ARRAY:
      depth /= 6;
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBE_ARRAY;
      break;
   default:
      unreachable("unexpected/invalid texture target");
   }

   tic[3] = (flags & NV50_TEXVIEW_FILTER_MSAA8) ? 0x20000000 : 0x00300000;

   if (flags & NV50_TEXVIEW_ACCESS_RESOLVE) {
      /* Resolve views address individual samples: scale dims by ms factors. */
      width = mt->base.base.width0 << mt->ms_x;
      height = mt->base.base.height0 << mt->ms_y;
   } else {
      width = mt->base.base.width0;
      height = mt->base.base.height0;
   }

   tic[4] = (1 << 31) | width;

   tic[5] = height & 0xffff;
   tic[5] |= depth << 16;
   tic[5] |= mt->base.base.last_level << 28;

   /* sampling points: (?) */
   if (flags & NV50_TEXVIEW_ACCESS_RESOLVE)
      tic[6] = (mt->ms_x > 1) ? 0x88000000 : 0x03000000;
   else
      tic[6] = 0x03000000;

   tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
   tic[7] |= mt->ms_mode << 12;

   return &view->pipe;
}
/* Dispatch texture view creation to the generation-specific TIC builder:
 * Maxwell (GM107+) uses a different TIC layout than Fermi/Kepler.
 */
struct pipe_sampler_view *
nvc0_create_texture_view(struct pipe_context *pipe,
                         struct pipe_resource *texture,
                         const struct pipe_sampler_view *templ,
                         uint32_t flags,
                         enum pipe_texture_target target)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);

   if (nvc0->screen->tic.maxwell)
      return gm107_create_texture_view(pipe, texture, templ, flags, target);
   return gf100_create_texture_view(pipe, texture, templ, flags, target);
}
bool
nvc0_update_tic(struct nvc0_context *nvc0, struct nv50_tic_entry *tic,
struct nv04_resource *res)
{
uint64_t address = res->address;
if (res->base.target != PIPE_BUFFER)
return false;
address += tic->pipe.u.buf.offset;
if (tic->tic[1] == (uint32_t)address &&
(tic->tic[2] & 0xff) == address >> 32)
return false;
tic->tic[1] = address;
tic->tic[2] &= 0xffffff00;
tic->tic[2] |= address >> 32;
if (tic->id >= 0) {
nvc0->base.push_data(&nvc0->base, nvc0->screen->txc, tic->id * 32,
NV_VRAM_DOMAIN(&nvc0->screen->base), 32,
tic->tic);
return true;
}
return false;
}
/* Fermi path: validate texture (TIC) bindings for shader stage s
 * (s == 5 is compute). Uploads descriptors that are not yet resident in
 * the global TIC area and emits BIND_TIC commands for dirty slots.
 * Returns true if the caller needs to emit a TIC_FLUSH.
 */
bool
nvc0_validate_tic(struct nvc0_context *nvc0, int s)
{
   uint32_t commands[32];
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   unsigned i;
   unsigned n = 0;
   bool need_flush = false;

   for (i = 0; i < nvc0->num_textures[s]; ++i) {
      struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
      struct nv04_resource *res;
      const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i));

      if (!tic) {
         /* Empty slot: emit an unbind command if it changed. */
         if (dirty)
            commands[n++] = (i << 1) | 0;
         continue;
      }
      res = nv04_resource(tic->pipe.texture);
      need_flush |= nvc0_update_tic(nvc0, tic, res);

      if (tic->id < 0) {
         /* Not resident yet: allocate a TIC slot and upload the entry. */
         tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);

         nvc0->base.push_data(&nvc0->base, nvc0->screen->txc, tic->id * 32,
                              NV_VRAM_DOMAIN(&nvc0->screen->base), 32,
                              tic->tic);
         need_flush = true;
      } else
      if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
         /* GPU may have written the texture since the last draw: flush the
          * texture cache for this TIC entry. */
         if (unlikely(s == 5))
            BEGIN_NVC0(push, NVC0_CP(TEX_CACHE_CTL), 1);
         else
            BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
         PUSH_DATA (push, (tic->id << 4) | 1);
         NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_cache_flush_count, 1);
      }
      /* Lock the slot so the entry isn't evicted while bound. */
      nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);

      res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
      res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;

      if (!dirty)
         continue;
      /* BIND_TIC command word: TIC id in bits 9+, slot in bits 1+, valid. */
      commands[n++] = (tic->id << 9) | (i << 1) | 1;

      if (unlikely(s == 5))
         BCTX_REFN(nvc0->bufctx_cp, CP_TEX(i), res, RD);
      else
         BCTX_REFN(nvc0->bufctx_3d, 3D_TEX(s, i), res, RD);
   }
   /* Unbind slots used by the previous state but no longer populated. */
   for (; i < nvc0->state.num_textures[s]; ++i)
      commands[n++] = (i << 1) | 0;

   nvc0->state.num_textures[s] = nvc0->num_textures[s];

   if (n) {
      if (unlikely(s == 5))
         BEGIN_NIC0(push, NVC0_CP(BIND_TIC), n);
      else
         BEGIN_NIC0(push, NVC0_3D(BIND_TIC(s)), n);
      PUSH_DATAp(push, commands, n);
   }
   nvc0->textures_dirty[s] = 0;

   return need_flush;
}
/* Kepler+ path: validate texture (TIC) bindings for shader stage s.
 * Instead of BIND_TIC commands, the TIC ids are folded into the combined
 * tex_handles[] words that are later uploaded to the driver constant
 * buffer by nve4_set_tex_handles(). Returns true if a TIC_FLUSH is needed.
 */
static bool
nve4_validate_tic(struct nvc0_context *nvc0, unsigned s)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   unsigned i;
   bool need_flush = false;

   for (i = 0; i < nvc0->num_textures[s]; ++i) {
      struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
      struct nv04_resource *res;
      const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i));

      if (!tic) {
         /* Empty slot: mark the handle's TIC part invalid. */
         nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
         continue;
      }
      res = nv04_resource(tic->pipe.texture);
      need_flush |= nvc0_update_tic(nvc0, tic, res);

      if (tic->id < 0) {
         /* Not resident yet: allocate a TIC slot and upload the entry. */
         tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);

         nvc0->base.push_data(&nvc0->base, nvc0->screen->txc, tic->id * 32,
                              NV_VRAM_DOMAIN(&nvc0->screen->base), 32,
                              tic->tic);
         need_flush = true;
      } else
      if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
         /* GPU may have written the texture: flush the texture cache. */
         BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
         PUSH_DATA (push, (tic->id << 4) | 1);
      }
      /* Lock the slot so the entry isn't evicted while bound. */
      nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);

      res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
      res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;

      /* Low 20 bits of the handle word hold the TIC id. */
      nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID;
      nvc0->tex_handles[s][i] |= tic->id;
      if (dirty)
         BCTX_REFN(nvc0->bufctx_3d, 3D_TEX(s, i), res, RD);
   }
   /* Invalidate slots used by the previous state but no longer populated. */
   for (; i < nvc0->state.num_textures[s]; ++i) {
      nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
      nvc0->textures_dirty[s] |= 1 << i;
   }

   nvc0->state.num_textures[s] = nvc0->num_textures[s];

   return need_flush;
}
/* Validate the texture bindings of all graphics stages and emit a single
 * TIC_FLUSH if any descriptor was (re)uploaded. Compute textures alias the
 * graphics ones, so the compute bindings are invalidated afterwards.
 */
void nvc0_validate_textures(struct nvc0_context *nvc0)
{
   bool flush_needed = false;

   for (int s = 0; s < 5; s++) {
      if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
         flush_needed |= nve4_validate_tic(nvc0, s);
      else
         flush_needed |= nvc0_validate_tic(nvc0, s);
   }

   if (flush_needed) {
      BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TIC_FLUSH), 1);
      PUSH_DATA (nvc0->base.pushbuf, 0);
   }

   /* Invalidate all CP textures because they are aliased. */
   for (int slot = 0; slot < nvc0->num_textures[5]; slot++)
      nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_TEX(slot));
   nvc0->textures_dirty[5] = ~0;
   nvc0->dirty_cp |= NVC0_NEW_CP_TEXTURES;
}
/* Fermi path: validate sampler (TSC) bindings for shader stage s
 * (s == 5 is compute). Uploads TSC entries not yet resident in the global
 * TSC area (which starts at offset 65536 in the txc buffer) and emits
 * BIND_TSC commands for dirty slots. Returns true if a TSC_FLUSH is needed.
 */
bool
nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
{
   uint32_t commands[16];
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   unsigned i;
   unsigned n = 0;
   bool need_flush = false;

   for (i = 0; i < nvc0->num_samplers[s]; ++i) {
      struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);

      /* Only dirty slots need (re)binding. */
      if (!(nvc0->samplers_dirty[s] & (1 << i)))
         continue;
      if (!tsc) {
         commands[n++] = (i << 4) | 0;
         continue;
      }
      /* NOTE(review): last sampler in the loop wins here; presumably all
       * bound samplers are expected to agree on seamless_cube_map. */
      nvc0->seamless_cube_map = tsc->seamless_cube_map;

      if (tsc->id < 0) {
         /* Not resident yet: allocate a TSC slot and upload the entry. */
         tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);

         nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->txc,
                               65536 + tsc->id * 32, NV_VRAM_DOMAIN(&nvc0->screen->base),
                               32, tsc->tsc);
         need_flush = true;
      }
      /* Lock the slot so the entry isn't evicted while bound. */
      nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);

      /* BIND_TSC command word: TSC id in bits 12+, slot in bits 4+, valid. */
      commands[n++] = (tsc->id << 12) | (i << 4) | 1;
   }
   /* Unbind slots used by the previous state but no longer populated. */
   for (; i < nvc0->state.num_samplers[s]; ++i)
      commands[n++] = (i << 4) | 0;

   nvc0->state.num_samplers[s] = nvc0->num_samplers[s];

   if (n) {
      if (unlikely(s == 5))
         BEGIN_NIC0(push, NVC0_CP(BIND_TSC), n);
      else
         BEGIN_NIC0(push, NVC0_3D(BIND_TSC(s)), n);
      PUSH_DATAp(push, commands, n);
   }
   nvc0->samplers_dirty[s] = 0;

   return need_flush;
}
/* Kepler+ path: validate sampler (TSC) bindings for shader stage s.
 * TSC ids are folded into bits 20..31 of the combined tex_handles[] words
 * later uploaded by nve4_set_tex_handles(). Returns true if a TSC_FLUSH
 * is needed.
 */
bool
nve4_validate_tsc(struct nvc0_context *nvc0, int s)
{
   unsigned i;
   bool need_flush = false;

   for (i = 0; i < nvc0->num_samplers[s]; ++i) {
      struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);

      if (!tsc) {
         /* Empty slot: mark the handle's TSC part invalid. */
         nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID;
         continue;
      }
      if (tsc->id < 0) {
         /* Not resident yet: allocate a TSC slot and upload the entry
          * (TSC area starts at offset 65536 in the txc buffer). */
         tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);

         nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc,
                               65536 + tsc->id * 32,
                               NV_VRAM_DOMAIN(&nvc0->screen->base),
                               32, tsc->tsc);
         need_flush = true;
      }
      /* Lock the slot so the entry isn't evicted while bound. */
      nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);

      nvc0->tex_handles[s][i] &= ~NVE4_TSC_ENTRY_INVALID;
      nvc0->tex_handles[s][i] |= tsc->id << 20;
   }
   /* Invalidate slots used by the previous state but no longer populated. */
   for (; i < nvc0->state.num_samplers[s]; ++i) {
      nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID;
      nvc0->samplers_dirty[s] |= 1 << i;
   }

   nvc0->state.num_samplers[s] = nvc0->num_samplers[s];

   return need_flush;
}
/* Validate the sampler bindings of all graphics stages and emit a single
 * TSC_FLUSH if any descriptor was uploaded. Compute samplers alias the
 * graphics ones, so the compute state is marked dirty afterwards.
 */
void nvc0_validate_samplers(struct nvc0_context *nvc0)
{
   bool flush_needed = false;

   for (int s = 0; s < 5; s++) {
      if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
         flush_needed |= nve4_validate_tsc(nvc0, s);
      else
         flush_needed |= nvc0_validate_tsc(nvc0, s);
   }

   if (flush_needed) {
      BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TSC_FLUSH), 1);
      PUSH_DATA (nvc0->base.pushbuf, 0);
   }

   /* Invalidate all CP samplers because they are aliased. */
   nvc0->samplers_dirty[5] = ~0;
   nvc0->dirty_cp |= NVC0_NEW_CP_SAMPLERS;
}
/* Upload the "diagonal" entries for the possible texture sources ($t == $s).
 * At some point we might want to get a list of the combinations used by a
 * shader and fill in those entries instead of having it extract the handles.
 *
 * Writes each stage's combined TIC/TSC handle words into the driver
 * auxiliary constant buffer (slots 8+i) for all slots that are texture- or
 * sampler-dirty. Kepler+ only; a no-op on older chips.
 */
void
nve4_set_tex_handles(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   unsigned s;

   if (nvc0->screen->base.class_3d < NVE4_3D_CLASS)
      return;

   for (s = 0; s < 5; ++s) {
      /* A slot needs re-upload if either its texture or sampler changed. */
      uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s];
      if (!dirty)
         continue;
      /* Point CB uploads at this stage's aux constant buffer. */
      BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
      PUSH_DATA (push, NVC0_CB_AUX_SIZE);
      PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
      PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
      do {
         /* Process one dirty slot per iteration, lowest bit first. */
         int i = ffs(dirty) - 1;
         dirty &= ~(1 << i);

         BEGIN_NVC0(push, NVC0_3D(CB_POS), 2);
         PUSH_DATA (push, (8 + i) * 4);
         PUSH_DATA (push, nvc0->tex_handles[s][i]);
      } while (dirty);

      nvc0->textures_dirty[s] = 0;
      nvc0->samplers_dirty[s] = 0;
   }
}
/* Create a bindless texture handle combining a TIC (view) and TSC (sampler)
 * entry. Returns 0 on failure. The handle layout is:
 * bit 32 set (marker) | TSC id in bits 20..31 | TIC id in bits 0..19.
 */
static uint64_t
nve4_create_texture_handle(struct pipe_context *pipe,
                           struct pipe_sampler_view *view,
                           const struct pipe_sampler_state *sampler)
{
   /* We have to create persistent handles that won't change for these objects
    * That means that we have to upload them into place and lock them so that
    * they can't be kicked out later.
    */
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nv50_tic_entry *tic = nv50_tic_entry(view);
   struct nv50_tsc_entry *tsc = pipe->create_sampler_state(pipe, sampler);
   struct pipe_sampler_view *v = NULL;

   /* The sampler gets its own TSC slot, uploaded below. */
   tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);
   if (tsc->id < 0)
      goto fail;

   if (tic->id < 0) {
      /* View not resident yet: allocate a TIC slot and upload the entry. */
      tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
      if (tic->id < 0)
         goto fail;

      nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc, tic->id * 32,
                            NV_VRAM_DOMAIN(&nvc0->screen->base), 32,
                            tic->tic);

      IMMED_NVC0(push, NVC0_3D(TIC_FLUSH), 0);
   }

   nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc,
                         65536 + tsc->id * 32,
                         NV_VRAM_DOMAIN(&nvc0->screen->base),
                         32, tsc->tsc);

   IMMED_NVC0(push, NVC0_3D(TSC_FLUSH), 0);

   // Add an extra reference to this sampler view effectively held by this
   // texture handle. This is to deal with the sampler view being dereferenced
   // before the handle is. However we need the view to still be live until the
   // handle to it is deleted.
   pipe_sampler_view_reference(&v, view);
   p_atomic_inc(&tic->bindless);

   /* Lock both slots so neither entry can be evicted while the handle lives. */
   nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
   nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);

   return 0x100000000ULL | (tsc->id << 20) | tic->id;

fail:
   /* On failure, drop the sampler state we created above. */
   pipe->delete_sampler_state(pipe, tsc);
   return 0;
}
/* Return true if the sampler view is currently bound to any shader stage
 * (stages 0-4 are graphics, stage 5 is compute).
 */
static bool
view_bound(struct nvc0_context *nvc0, struct pipe_sampler_view *view) {
   for (int stage = 0; stage < 6; stage++)
      for (int slot = 0; slot < nvc0->num_textures[stage]; slot++)
         if (nvc0->textures[stage][slot] == view)
            return true;

   return false;
}
/* Destroy a bindless texture handle: drop the handle's reference on the
 * sampler view (taken in nve4_create_texture_handle), unlock the TIC slot
 * if the view is no longer bound anywhere, and delete the sampler state.
 */
static void
nve4_delete_texture_handle(struct pipe_context *pipe, uint64_t handle)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   /* Decompose the handle: TIC id in bits 0..19, TSC id in bits 20..31. */
   uint32_t tic = handle & NVE4_TIC_ENTRY_INVALID;
   uint32_t tsc = (handle & NVE4_TSC_ENTRY_INVALID) >> 20;
   struct nv50_tic_entry *entry = nvc0->screen->tic.entries[tic];

   if (entry) {
      struct pipe_sampler_view *view = &entry->pipe;
      assert(entry->bindless);
      p_atomic_dec(&entry->bindless);
      /* Keep the slot locked while the view is still bound to a stage. */
      if (!view_bound(nvc0, view))
         nvc0_screen_tic_unlock(nvc0->screen, entry);
      /* Release the reference held by the handle; may free the view. */
      pipe_sampler_view_reference(&view, NULL);
   }

   pipe->delete_sampler_state(pipe, nvc0->screen->tsc.entries[tsc]);
}
/* Add or remove a bindless texture handle from the context's resident list.
 * Resident handles have their backing buffers added to the submit-time
 * buffer list so the kernel keeps them mapped while shaders may access them.
 */
static void
nve4_make_texture_handle_resident(struct pipe_context *pipe,
                                  uint64_t handle, bool resident)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   if (resident) {
      struct nvc0_resident *res = calloc(1, sizeof(struct nvc0_resident));
      struct nv50_tic_entry *tic =
         nvc0->screen->tic.entries[handle & NVE4_TIC_ENTRY_INVALID];
      assert(tic);
      assert(tic->bindless);

      /* Track the handle and its backing buffer; textures are read-only. */
      res->handle = handle;
      res->buf = nv04_resource(tic->pipe.texture);
      res->flags = NOUVEAU_BO_RD;
      list_add(&res->list, &nvc0->tex_head);
   } else {
      /* Find and remove the matching entry from the resident list. */
      list_for_each_entry_safe(struct nvc0_resident, pos, &nvc0->tex_head, list) {
         if (pos->handle == handle) {
            list_del(&pos->list);
            free(pos);
            break;
         }
      }
   }
}
/* Install the bindless texture entry points on the context (Kepler+). */
void
nvc0_init_bindless_functions(struct pipe_context *pipe)
{
   pipe->create_texture_handle = nve4_create_texture_handle;
   pipe->delete_texture_handle = nve4_delete_texture_handle;
   pipe->make_texture_handle_resident = nve4_make_texture_handle_resident;
}
static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT];
static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT];
static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT];
/* Compute the effective dimensions of an image view, honoring the view's
 * mip level and layer range. Unused dimensions are set to 1; buffer views
 * report their element count as width.
 */
static void
nvc0_get_surface_dims(struct pipe_image_view *view, int *width, int *height,
                      int *depth)
{
   struct nv04_resource *res = nv04_resource(view->resource);

   *width = *height = *depth = 1;

   if (res->base.target == PIPE_BUFFER) {
      *width = view->u.buf.size / util_format_get_blocksize(view->format);
      return;
   }

   const int level = view->u.tex.level;
   *width = u_minify(view->resource->width0, level);
   *height = u_minify(view->resource->height0, level);
   *depth = u_minify(view->resource->depth0, level);

   switch (res->base.target) {
   case PIPE_TEXTURE_1D_ARRAY:
   case PIPE_TEXTURE_2D_ARRAY:
   case PIPE_TEXTURE_CUBE:
   case PIPE_TEXTURE_CUBE_ARRAY:
      /* Layered targets: depth is the number of selected layers. */
      *depth = view->u.tex.last_layer - view->u.tex.first_layer + 1;
      break;
   case PIPE_TEXTURE_1D:
   case PIPE_TEXTURE_2D:
   case PIPE_TEXTURE_RECT:
   case PIPE_TEXTURE_3D:
      break;
   default:
      assert(!"unexpected texture target");
      break;
   }
}
/* Record that the buffer range backing a writable image view now contains
 * valid data, so later reads don't treat it as undefined.
 */
void
nvc0_mark_image_range_valid(const struct pipe_image_view *view)
{
   struct nv04_resource *res = (struct nv04_resource *)view->resource;
   const unsigned start = view->u.buf.offset;

   assert(view->resource->target == PIPE_BUFFER);

   util_range_add(&res->valid_buffer_range, start, start + view->u.buf.size);
}
/* Write the 16-dword Kepler surface descriptor for an image view directly
 * into the pushbuf (push->cur is advanced by 16). The descriptor is read by
 * the surface-access library code; unsupported formats get a dummy entry.
 */
void
nve4_set_surface_info(struct nouveau_pushbuf *push,
                      struct pipe_image_view *view,
                      struct nvc0_context *nvc0)
{
   struct nvc0_screen *screen = nvc0->screen;
   struct nv04_resource *res;
   uint64_t address;
   uint32_t *const info = push->cur;
   int width, height, depth;
   uint8_t log2cpp;

   if (view && !nve4_su_format_map[view->format])
      NOUVEAU_ERR("unsupported surface format, try is_format_supported() !\n");

   push->cur += 16;

   if (!view || !nve4_su_format_map[view->format]) {
      /* Null or unsupported view: emit a poisoned descriptor that still
       * points at a valid library entry (RGBA32UI) so shaders don't crash. */
      memset(info, 0, 16 * sizeof(*info));

      info[0] = 0xbadf0000;
      info[1] = 0x80004000;
      info[12] = nve4_suldp_lib_offset[PIPE_FORMAT_R32G32B32A32_UINT] +
                 screen->lib_code->start;
      return;
   }
   res = nv04_resource(view->resource);

   address = res->address;

   /* get surface dimensions based on the target. */
   nvc0_get_surface_dims(view, &width, &height, &depth);

   info[8] = width;
   info[9] = height;
   info[10] = depth;
   /* Dimensionality code for the library's coordinate handling. */
   switch (res->base.target) {
   case PIPE_TEXTURE_1D_ARRAY:
      info[11] = 1;
      break;
   case PIPE_TEXTURE_2D:
   case PIPE_TEXTURE_RECT:
      info[11] = 2;
      break;
   case PIPE_TEXTURE_3D:
      info[11] = 3;
      break;
   case PIPE_TEXTURE_2D_ARRAY:
   case PIPE_TEXTURE_CUBE:
   case PIPE_TEXTURE_CUBE_ARRAY:
      info[11] = 4;
      break;
   default:
      info[11] = 0;
      break;
   }
   /* log2 bytes-per-pixel, packed in the top nibble of the aux map entry. */
   log2cpp = (0xf000 & nve4_su_format_aux_map[view->format]) >> 12;

   /* Stick the blockwidth (ie. number of bytes per pixel) to check if the
    * format doesn't mismatch. */
   info[12] = util_format_get_blocksize(view->format);

   /* limit in bytes for raw access */
   info[13] = (0x06 << 22) | ((width << log2cpp) - 1);

   info[1] = nve4_su_format_map[view->format];

#if 0
   switch (util_format_get_blocksizebits(view->format)) {
   case  16: info[1] |= 1 << 16; break;
   case  32: info[1] |= 2 << 16; break;
   case  64: info[1] |= 3 << 16; break;
   case 128: info[1] |= 4 << 16; break;
   default:
      break;
   }
#else
   info[1] |= log2cpp << 16;
   info[1] |= 0x4000;
   info[1] |= (0x0f00 & nve4_su_format_aux_map[view->format]);
#endif

   if (res->base.target == PIPE_BUFFER) {
      address += view->u.buf.offset;

      info[0] = address >> 8;
      info[2] = width - 1;
      info[2] |= (0xff & nve4_su_format_aux_map[view->format]) << 22;
      info[3] = 0;
      info[4] = 0;
      info[5] = 0;
      info[6] = 0;
      info[7] = 0;
      info[14] = 0;
      info[15] = 0;
   } else {
      struct nv50_miptree *mt = nv50_miptree(&res->base);
      struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
      const unsigned z = view->u.tex.first_layer;

      if (z) {
         if (mt->layout_3d) {
            address += nvc0_mt_zslice_offset(mt, view->u.tex.level, z);
            /* doesn't work if z passes z-tile boundary */
            if (depth > 1) {
               pipe_debug_message(&nvc0->base.debug, CONFORMANCE,
                                  "3D images are not really supported!");
               debug_printf("3D images are not really supported!\n");
            }
         } else {
            address += mt->layer_stride * z;
         }
      }
      address += lvl->offset;

      info[0] = address >> 8;
      info[2] = (width << mt->ms_x) - 1;
      /* NOTE: this is really important: */
      info[2] |= (0xff & nve4_su_format_aux_map[view->format]) << 22;
      info[3] = (0x88 << 24) | (lvl->pitch / 64);
      info[4] = (height << mt->ms_y) - 1;
      info[4] |= (lvl->tile_mode & 0x0f0) << 25;
      info[4] |= NVC0_TILE_SHIFT_Y(lvl->tile_mode) << 22;
      info[5] = mt->layer_stride >> 8;
      info[6] = depth - 1;
      info[6] |= (lvl->tile_mode & 0xf00) << 21;
      info[6] |= NVC0_TILE_SHIFT_Z(lvl->tile_mode) << 22;
      info[7] = 0;
      info[14] = mt->ms_x;
      info[15] = mt->ms_y;
   }
}
/* Write the 16-dword Fermi surface descriptor for an image view directly
 * into the pushbuf (push->cur is advanced by 16). A zeroed descriptor
 * denotes an unbound image slot.
 */
static inline void
nvc0_set_surface_info(struct nouveau_pushbuf *push,
                      struct pipe_image_view *view, uint64_t address,
                      int width, int height, int depth)
{
   uint32_t *const info = push->cur;

   push->cur += 16;

   /* Make sure to always initialize the surface information area because it's
    * used to check if the given image is bound or not. */
   memset(info, 0, 16 * sizeof(*info));

   if (!view || !view->resource)
      return;

   struct nv04_resource *res = nv04_resource(view->resource);

   /* Stick the image dimensions for the imageSize() builtin. */
   info[8] = width;
   info[9] = height;
   info[10] = depth;

   /* Stick the blockwidth (ie. number of bytes per pixel) to calculate pixel
    * offset and to check if the format doesn't mismatch. */
   info[12] = util_format_get_blocksize(view->format);

   info[0] = address >> 8;
   info[2] = width;
   if (res->base.target != PIPE_BUFFER) {
      struct nv50_miptree *mt = nv50_miptree(&res->base);

      info[4] = height;
      info[5] = mt->layer_stride >> 8;
      info[6] = depth;
      info[14] = mt->ms_x;
      info[15] = mt->ms_y;
   }
}
/* Validate and emit the surface (image) bindings for shader stage s
 * (s == 5 is compute). For each image slot this emits the 6-word IMAGE
 * state and then writes the 16-dword surface descriptor into the driver
 * auxiliary constant buffer so shaders can query size/format information.
 */
void
nvc0_validate_suf(struct nvc0_context *nvc0, int s)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;

   for (int i = 0; i < NVC0_MAX_IMAGES; ++i) {
      struct pipe_image_view *view = &nvc0->images[s][i];
      int width, height, depth;
      uint64_t address = 0;

      if (s == 5)
         BEGIN_NVC0(push, NVC0_CP(IMAGE(i)), 6);
      else
         BEGIN_NVC0(push, NVC0_3D(IMAGE(i)), 6);

      if (view->resource) {
         struct nv04_resource *res = nv04_resource(view->resource);
         unsigned rt = nvc0_format_table[view->format].rt;

         if (util_format_is_depth_or_stencil(view->format))
            rt = rt << 12;
         else
            rt = (rt << 4) | (0x14 << 12);

         /* get surface dimensions based on the target. */
         nvc0_get_surface_dims(view, &width, &height, &depth);

         address = res->address;
         if (res->base.target == PIPE_BUFFER) {
            unsigned blocksize = util_format_get_blocksize(view->format);

            address += view->u.buf.offset;
            /* Hardware requires 256-byte-aligned surface addresses. */
            assert(!(address & 0xff));

            if (view->access & PIPE_IMAGE_ACCESS_WRITE)
               nvc0_mark_image_range_valid(view);

            PUSH_DATAh(push, address);
            PUSH_DATA (push, address);
            PUSH_DATA (push, align(width * blocksize, 0x100));
            PUSH_DATA (push, NVC0_3D_IMAGE_HEIGHT_LINEAR | 1);
            PUSH_DATA (push, rt);
            PUSH_DATA (push, 0);
         } else {
            struct nv50_miptree *mt = nv50_miptree(view->resource);
            struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
            const unsigned z = view->u.tex.first_layer;

            if (mt->layout_3d) {
               address += nvc0_mt_zslice_offset(mt, view->u.tex.level, z);
               /* Doesn't work if z passes a z-tile boundary. BUGFIX: was
                * `depth >= 1`, which warned for every 3D-layout image even
                * single-slice views; only depth > 1 can cross a boundary
                * (matches the equivalent check in nve4_set_surface_info). */
               if (depth > 1) {
                  pipe_debug_message(&nvc0->base.debug, CONFORMANCE,
                                     "3D images are not supported!");
                  debug_printf("3D images are not supported!\n");
               }
            } else {
               address += mt->layer_stride * z;
            }
            address += lvl->offset;

            PUSH_DATAh(push, address);
            PUSH_DATA (push, address);
            PUSH_DATA (push, width << mt->ms_x);
            PUSH_DATA (push, height << mt->ms_y);
            PUSH_DATA (push, rt);
            PUSH_DATA (push, lvl->tile_mode & 0xff); /* mask out z-tiling */
         }

         if (s == 5)
            BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR);
         else
            BCTX_REFN(nvc0->bufctx_3d, 3D_SUF, res, RDWR);
      } else {
         /* Unbound slot: emit a null surface. */
         PUSH_DATA(push, 0);
         PUSH_DATA(push, 0);
         PUSH_DATA(push, 0);
         PUSH_DATA(push, 0);
         PUSH_DATA(push, 0x14000);
         PUSH_DATA(push, 0);
      }

      /* stick surface information into the driver constant buffer */
      if (s == 5)
         BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
      else
         BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
      PUSH_DATA (push, NVC0_CB_AUX_SIZE);
      PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
      PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
      if (s == 5)
         BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 16);
      else
         BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 16);
      PUSH_DATA (push, NVC0_CB_AUX_SU_INFO(i));

      nvc0_set_surface_info(push, view, address, width, height, depth);
   }
}
/* Fermi path of nvc0_validate_surfaces(): re-emit the surface bindings for
 * stage index 4 (the 3D path of nvc0_validate_suf), then force the compute
 * (stage index 5) bindings to be re-validated on the next compute launch. */
static inline void
nvc0_update_surface_bindings(struct nvc0_context *nvc0)
{
   nvc0_validate_suf(nvc0, 4);

   /* Invalidate all COMPUTE images because they are aliased with FRAGMENT. */
   nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF);
   nvc0->dirty_cp |= NVC0_NEW_CP_SURFACES;
   /* mark every currently-valid compute image dirty so it gets re-emitted */
   nvc0->images_dirty[5] |= nvc0->images_valid[5];
}
/* GM107+ (Maxwell): surface access additionally goes through a TIC (texture
 * image control) entry.  Make sure the TIC for the image view at
 * images_tic[stage][slot] is allocated and uploaded, and store its id in the
 * stage's driver auxiliary constant buffer at texture slot (slot + 32).
 *
 * Note: the resource is taken from the TIC's own texture reference, not from
 * the passed-in view (the previous code initialized res from view->resource
 * and immediately overwrote it — a dead store).
 */
static void
gm107_validate_surfaces(struct nvc0_context *nvc0,
                        struct pipe_image_view *view, int stage, int slot)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   struct nv50_tic_entry *tic;
   struct nv04_resource *res;

   tic = nv50_tic_entry(nvc0->images_tic[stage][slot]);
   res = nv04_resource(tic->pipe.texture);
   nvc0_update_tic(nvc0, tic, res);

   if (tic->id < 0) {
      tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);

      /* upload the texture view */
      nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc, tic->id * 32,
                            NV_VRAM_DOMAIN(&nvc0->screen->base), 32, tic->tic);

      BEGIN_NVC0(push, NVC0_3D(TIC_FLUSH), 1);
      PUSH_DATA (push, 0);
   } else
   if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
      /* existing entry, but backing storage was written by the GPU:
       * flush the texture cache for this TIC id */
      BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
      PUSH_DATA (push, (tic->id << 4) | 1);
   }
   /* pin the TIC slot so it is not recycled while in use */
   nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);

   res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
   res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;

   BCTX_REFN(nvc0->bufctx_3d, 3D_SUF, res, RD);

   /* upload the texture handle */
   BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(stage));
   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(stage));
   BEGIN_NVC0(push, NVC0_3D(CB_POS), 2);
   PUSH_DATA (push, NVC0_CB_AUX_TEX_INFO(slot + 32));
   PUSH_DATA (push, tic->id);
}
/* Kepler (NVE4+) path of nvc0_validate_surfaces(): for every dirty graphics
 * stage, write a 16-word surface descriptor per image slot into that stage's
 * driver auxiliary constant buffer (built by nve4_set_surface_info).
 * Only stages 0..4 are handled here; compute is validated separately. */
static inline void
nve4_update_surface_bindings(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   int i, j, s;

   for (s = 0; s < 5; s++) {
      if (!nvc0->images_dirty[s])
         continue;

      for (i = 0; i < NVC0_MAX_IMAGES; ++i) {
         struct pipe_image_view *view = &nvc0->images[s][i];

         /* bind the stage's aux constbuf and position the upload cursor at
          * slot i's surface-info area (1 header word + 16 data words) */
         BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
         PUSH_DATA (push, NVC0_CB_AUX_SIZE);
         PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
         PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
         BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 16);
         PUSH_DATA (push, NVC0_CB_AUX_SU_INFO(i));

         if (view->resource) {
            struct nv04_resource *res = nv04_resource(view->resource);

            /* writable buffer image: the bound range now has defined data */
            if (res->base.target == PIPE_BUFFER) {
               if (view->access & PIPE_IMAGE_ACCESS_WRITE)
                  nvc0_mark_image_range_valid(view);
            }

            nve4_set_surface_info(push, view, nvc0);
            BCTX_REFN(nvc0->bufctx_3d, 3D_SUF, res, RDWR);

            /* Maxwell surfaces additionally need a TIC entry. */
            if (nvc0->screen->base.class_3d >= GM107_3D_CLASS)
               gm107_validate_surfaces(nvc0, view, s, i);
         } else {
            /* unbound slot: zero the whole 16-word descriptor */
            for (j = 0; j < 16; j++)
               PUSH_DATA(push, 0);
         }
      }
   }
}
void
nvc0_validate_surfaces(struct nvc0_context *nvc0)
{
if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {
nve4_update_surface_bindings(nvc0);
} else {
nvc0_update_surface_bindings(nvc0);
}
}
/* Map from gallium formats to the GK104 image format enum used in surface
 * descriptors.  Unlisted formats map to 0 (i.e. unsupported here). */
static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT] =
{
   [PIPE_FORMAT_R32G32B32A32_FLOAT] = GK104_IMAGE_FORMAT_RGBA32_FLOAT,
   [PIPE_FORMAT_R32G32B32A32_SINT] = GK104_IMAGE_FORMAT_RGBA32_SINT,
   [PIPE_FORMAT_R32G32B32A32_UINT] = GK104_IMAGE_FORMAT_RGBA32_UINT,
   [PIPE_FORMAT_R16G16B16A16_FLOAT] = GK104_IMAGE_FORMAT_RGBA16_FLOAT,
   [PIPE_FORMAT_R16G16B16A16_UNORM] = GK104_IMAGE_FORMAT_RGBA16_UNORM,
   [PIPE_FORMAT_R16G16B16A16_SNORM] = GK104_IMAGE_FORMAT_RGBA16_SNORM,
   [PIPE_FORMAT_R16G16B16A16_SINT] = GK104_IMAGE_FORMAT_RGBA16_SINT,
   [PIPE_FORMAT_R16G16B16A16_UINT] = GK104_IMAGE_FORMAT_RGBA16_UINT,
   [PIPE_FORMAT_B8G8R8A8_UNORM] = GK104_IMAGE_FORMAT_BGRA8_UNORM,
   [PIPE_FORMAT_R8G8B8A8_UNORM] = GK104_IMAGE_FORMAT_RGBA8_UNORM,
   [PIPE_FORMAT_R8G8B8A8_SNORM] = GK104_IMAGE_FORMAT_RGBA8_SNORM,
   [PIPE_FORMAT_R8G8B8A8_SINT] = GK104_IMAGE_FORMAT_RGBA8_SINT,
   [PIPE_FORMAT_R8G8B8A8_UINT] = GK104_IMAGE_FORMAT_RGBA8_UINT,
   [PIPE_FORMAT_R11G11B10_FLOAT] = GK104_IMAGE_FORMAT_R11G11B10_FLOAT,
   [PIPE_FORMAT_R10G10B10A2_UNORM] = GK104_IMAGE_FORMAT_RGB10_A2_UNORM,
   [PIPE_FORMAT_R10G10B10A2_UINT] = GK104_IMAGE_FORMAT_RGB10_A2_UINT,
   [PIPE_FORMAT_R32G32_FLOAT] = GK104_IMAGE_FORMAT_RG32_FLOAT,
   [PIPE_FORMAT_R32G32_SINT] = GK104_IMAGE_FORMAT_RG32_SINT,
   [PIPE_FORMAT_R32G32_UINT] = GK104_IMAGE_FORMAT_RG32_UINT,
   [PIPE_FORMAT_R16G16_FLOAT] = GK104_IMAGE_FORMAT_RG16_FLOAT,
   [PIPE_FORMAT_R16G16_UNORM] = GK104_IMAGE_FORMAT_RG16_UNORM,
   [PIPE_FORMAT_R16G16_SNORM] = GK104_IMAGE_FORMAT_RG16_SNORM,
   [PIPE_FORMAT_R16G16_SINT] = GK104_IMAGE_FORMAT_RG16_SINT,
   [PIPE_FORMAT_R16G16_UINT] = GK104_IMAGE_FORMAT_RG16_UINT,
   [PIPE_FORMAT_R8G8_UNORM] = GK104_IMAGE_FORMAT_RG8_UNORM,
   [PIPE_FORMAT_R8G8_SNORM] = GK104_IMAGE_FORMAT_RG8_SNORM,
   [PIPE_FORMAT_R8G8_SINT] = GK104_IMAGE_FORMAT_RG8_SINT,
   [PIPE_FORMAT_R8G8_UINT] = GK104_IMAGE_FORMAT_RG8_UINT,
   [PIPE_FORMAT_R32_FLOAT] = GK104_IMAGE_FORMAT_R32_FLOAT,
   [PIPE_FORMAT_R32_SINT] = GK104_IMAGE_FORMAT_R32_SINT,
   [PIPE_FORMAT_R32_UINT] = GK104_IMAGE_FORMAT_R32_UINT,
   [PIPE_FORMAT_R16_FLOAT] = GK104_IMAGE_FORMAT_R16_FLOAT,
   [PIPE_FORMAT_R16_UNORM] = GK104_IMAGE_FORMAT_R16_UNORM,
   [PIPE_FORMAT_R16_SNORM] = GK104_IMAGE_FORMAT_R16_SNORM,
   [PIPE_FORMAT_R16_SINT] = GK104_IMAGE_FORMAT_R16_SINT,
   [PIPE_FORMAT_R16_UINT] = GK104_IMAGE_FORMAT_R16_UINT,
   [PIPE_FORMAT_R8_UNORM] = GK104_IMAGE_FORMAT_R8_UNORM,
   [PIPE_FORMAT_R8_SNORM] = GK104_IMAGE_FORMAT_R8_SNORM,
   [PIPE_FORMAT_R8_SINT] = GK104_IMAGE_FORMAT_R8_SINT,
   [PIPE_FORMAT_R8_UINT] = GK104_IMAGE_FORMAT_R8_UINT,
};
/* Auxiliary format description values for surface instructions.
 * Encoding: (log2(bytes per pixel) << 12) | (unk8 << 8) | unk22
 * — the low fields are not fully understood (hence "unk"); entries sharing
 * a pixel size share the same value.  Unlisted formats map to 0.
 */
static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT] =
{
   [PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x4842,
   [PIPE_FORMAT_R32G32B32A32_SINT] = 0x4842,
   [PIPE_FORMAT_R32G32B32A32_UINT] = 0x4842,

   [PIPE_FORMAT_R16G16B16A16_UNORM] = 0x3933,
   [PIPE_FORMAT_R16G16B16A16_SNORM] = 0x3933,
   [PIPE_FORMAT_R16G16B16A16_SINT] = 0x3933,
   [PIPE_FORMAT_R16G16B16A16_UINT] = 0x3933,
   [PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3933,
   [PIPE_FORMAT_R32G32_FLOAT] = 0x3433,
   [PIPE_FORMAT_R32G32_SINT] = 0x3433,
   [PIPE_FORMAT_R32G32_UINT] = 0x3433,

   [PIPE_FORMAT_R10G10B10A2_UNORM] = 0x2a24,
   [PIPE_FORMAT_R10G10B10A2_UINT] = 0x2a24,
   [PIPE_FORMAT_B8G8R8A8_UNORM] = 0x2a24,
   [PIPE_FORMAT_R8G8B8A8_UNORM] = 0x2a24,
   [PIPE_FORMAT_R8G8B8A8_SNORM] = 0x2a24,
   [PIPE_FORMAT_R8G8B8A8_SINT] = 0x2a24,
   [PIPE_FORMAT_R8G8B8A8_UINT] = 0x2a24,
   [PIPE_FORMAT_R11G11B10_FLOAT] = 0x2a24,
   [PIPE_FORMAT_R16G16_UNORM] = 0x2524,
   [PIPE_FORMAT_R16G16_SNORM] = 0x2524,
   [PIPE_FORMAT_R16G16_SINT] = 0x2524,
   [PIPE_FORMAT_R16G16_UINT] = 0x2524,
   [PIPE_FORMAT_R16G16_FLOAT] = 0x2524,
   [PIPE_FORMAT_R32_SINT] = 0x2024,
   [PIPE_FORMAT_R32_UINT] = 0x2024,
   [PIPE_FORMAT_R32_FLOAT] = 0x2024,

   [PIPE_FORMAT_R8G8_UNORM] = 0x1615,
   [PIPE_FORMAT_R8G8_SNORM] = 0x1615,
   [PIPE_FORMAT_R8G8_SINT] = 0x1615,
   [PIPE_FORMAT_R8G8_UINT] = 0x1615,
   [PIPE_FORMAT_R16_UNORM] = 0x1115,
   [PIPE_FORMAT_R16_SNORM] = 0x1115,
   [PIPE_FORMAT_R16_SINT] = 0x1115,
   [PIPE_FORMAT_R16_UINT] = 0x1115,
   [PIPE_FORMAT_R16_FLOAT] = 0x1115,

   [PIPE_FORMAT_R8_UNORM] = 0x0206,
   [PIPE_FORMAT_R8_SNORM] = 0x0206,
   [PIPE_FORMAT_R8_SINT] = 0x0206,
   [PIPE_FORMAT_R8_UINT] = 0x0206
};
/* NOTE: These are hardcoded offsets for the shader library.
 * Each entry is the byte offset of the per-format SULDP (typed surface
 * load) routine within the built-in shader library.
 * TODO: Automate them.
 */
static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT] =
{
   [PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x218,
   [PIPE_FORMAT_R32G32B32A32_SINT] = 0x218,
   [PIPE_FORMAT_R32G32B32A32_UINT] = 0x218,
   [PIPE_FORMAT_R16G16B16A16_UNORM] = 0x248,
   [PIPE_FORMAT_R16G16B16A16_SNORM] = 0x2b8,
   [PIPE_FORMAT_R16G16B16A16_SINT] = 0x330,
   [PIPE_FORMAT_R16G16B16A16_UINT] = 0x388,
   [PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3d8,
   [PIPE_FORMAT_R32G32_FLOAT] = 0x428,
   [PIPE_FORMAT_R32G32_SINT] = 0x468,
   [PIPE_FORMAT_R32G32_UINT] = 0x468,
   [PIPE_FORMAT_R10G10B10A2_UNORM] = 0x4a8,
   [PIPE_FORMAT_R10G10B10A2_UINT] = 0x530,
   [PIPE_FORMAT_R8G8B8A8_UNORM] = 0x588,
   [PIPE_FORMAT_R8G8B8A8_SNORM] = 0x5f8,
   [PIPE_FORMAT_R8G8B8A8_SINT] = 0x670,
   [PIPE_FORMAT_R8G8B8A8_UINT] = 0x6c8,
   [PIPE_FORMAT_B5G6R5_UNORM] = 0x718,
   [PIPE_FORMAT_B5G5R5X1_UNORM] = 0x7a0,
   [PIPE_FORMAT_R16G16_UNORM] = 0x828,
   [PIPE_FORMAT_R16G16_SNORM] = 0x890,
   [PIPE_FORMAT_R16G16_SINT] = 0x8f0,
   [PIPE_FORMAT_R16G16_UINT] = 0x948,
   [PIPE_FORMAT_R16G16_FLOAT] = 0x998,
   [PIPE_FORMAT_R32_FLOAT] = 0x9e8,
   [PIPE_FORMAT_R32_SINT] = 0xa30,
   [PIPE_FORMAT_R32_UINT] = 0xa30,
   [PIPE_FORMAT_R8G8_UNORM] = 0xa78,
   [PIPE_FORMAT_R8G8_SNORM] = 0xae0,
   [PIPE_FORMAT_R8G8_UINT] = 0xb48,
   [PIPE_FORMAT_R8G8_SINT] = 0xb98,
   [PIPE_FORMAT_R16_UNORM] = 0xbe8,
   [PIPE_FORMAT_R16_SNORM] = 0xc48,
   [PIPE_FORMAT_R16_SINT] = 0xca0,
   [PIPE_FORMAT_R16_UINT] = 0xce8,
   [PIPE_FORMAT_R16_FLOAT] = 0xd30,
   [PIPE_FORMAT_R8_UNORM] = 0xd88,
   [PIPE_FORMAT_R8_SNORM] = 0xde0,
   [PIPE_FORMAT_R8_SINT] = 0xe38,
   [PIPE_FORMAT_R8_UINT] = 0xe88,
   [PIPE_FORMAT_R11G11B10_FLOAT] = 0xed0
};