llvmpipe: Code-generate the depth test and include it in the shader.

Only 32-bit depth/stencil surfaces are supported for now. Stencil ops are not
implemented yet.
This commit is contained in:
José Fonseca
2009-08-21 07:48:04 +01:00
parent db35ac36d9
commit e3b38e5ec1
14 changed files with 174 additions and 871 deletions

View File

@@ -79,11 +79,8 @@ llvmpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba,
struct pipe_surface *ps = llvmpipe->framebuffer.zsbuf;
cv = util_pack_z_stencil(ps->format, depth, stencil);
lp_tile_cache_clear(llvmpipe->zsbuf_cache, zero, cv);
#if !TILE_CLEAR_OPTIMIZATION
/* non-cached surface */
pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, cv);
#endif
}
}

View File

@@ -60,8 +60,6 @@ llvmpipe_map_transfers(struct llvmpipe_context *lp)
for (i = 0; i < lp->framebuffer.nr_cbufs; i++) {
lp_tile_cache_map_transfers(lp->cbuf_cache[i]);
}
lp_tile_cache_map_transfers(lp->zsbuf_cache);
}
@@ -76,8 +74,6 @@ llvmpipe_unmap_transfers(struct llvmpipe_context *lp)
for (i = 0; i < lp->framebuffer.nr_cbufs; i++) {
lp_tile_cache_unmap_transfers(lp->cbuf_cache[i]);
}
lp_tile_cache_unmap_transfers(lp->zsbuf_cache);
}
@@ -90,12 +86,10 @@ static void llvmpipe_destroy( struct pipe_context *pipe )
draw_destroy( llvmpipe->draw );
llvmpipe->quad.shade->destroy( llvmpipe->quad.shade );
llvmpipe->quad.depth_test->destroy( llvmpipe->quad.depth_test );
llvmpipe->quad.blend->destroy( llvmpipe->quad.blend );
for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++)
lp_destroy_tile_cache(llvmpipe->cbuf_cache[i]);
lp_destroy_tile_cache(llvmpipe->zsbuf_cache);
for (i = 0; i < PIPE_MAX_SAMPLERS; i++)
lp_destroy_tex_tile_cache(llvmpipe->tex_cache[i]);
@@ -216,7 +210,6 @@ llvmpipe_create( struct pipe_screen *screen )
*/
for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++)
llvmpipe->cbuf_cache[i] = lp_create_tile_cache( screen );
llvmpipe->zsbuf_cache = lp_create_tile_cache( screen );
for (i = 0; i < PIPE_MAX_SAMPLERS; i++)
llvmpipe->tex_cache[i] = lp_create_tex_tile_cache( screen );
@@ -224,7 +217,6 @@ llvmpipe_create( struct pipe_screen *screen )
/* setup quad rendering stages */
llvmpipe->quad.shade = lp_quad_shade_stage(llvmpipe);
llvmpipe->quad.depth_test = lp_quad_depth_test_stage(llvmpipe);
llvmpipe->quad.blend = lp_quad_blend_stage(llvmpipe);
/* vertex shader samplers */

View File

@@ -117,7 +117,6 @@ struct llvmpipe_context {
/** Software quad rendering pipeline */
struct {
struct quad_stage *shade;
struct quad_stage *depth_test;
struct quad_stage *blend;
struct quad_stage *first; /**< points to one of the above stages */
@@ -140,7 +139,6 @@ struct llvmpipe_context {
boolean dirty_render_cache;
struct llvmpipe_tile_cache *cbuf_cache[PIPE_MAX_COLOR_BUFS];
struct llvmpipe_tile_cache *zsbuf_cache;
unsigned tex_timestamp;
struct llvmpipe_tex_tile_cache *tex_cache[PIPE_MAX_SAMPLERS];

View File

@@ -80,8 +80,7 @@ llvmpipe_flush( struct pipe_context *pipe,
if (llvmpipe->cbuf_cache[i])
lp_flush_tile_cache(llvmpipe->cbuf_cache[i]);
if (llvmpipe->zsbuf_cache)
lp_flush_tile_cache(llvmpipe->zsbuf_cache);
/* FIXME: untile zsbuf! */
llvmpipe->dirty_render_cache = FALSE;
}

View File

@@ -84,7 +84,6 @@ struct quad_header_output
{
/** colors in SOA format (rrrr, gggg, bbbb, aaaa) */
float ALIGN16_ATTRIB color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE];
float depth[QUAD_SIZE];
};

View File

@@ -1,792 +0,0 @@
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/**
* \brief Quad depth testing
*/
#include "pipe/p_defines.h"
#include "util/u_memory.h"
#include "tgsi/tgsi_scan.h"
#include "lp_context.h"
#include "lp_quad.h"
#include "lp_surface.h"
#include "lp_quad_pipe.h"
#include "lp_tile_cache.h"
#include "lp_state.h" /* for lp_fragment_shader */
/**
 * Scratch state used while depth/stencil testing a single quad.
 *
 * The buffer-side values are loaded by get_depth_stencil_values(), the
 * quad-side values by convert_quad_depth(), and the (possibly updated)
 * buffer-side values are stored back by write_depth_stencil_values().
 */
struct depth_data {
struct pipe_surface *ps; /* depth/stencil surface being tested against */
enum pipe_format format; /* format of ps; selects the pack/unpack path */
unsigned bzzzz[QUAD_SIZE]; /**< Z values fetched from depth buffer */
unsigned qzzzz[QUAD_SIZE]; /**< Z values from the quad */
ubyte stencilVals[QUAD_SIZE]; /* stencil values; only loaded for the S8Z24/X8Z24 and Z24S8/Z24X8 formats */
struct llvmpipe_cached_tile *tile; /* cached tile the quad's pixels are read from / written to */
};
/**
 * Fetch the depth (and, for the 24/8-bit formats, stencil) values covered
 * by a quad from the cached tile into data->bzzzz[] / data->stencilVals[].
 *
 * Pixel i of the quad lives at column offset (i & 1) and row offset
 * (i >> 1) from the quad origin, which is reduced modulo TILE_SIZE to
 * index into the tile.
 *
 * \param data  in: tile and format; out: bzzzz[] and possibly stencilVals[]
 * \param quad  quad whose input.x0/y0 locate the pixels
 */
static void
get_depth_stencil_values( struct depth_data *data,
                          const struct quad_header *quad )
{
   const struct llvmpipe_cached_tile *tile = data->tile;
   const int col0 = quad->input.x0 % TILE_SIZE;
   const int row0 = quad->input.y0 % TILE_SIZE;
   unsigned i;

   switch (data->format) {
   case PIPE_FORMAT_Z16_UNORM:
      for (i = 0; i < QUAD_SIZE; i++) {
         const int col = col0 + (i & 1);
         const int row = row0 + (i >> 1);

         data->bzzzz[i] = tile->data.depth16[row][col];
      }
      break;

   case PIPE_FORMAT_Z32_UNORM:
      for (i = 0; i < QUAD_SIZE; i++) {
         const int col = col0 + (i & 1);
         const int row = row0 + (i >> 1);

         data->bzzzz[i] = tile->data.depth32[row][col];
      }
      break;

   case PIPE_FORMAT_X8Z24_UNORM:
   case PIPE_FORMAT_S8Z24_UNORM:
      /* depth in the low 24 bits, stencil in the top byte */
      for (i = 0; i < QUAD_SIZE; i++) {
         const int col = col0 + (i & 1);
         const int row = row0 + (i >> 1);
         const unsigned packed = tile->data.depth32[row][col];

         data->bzzzz[i] = packed & 0xffffff;
         data->stencilVals[i] = packed >> 24;
      }
      break;

   case PIPE_FORMAT_Z24X8_UNORM:
   case PIPE_FORMAT_Z24S8_UNORM:
      /* depth in the high 24 bits, stencil in the bottom byte */
      for (i = 0; i < QUAD_SIZE; i++) {
         const int col = col0 + (i & 1);
         const int row = row0 + (i >> 1);
         const unsigned packed = tile->data.depth32[row][col];

         data->bzzzz[i] = packed >> 8;
         data->stencilVals[i] = packed & 0xff;
      }
      break;

   default:
      assert(0);
   }
}
/**
 * Compute the quad's four depth values by linear interpolation.
 *
 * Used when the fragment shader has not produced depth itself.  The plane
 * equation is taken from the coefficient entries at [0][2] and evaluated
 * at the quad origin; the remaining three pixels are obtained by adding
 * the x and/or y slope.
 *
 * \param quad  in: input.x0/y0 and coef; out: output.depth[0..3]
 */
static void
interpolate_quad_depth( struct quad_header *quad )
{
   float *z = quad->output.depth;
   const float x = (float) quad->input.x0;
   const float y = (float) quad->input.y0;
   const float slope_x = quad->coef->dadx[0][2];
   const float slope_y = quad->coef->dady[0][2];
   const float origin = quad->coef->a0[0][2] + slope_x * x + slope_y * y;

   z[0] = origin;
   z[1] = origin + slope_x;
   z[2] = origin + slope_y;
   z[3] = origin + slope_x + slope_y;
}
/**
 * Convert the quad's float depth values into the integer representation
 * of the depth buffer format, storing them in data->qzzzz[].
 *
 * When the Z buffer holds integers the comparison has to be done on
 * integers too: a float->int->float round trip is not the identity and
 * would otherwise introduce Z-fighting.
 *
 * \param data  in: format; out: qzzzz[]
 * \param quad  in: output.depth[]
 */
static void
convert_quad_depth( struct depth_data *data,
                    const struct quad_header *quad )
{
   const float *z = quad->output.depth;
   unsigned i;

   switch (data->format) {
   case PIPE_FORMAT_Z16_UNORM:
      {
         const float max16 = 65535.0;

         for (i = 0; i < QUAD_SIZE; i++)
            data->qzzzz[i] = (unsigned) (z[i] * max16);
      }
      break;

   case PIPE_FORMAT_Z32_UNORM:
      {
         /* double: a float cannot represent 2^32-1 */
         const double max32 = (double) (uint) ~0UL;

         for (i = 0; i < QUAD_SIZE; i++)
            data->qzzzz[i] = (unsigned) (z[i] * max32);
      }
      break;

   case PIPE_FORMAT_X8Z24_UNORM:
   case PIPE_FORMAT_S8Z24_UNORM:
   case PIPE_FORMAT_Z24X8_UNORM:
   case PIPE_FORMAT_Z24S8_UNORM:
      {
         /* all 24-bit layouts share the same scale; packing happens on write */
         const float max24 = (float) ((1 << 24) - 1);

         for (i = 0; i < QUAD_SIZE; i++)
            data->qzzzz[i] = (unsigned) (z[i] * max24);
      }
      break;

   default:
      assert(0);
   }
}
/**
 * Store the (possibly updated) depth/stencil values of a quad back into
 * the cached tile, packing them according to data->format.
 *
 * \param data  in: tile, format, bzzzz[] and stencilVals[]
 * \param quad  quad whose input.x0/y0 locate the pixels inside the tile
 */
static void
write_depth_stencil_values( struct depth_data *data,
                            struct quad_header *quad )
{
   struct llvmpipe_cached_tile *tile = data->tile;
   unsigned j;

   /* put updated Z values back into cached tile */
   switch (data->format) {
   case PIPE_FORMAT_Z16_UNORM:
      for (j = 0; j < QUAD_SIZE; j++) {
         int x = quad->input.x0 % TILE_SIZE + (j & 1);
         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
         tile->data.depth16[y][x] = (ushort) data->bzzzz[j];
      }
      break;
   case PIPE_FORMAT_X8Z24_UNORM:
   case PIPE_FORMAT_Z32_UNORM:
      for (j = 0; j < QUAD_SIZE; j++) {
         int x = quad->input.x0 % TILE_SIZE + (j & 1);
         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
         tile->data.depth32[y][x] = data->bzzzz[j];
      }
      break;
   case PIPE_FORMAT_S8Z24_UNORM:
      for (j = 0; j < QUAD_SIZE; j++) {
         int x = quad->input.x0 % TILE_SIZE + (j & 1);
         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
         /* Widen to unsigned before shifting: the ubyte would otherwise be
          * promoted to int and a stencil value >= 0x80 shifted left by 24
          * overflows a signed int, which is undefined behaviour.
          */
         tile->data.depth32[y][x] =
            ((unsigned) data->stencilVals[j] << 24) | data->bzzzz[j];
      }
      break;
   case PIPE_FORMAT_Z24S8_UNORM:
      for (j = 0; j < QUAD_SIZE; j++) {
         int x = quad->input.x0 % TILE_SIZE + (j & 1);
         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
         tile->data.depth32[y][x] = (data->bzzzz[j] << 8) | data->stencilVals[j];
      }
      break;
   case PIPE_FORMAT_Z24X8_UNORM:
      for (j = 0; j < QUAD_SIZE; j++) {
         int x = quad->input.x0 % TILE_SIZE + (j & 1);
         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
         tile->data.depth32[y][x] = data->bzzzz[j] << 8;
      }
      break;
   default:
      assert(0);
   }
}
/** Largest stencil value; only 8-bit stencil is supported. */
#define STENCIL_MAX 0xff
/**
* Do the basic stencil test (compare stencil buffer values against the
* reference value.
*
* \param data->stencilVals the stencil values from the stencil buffer
* \param func the stencil func (PIPE_FUNC_x)
* \param ref the stencil reference value
* \param valMask the stencil value mask indicating which bits of the stencil
* values and ref value are to be used.
* \return mask indicating which pixels passed the stencil test
*/
static unsigned
do_stencil_test(struct depth_data *data,
unsigned func,
unsigned ref, unsigned valMask)
{
unsigned passMask = 0x0;
unsigned j;
ref &= valMask;
switch (func) {
case PIPE_FUNC_NEVER:
/* passMask = 0x0 */
break;
case PIPE_FUNC_LESS:
for (j = 0; j < QUAD_SIZE; j++) {
if (ref < (data->stencilVals[j] & valMask)) {
passMask |= (1 << j);
}
}
break;
case PIPE_FUNC_EQUAL:
for (j = 0; j < QUAD_SIZE; j++) {
if (ref == (data->stencilVals[j] & valMask)) {
passMask |= (1 << j);
}
}
break;
case PIPE_FUNC_LEQUAL:
for (j = 0; j < QUAD_SIZE; j++) {
if (ref <= (data->stencilVals[j] & valMask)) {
passMask |= (1 << j);
}
}
break;
case PIPE_FUNC_GREATER:
for (j = 0; j < QUAD_SIZE; j++) {
if (ref > (data->stencilVals[j] & valMask)) {
passMask |= (1 << j);
}
}
break;
case PIPE_FUNC_NOTEQUAL:
for (j = 0; j < QUAD_SIZE; j++) {
if (ref != (data->stencilVals[j] & valMask)) {
passMask |= (1 << j);
}
}
break;
case PIPE_FUNC_GEQUAL:
for (j = 0; j < QUAD_SIZE; j++) {
if (ref >= (data->stencilVals[j] & valMask)) {
passMask |= (1 << j);
}
}
break;
case PIPE_FUNC_ALWAYS:
passMask = MASK_ALL;
break;
default:
assert(0);
}
return passMask;
}
/**
 * Result of applying one stencil operator to a single stencil value.
 *
 * \param op   the stencil operator (PIPE_STENCIL_OP_x)
 * \param old  current stencil value
 * \param ref  the stencil reference value
 * \return the new, not yet write-masked, stencil value
 */
static ubyte
stencil_op_value(unsigned op, ubyte old, ubyte ref)
{
   switch (op) {
   case PIPE_STENCIL_OP_KEEP:
      return old;
   case PIPE_STENCIL_OP_ZERO:
      return 0;
   case PIPE_STENCIL_OP_REPLACE:
      return ref;
   case PIPE_STENCIL_OP_INCR:
      /* saturating increment */
      return (old < STENCIL_MAX) ? (ubyte) (old + 1) : old;
   case PIPE_STENCIL_OP_DECR:
      /* saturating decrement */
      return (old > 0) ? (ubyte) (old - 1) : old;
   case PIPE_STENCIL_OP_INCR_WRAP:
      return (ubyte) (old + 1);
   case PIPE_STENCIL_OP_DECR_WRAP:
      return (ubyte) (old - 1);
   case PIPE_STENCIL_OP_INVERT:
      return (ubyte) ~old;
   default:
      assert(0);
      return old;
   }
}


/**
 * Apply a stencil operator to the stencil values of the selected pixels.
 *
 * \param data     data->stencilVals is read and updated in place
 * \param mask     bit i set means pixel i is to be updated
 * \param op       the stencil operator (PIPE_STENCIL_OP_x)
 * \param ref      the stencil reference value
 * \param wrtMask  writemask selecting which bits of each stencil value
 *                 may change
 */
static void
apply_stencil_op(struct depth_data *data,
                 unsigned mask, unsigned op, ubyte ref, ubyte wrtMask)
{
   unsigned i;

   for (i = 0; i < QUAD_SIZE; i++) {
      const ubyte old = data->stencilVals[i];
      /* evaluated for every pixel so an invalid op is always caught */
      const ubyte candidate = stencil_op_value(op, old, ref);
      const ubyte updated = (mask & (1 << i)) ? candidate : old;

      /* Bit-wise merge under the writemask.  With a full writemask
       * (STENCIL_MAX) this reduces to a plain copy of the new value.
       */
      data->stencilVals[i] = (wrtMask & updated) | (~wrtMask & old);
   }
}
/**
 * Depth test one quad and, if depth writes are enabled, update the
 * in-memory copy of the buffer values.
 *
 * The integer quad depths (data->qzzzz) are compared against the buffer
 * depths (data->bzzzz) with the bound depth function; quad->inout.mask is
 * reduced to the pixels that pass.
 *
 * To increase efficiency, we should probably have multiple versions
 * of this function that are specifically for Z16, Z32 and FP Z buffers.
 * Try to effectively do that with codegen...
 *
 * \return TRUE if at least one pixel of the quad is still alive
 */
static boolean
depth_test_quad(struct quad_stage *qs,
                struct depth_data *data,
                struct quad_header *quad)
{
   struct llvmpipe_context *llvmpipe = qs->llvmpipe;
   const unsigned func = llvmpipe->depth_stencil->depth.func;
   unsigned passed = 0;
   unsigned i;

   if (func == PIPE_FUNC_ALWAYS) {
      passed = MASK_ALL;
   }
   else if (func != PIPE_FUNC_NEVER) {
      for (i = 0; i < QUAD_SIZE; i++) {
         const unsigned incoming = data->qzzzz[i];
         const unsigned stored = data->bzzzz[i];
         boolean hit = FALSE;

         switch (func) {
         case PIPE_FUNC_LESS:
            hit = incoming < stored;
            break;
         case PIPE_FUNC_EQUAL:
            hit = incoming == stored;
            break;
         case PIPE_FUNC_LEQUAL:
            hit = incoming <= stored;
            break;
         case PIPE_FUNC_GREATER:
            hit = incoming > stored;
            break;
         case PIPE_FUNC_NOTEQUAL:
            hit = incoming != stored;
            break;
         case PIPE_FUNC_GEQUAL:
            hit = incoming >= stored;
            break;
         default:
            assert(0);
            break;
         }

         if (hit)
            passed |= (1 << i);
      }
   }

   quad->inout.mask &= passed;
   if (!quad->inout.mask)
      return FALSE;

   /* Only the internal copy is touched here, and only when the depth
    * writemask is set.  The buffer may still have to be written out by
    * the caller because of stencil changes.
    */
   if (llvmpipe->depth_stencil->depth.writemask) {
      for (i = 0; i < QUAD_SIZE; i++) {
         if (quad->inout.mask & (1 << i))
            data->bzzzz[i] = data->qzzzz[i];
      }
   }

   return TRUE;
}
/**
* Do stencil (and depth) testing. Stenciling depends on the outcome of
* depth testing.
*/
static boolean
depth_stencil_test_quad(struct quad_stage *qs,
struct depth_data *data,
struct quad_header *quad)
{
struct llvmpipe_context *llvmpipe = qs->llvmpipe;
unsigned func, zFailOp, zPassOp, failOp;
ubyte ref, wrtMask, valMask;
uint face = quad->input.facing;
if (!llvmpipe->depth_stencil->stencil[1].enabled) {
/* single-sided stencil test, use front (face=0) state */
face = 0;
}
/* choose front or back face function, operator, etc */
/* XXX we could do these initializations once per primitive */
func = llvmpipe->depth_stencil->stencil[face].func;
failOp = llvmpipe->depth_stencil->stencil[face].fail_op;
zFailOp = llvmpipe->depth_stencil->stencil[face].zfail_op;
zPassOp = llvmpipe->depth_stencil->stencil[face].zpass_op;
ref = llvmpipe->depth_stencil->stencil[face].ref_value;
wrtMask = llvmpipe->depth_stencil->stencil[face].writemask;
valMask = llvmpipe->depth_stencil->stencil[face].valuemask;
/* do the stencil test first */
{
unsigned passMask, failMask;
passMask = do_stencil_test(data, func, ref, valMask);
failMask = quad->inout.mask & ~passMask;
quad->inout.mask &= passMask;
if (failOp != PIPE_STENCIL_OP_KEEP) {
apply_stencil_op(data, failMask, failOp, ref, wrtMask);
}
}
if (quad->inout.mask) {
/* now the pixels that passed the stencil test are depth tested */
if (llvmpipe->depth_stencil->depth.enabled) {
const unsigned origMask = quad->inout.mask;
depth_test_quad(qs, data, quad); /* quad->mask is updated */
/* update stencil buffer values according to z pass/fail result */
if (zFailOp != PIPE_STENCIL_OP_KEEP) {
const unsigned failMask = origMask & ~quad->inout.mask;
apply_stencil_op(data, failMask, zFailOp, ref, wrtMask);
}
if (zPassOp != PIPE_STENCIL_OP_KEEP) {
const unsigned passMask = origMask & quad->inout.mask;
apply_stencil_op(data, passMask, zPassOp, ref, wrtMask);
}
}
else {
/* no depth test, apply Zpass operator to stencil buffer values */
apply_stencil_op(data, quad->inout.mask, zPassOp, ref, wrtMask);
}
}
return quad->inout.mask != 0;
}
/**
 * Number of set bits in a quad coverage mask, indexed by the mask itself.
 *
 * A quad has four pixels, so the mask is four bits wide and the table needs
 * 16 entries.  With only eight entries any mask with bit 3 set would index
 * past the end of the array.
 */
static const unsigned mask_count[16] =
{
   0,                           /* 0x0 */
   1,                           /* 0x1 */
   1,                           /* 0x2 */
   2,                           /* 0x3 */
   1,                           /* 0x4 */
   2,                           /* 0x5 */
   2,                           /* 0x6 */
   3,                           /* 0x7 */
   1,                           /* 0x8 */
   2,                           /* 0x9 */
   2,                           /* 0xa */
   3,                           /* 0xb */
   2,                           /* 0xc */
   3,                           /* 0xd */
   3,                           /* 0xe */
   4,                           /* 0xf */
};
static void
depth_test_quads_fallback(struct quad_stage *qs,
struct quad_header *quads[],
unsigned nr)
{
unsigned i, pass = 0;
const struct lp_fragment_shader *fs = qs->llvmpipe->fs;
boolean interp_depth = !fs->info.writes_z;
struct depth_data data;
if (qs->llvmpipe->framebuffer.zsbuf &&
(qs->llvmpipe->depth_stencil->depth.enabled ||
qs->llvmpipe->depth_stencil->stencil[0].enabled)) {
data.ps = qs->llvmpipe->framebuffer.zsbuf;
data.format = data.ps->format;
data.tile = lp_get_cached_tile(qs->llvmpipe->zsbuf_cache,
quads[0]->input.x0,
quads[0]->input.y0);
for (i = 0; i < nr; i++) {
if(!quads[i]->inout.mask)
continue;
get_depth_stencil_values(&data, quads[i]);
if (qs->llvmpipe->depth_stencil->depth.enabled) {
if (interp_depth)
interpolate_quad_depth(quads[i]);
convert_quad_depth(&data, quads[i]);
}
if (qs->llvmpipe->depth_stencil->stencil[0].enabled) {
if (!depth_stencil_test_quad(qs, &data, quads[i]))
continue;
}
else {
if (!depth_test_quad(qs, &data, quads[i]))
continue;
}
if (qs->llvmpipe->depth_stencil->stencil[0].enabled ||
qs->llvmpipe->depth_stencil->depth.writemask)
write_depth_stencil_values(&data, quads[i]);
qs->llvmpipe->occlusion_count += mask_count[quads[i]->inout.mask];
++pass;
}
}
if (pass)
qs->next->run(qs->next, quads, nr);
}
/**
 * Specialised depth test for: interpolated depth, Z16 buffer,
 * PIPE_FUNC_LESS, depth writes enabled, no stencil (these are the
 * conditions under which choose_depth_test() selects it).
 *
 * Depth is interpolated once for the first quad and then stepped per quad
 * in 16-bit integer form; the tile pointer is stepped alongside it.
 * Each quad's inout.mask is replaced by the set of pixels that passed, and
 * the whole array is forwarded if any quad has pixels left.
 *
 * XXX: this function assumes setup function actually emits linear
 * spans of quads. It seems a lot more natural to do (early)
 * depth-testing on spans rather than quads.
 */
static void
depth_interp_z16_less_write(struct quad_stage *qs,
struct quad_header *quads[],
unsigned nr)
{
unsigned i, pass = 0;
const unsigned ix = quads[0]->input.x0;
const unsigned iy = quads[0]->input.y0;
const float fx = (float) ix;
const float fy = (float) iy;
const float dzdx = quads[0]->coef->dadx[0][2];
const float dzdy = quads[0]->coef->dady[0][2];
const float z0 = quads[0]->coef->a0[0][2] + dzdx * fx + dzdy * fy;
struct llvmpipe_cached_tile *tile;
ushort (*depth16)[TILE_SIZE];
ushort idepth[4], depth_step;
const float scale = 65535.0;
/* integer depths of the first quad's four pixels */
idepth[0] = (ushort)((z0) * scale);
idepth[1] = (ushort)((z0 + dzdx) * scale);
idepth[2] = (ushort)((z0 + dzdy) * scale);
idepth[3] = (ushort)((z0 + dzdx + dzdy) * scale);
/* depth change between consecutive quads: two pixels along x */
depth_step = (ushort)(dzdx * 2 * scale);
tile = lp_get_cached_tile(qs->llvmpipe->zsbuf_cache, ix, iy);
/* View the tile from the first quad's top-left pixel, so that
 * depth16[row][col] addresses the pixels of the current quad.
 */
depth16 = (ushort (*)[TILE_SIZE])
&tile->data.depth16[iy % TILE_SIZE][ix % TILE_SIZE];
for (i = 0; i < nr; i++) {
unsigned outmask = quads[i]->inout.mask;
unsigned mask = 0;
/* per pixel: if covered and closer than the buffer, write and keep */
if ((outmask & 1) && idepth[0] < depth16[0][0]) {
depth16[0][0] = idepth[0];
mask |= (1 << 0);
}
if ((outmask & 2) && idepth[1] < depth16[0][1]) {
depth16[0][1] = idepth[1];
mask |= (1 << 1);
}
if ((outmask & 4) && idepth[2] < depth16[1][0]) {
depth16[1][0] = idepth[2];
mask |= (1 << 2);
}
if ((outmask & 8) && idepth[3] < depth16[1][1]) {
depth16[1][1] = idepth[3];
mask |= (1 << 3);
}
/* advance the depths and the tile view by one quad (two columns) */
idepth[0] += depth_step;
idepth[1] += depth_step;
idepth[2] += depth_step;
idepth[3] += depth_step;
depth16 = (ushort (*)[TILE_SIZE]) &depth16[0][2];
quads[i]->inout.mask = mask;
if (quads[i]->inout.mask)
++pass;
}
if (pass)
qs->next->run(qs->next, quads, nr);
}
/**
 * Pass-through used when neither depth nor stencil testing is needed:
 * hands the quads to the next stage untouched.
 */
static void
depth_noop(struct quad_stage *qs,
           struct quad_header *quads[],
           unsigned nr)
{
   struct quad_stage *next = qs->next;

   next->run(next, quads, nr);
}
static void
choose_depth_test(struct quad_stage *qs,
struct quad_header *quads[],
unsigned nr)
{
boolean interp_depth = !qs->llvmpipe->fs->info.writes_z;
boolean depth = (qs->llvmpipe->framebuffer.zsbuf &&
qs->llvmpipe->depth_stencil->depth.enabled);
unsigned depthfunc = qs->llvmpipe->depth_stencil->depth.func;
boolean stencil = qs->llvmpipe->depth_stencil->stencil[0].enabled;
boolean depthwrite = qs->llvmpipe->depth_stencil->depth.writemask;
qs->run = depth_test_quads_fallback;
if (!depth &&
!stencil) {
qs->run = depth_noop;
}
else if (interp_depth &&
depth &&
depthfunc == PIPE_FUNC_LESS &&
depthwrite &&
!stencil)
{
switch (qs->llvmpipe->framebuffer.zsbuf->format) {
case PIPE_FORMAT_Z16_UNORM:
qs->run = depth_interp_z16_less_write;
break;
default:
break;
}
}
qs->run( qs, quads, nr );
}
/**
 * Stage begin hook: re-arm the run callback so the implementation is
 * chosen again on the next run, then propagate begin to the next stage.
 */
static void depth_test_begin(struct quad_stage *qs)
{
   struct quad_stage *next = qs->next;

   qs->run = choose_depth_test;
   next->begin(next);
}
/**
 * Stage destroy hook: release the stage object itself.
 */
static void depth_test_destroy(struct quad_stage *qs)
{
   FREE( qs );
}
struct quad_stage *lp_quad_depth_test_stage( struct llvmpipe_context *llvmpipe )
{
struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
stage->llvmpipe = llvmpipe;
stage->begin = depth_test_begin;
stage->run = choose_depth_test;
stage->destroy = depth_test_destroy;
return stage;
}

View File

@@ -39,23 +39,21 @@
#include "util/u_math.h"
#include "util/u_memory.h"
#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"
#include "pipe/p_screen.h"
#include "lp_context.h"
#include "lp_state.h"
#include "lp_quad.h"
#include "lp_quad_pipe.h"
#include "lp_texture.h"
#include "lp_tex_sample.h"
/**
 * Fragment shading quad stage.
 */
struct quad_shade_stage
{
struct quad_stage stage; /**< base class */
union tgsi_exec_channel ALIGN16_ATTRIB pos[NUM_CHANNELS];
uint32_t ALIGN16_ATTRIB mask[NUM_CHANNELS];
/* transfer on the depth/stencil surface, acquired in shade_begin();
 * NULL when there is none */
struct pipe_transfer *transfer;
/* pointer returned by transfer_map() for 'transfer'; NULL when unmapped */
uint8_t *map;
};
@@ -79,6 +77,10 @@ shade_quad(struct quad_stage *qs, struct quad_header *quad)
struct lp_fragment_shader *fs = llvmpipe->fs;
void *constants;
struct tgsi_sampler **samplers;
const unsigned x = quad->input.x0;
const unsigned y = quad->input.y0;
void *depth;
uint32_t ALIGN16_ATTRIB mask[NUM_CHANNELS];
unsigned chan_index;
assert(fs->current);
@@ -89,23 +91,38 @@ shade_quad(struct quad_stage *qs, struct quad_header *quad)
samplers = (struct tgsi_sampler **)llvmpipe->tgsi.frag_samplers_list;
for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index)
qss->mask[chan_index] = ~0;
mask[chan_index] = quad->inout.mask & (1 << chan_index) ? ~0 : 0;
if(qss->map) {
assert((x % 2) == 0);
assert((y % 2) == 0);
depth = qss->map +
y*qss->transfer->stride +
2*x*qss->transfer->block.size;
}
else
depth = NULL;
assert((((uintptr_t)mask) & 0xf) == 0);
assert((((uintptr_t)quad->output.color) & 0xf) == 0);
assert((((uintptr_t)depth) & 0xf) == 0);
/* run shader */
fs->current->jit_function( quad->input.x0,
quad->input.y0,
fs->current->jit_function( x,
y,
quad->coef->a0,
quad->coef->dadx,
quad->coef->dady,
constants,
qss->mask,
mask,
quad->output.color,
quad->output.depth,
depth,
samplers);
for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index)
if(!qss->mask[chan_index])
if(!mask[chan_index])
quad->inout.mask &= ~(1 << chan_index);
if (quad->inout.mask == 0)
return FALSE;
@@ -168,6 +185,31 @@ shade_quads(struct quad_stage *qs,
/**
 * Stage begin hook.
 *
 * Releases any transfer left over from a previous begin, then, if a
 * depth/stencil surface is bound, acquires a read/write transfer covering
 * the whole surface and maps it.  Finally propagates begin to the next
 * stage.  qss->map stays NULL when there is no surface or the transfer
 * could not be obtained.
 */
static void
shade_begin(struct quad_stage *qs)
{
   struct quad_shade_stage *qss = quad_shade_stage( qs );
   struct llvmpipe_context *lp = qs->llvmpipe;
   struct pipe_screen *scr = lp->pipe.screen;
   struct pipe_surface *zs = lp->framebuffer.zsbuf;

   /* drop the previous mapping and transfer, if any */
   if (qss->transfer != NULL) {
      if (qss->map != NULL) {
         scr->transfer_unmap(scr, qss->transfer);
         qss->map = NULL;
      }

      scr->tex_transfer_destroy(qss->transfer);
      qss->transfer = NULL;
   }

   /* map the currently bound depth/stencil surface */
   if (zs != NULL) {
      qss->transfer = scr->get_tex_transfer(scr, zs->texture,
                                            zs->face, zs->level, zs->zslice,
                                            PIPE_TRANSFER_READ_WRITE,
                                            0, 0, zs->width, zs->height);
      if (qss->transfer != NULL)
         qss->map = scr->transfer_map(scr, qss->transfer);
   }

   qs->next->begin(qs->next);
}
@@ -175,6 +217,20 @@ shade_begin(struct quad_stage *qs)
/**
 * Stage destroy hook: unmap and destroy the depth/stencil transfer if one
 * is still held, then free the stage.
 */
static void
shade_destroy(struct quad_stage *qs)
{
   struct quad_shade_stage *qss = quad_shade_stage( qs );
   struct llvmpipe_context *lp = qs->llvmpipe;
   struct pipe_screen *scr = lp->pipe.screen;

   if (qss->transfer != NULL) {
      if (qss->map != NULL) {
         scr->transfer_unmap(scr, qss->transfer);
         qss->map = NULL;
      }

      scr->tex_transfer_destroy(qss->transfer);
      qss->transfer = NULL;
   }

   align_free( qs );
}

View File

@@ -42,22 +42,8 @@ lp_push_quad_first( struct llvmpipe_context *lp,
/**
 * Assemble the quad pipeline for the current state.
 *
 * The blend stage is installed as the starting point and the shade and
 * depth test stages are added with lp_push_quad_first(); the order of the
 * two pushes depends on whether the depth test may run early.
 * NOTE(review): presumably lp_push_quad_first() prepends, so the stage
 * pushed last runs first -- confirm against its definition.
 */
void
lp_build_quad_pipeline(struct llvmpipe_context *lp)
{
   /* The depth test may be "early" only if nothing after shading can
    * still change coverage or depth.
    */
   boolean early_depth_test =
      lp->depth_stencil->depth.enabled &&
      lp->framebuffer.zsbuf &&
      !lp->depth_stencil->alpha.enabled &&
      !lp->fs->info.uses_kill &&
      !lp->fs->info.writes_z;

   lp->quad.first = lp->quad.blend;

   if (!early_depth_test) {
      lp_push_quad_first( lp, lp->quad.depth_test );
      lp_push_quad_first( lp, lp->quad.shade );
   }
   else {
      lp_push_quad_first( lp, lp->quad.shade );
      lp_push_quad_first( lp, lp->quad.depth_test );
   }
}

View File

@@ -58,13 +58,10 @@ struct quad_stage {
struct quad_stage *lp_quad_polygon_stipple_stage( struct llvmpipe_context *llvmpipe );
struct quad_stage *lp_quad_earlyz_stage( struct llvmpipe_context *llvmpipe );
struct quad_stage *lp_quad_shade_stage( struct llvmpipe_context *llvmpipe );
struct quad_stage *lp_quad_alpha_test_stage( struct llvmpipe_context *llvmpipe );
struct quad_stage *lp_quad_stencil_test_stage( struct llvmpipe_context *llvmpipe );
struct quad_stage *lp_quad_depth_test_stage( struct llvmpipe_context *llvmpipe );
struct quad_stage *lp_quad_occlusion_stage( struct llvmpipe_context *llvmpipe );
struct quad_stage *lp_quad_coverage_stage( struct llvmpipe_context *llvmpipe );
struct quad_stage *lp_quad_blend_stage( struct llvmpipe_context *llvmpipe );
struct quad_stage *lp_quad_colormask_stage( struct llvmpipe_context *llvmpipe );
struct quad_stage *lp_quad_output_stage( struct llvmpipe_context *llvmpipe );
void lp_build_quad_pipeline(struct llvmpipe_context *lp);

View File

@@ -138,6 +138,11 @@ llvmpipe_is_format_supported( struct pipe_screen *screen,
target == PIPE_TEXTURE_3D ||
target == PIPE_TEXTURE_CUBE);
if(format == PIPE_FORMAT_Z16_UNORM)
return FALSE;
if(format == PIPE_FORMAT_S8_UNORM)
return FALSE;
switch(format) {
case PIPE_FORMAT_DXT1_RGB:
case PIPE_FORMAT_DXT1_RGBA:

View File

@@ -74,15 +74,18 @@ typedef void
struct lp_fragment_shader;
/**
* Subclass of pipe_shader_state (though it doesn't really need to be).
*
* This is starting to look an awful lot like a quad pipeline stage...
*/
/**
 * State that a generated fragment shader variant depends on.
 *
 * llvmpipe_update_fs() zero-fills a key, copies the bound state into it
 * and compares it against existing variants with memcmp().
 */
struct lp_fragment_shader_variant_key
{
struct pipe_depth_state depth; /* depth test state */
struct pipe_alpha_state alpha; /* alpha test state */
};
struct lp_fragment_shader_variant
{
struct lp_fragment_shader *shader;
struct pipe_alpha_state alpha;
struct lp_fragment_shader_variant_key key;
LLVMValueRef function;
@@ -103,8 +106,6 @@ struct lp_fragment_shader
struct tgsi_shader_info info;
struct llvmpipe_screen *screen;
struct lp_fragment_shader_variant *variants;
struct lp_fragment_shader_variant *current;

View File

@@ -28,6 +28,7 @@
#include "pipe/p_defines.h"
#include "util/u_memory.h"
#include "util/u_format.h"
#include "util/u_debug_dump.h"
#include "pipe/internal/p_winsys_screen.h"
#include "pipe/p_shader_tokens.h"
@@ -36,6 +37,8 @@
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_parse.h"
#include "lp_bld_type.h"
#include "lp_bld_conv.h"
#include "lp_bld_depth.h"
#include "lp_bld_tgsi.h"
#include "lp_bld_alpha.h"
#include "lp_bld_swizzle.h"
@@ -105,11 +108,54 @@ setup_pos_vector(LLVMBuilderRef builder,
}
static struct lp_fragment_shader_variant *
shader_generate(struct llvmpipe_screen *screen,
struct lp_fragment_shader *shader,
const struct pipe_alpha_state *alpha)
/**
 * Emit the depth test into the shader being generated.
 *
 * Does nothing when no depth/stencil surface is bound.  Otherwise the
 * float depth in \p src is converted to the unsigned normalized integer
 * type that matches the surface format and handed to
 * lp_build_depth_test().
 *
 * \param lp        context; supplies the bound depth/stencil surface
 * \param builder   LLVM builder to emit into
 * \param state     depth state to generate code for
 * \param src_type  type of \p src
 * \param mask      passed through to lp_build_depth_test()
 * \param src       incoming depth values
 * \param dst_ptr   passed through to lp_build_depth_test()
 */
static void
depth_test_generate(struct llvmpipe_context *lp,
                    LLVMBuilderRef builder,
                    const struct pipe_depth_state *state,
                    union lp_type src_type,
                    LLVMValueRef *mask,
                    LLVMValueRef src,
                    LLVMValueRef dst_ptr)
{
   const struct util_format_description *zs_desc;
   union lp_type zs_type;

   if(!lp->framebuffer.zsbuf)
      return;

   zs_desc = util_format_description(lp->framebuffer.zsbuf->format);
   assert(zs_desc);

   /* the depth type must keep the same element width and vector length */
   zs_type = lp_depth_type(zs_desc, src_type.width*src_type.length);

   assert(zs_type.width == src_type.width);
   assert(zs_type.length == src_type.length);

#if 1
   src = lp_build_clamped_float_to_unsigned_norm(builder,
                                                 src_type,
                                                 zs_type.width,
                                                 src);
#else
   lp_build_conv(builder, src_type, zs_type, &src, 1, &src, 1);
#endif

   lp_build_depth_test(builder,
                       state,
                       zs_type,
                       zs_desc,
                       mask,
                       src,
                       dst_ptr);
}
static struct lp_fragment_shader_variant *
shader_generate(struct llvmpipe_context *lp,
struct lp_fragment_shader *shader,
const struct lp_fragment_shader_variant_key *key)
{
struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
struct lp_fragment_shader_variant *variant;
const struct tgsi_token *tokens = shader->base.tokens;
union lp_type type;
@@ -140,9 +186,13 @@ shader_generate(struct llvmpipe_screen *screen,
#ifdef DEBUG
tgsi_dump(shader->base.tokens, 0);
debug_printf("alpha.enabled = %u\n", alpha->enabled);
debug_printf("alpha.func = %s\n", debug_dump_func(alpha->func, TRUE));
debug_printf("alpha.ref_value = %f\n", alpha->ref_value);
debug_printf("depth.enabled = %u\n", key->depth.enabled);
debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE));
debug_printf("depth.writemask = %u\n", key->depth.writemask);
debug_printf("depth.occlusion_count = %u\n", key->depth.occlusion_count);
debug_printf("alpha.enabled = %u\n", key->alpha.enabled);
debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE));
debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value);
#endif
variant = CALLOC_STRUCT(lp_fragment_shader_variant);
@@ -150,7 +200,7 @@ shader_generate(struct llvmpipe_screen *screen,
return NULL;
variant->shader = shader;
memcpy(&variant->alpha, alpha, sizeof *alpha);
memcpy(&variant->key, key, sizeof *key);
type.value = 0;
type.floating = TRUE; /* floating point values */
@@ -171,7 +221,7 @@ shader_generate(struct llvmpipe_screen *screen,
arg_types[5] = LLVMPointerType(elem_type, 0); /* consts */
arg_types[6] = LLVMPointerType(int_vec_type, 0); /* mask */
arg_types[7] = LLVMPointerType(vec_type, 0); /* color */
arg_types[8] = LLVMPointerType(vec_type, 0); /* depth */
arg_types[8] = LLVMPointerType(int_vec_type, 0); /* depth */
arg_types[9] = LLVMPointerType(LLVMInt8Type(), 0); /* samplers */
func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
@@ -212,6 +262,15 @@ shader_generate(struct llvmpipe_screen *screen,
mask = LLVMBuildLoad(builder, mask_ptr, "");
/* FIXME:
early_depth_test =
lp->depth_stencil->depth.enabled &&
lp->framebuffer.zsbuf &&
!lp->depth_stencil->alpha.enabled &&
!lp->fs->info.uses_kill &&
!lp->fs->info.writes_z;
*/
memset(outputs, 0, sizeof outputs);
lp_build_tgsi_soa(builder, tokens, type, &mask,
@@ -242,14 +301,18 @@ shader_generate(struct llvmpipe_screen *screen,
}
case TGSI_SEMANTIC_POSITION:
if(chan == 3)
LLVMBuildStore(builder, outputs[attrib][chan], depth_ptr);
if(chan == 2)
pos[2] = outputs[attrib][chan];
break;
}
}
}
}
depth_test_generate(lp, builder, &key->depth,
type, &mask,
pos[2], depth_ptr);
if(mask)
LLVMBuildStore(builder, mask, mask_ptr);
@@ -295,7 +358,6 @@ void *
llvmpipe_create_fs_state(struct pipe_context *pipe,
const struct pipe_shader_state *templ)
{
struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen);
struct lp_fragment_shader *shader;
shader = CALLOC_STRUCT(lp_fragment_shader);
@@ -308,8 +370,6 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
/* we need to keep a local copy of the tokens */
shader->base.tokens = tgsi_dup_tokens(templ->tokens);
shader->screen = screen;
return shader;
}
@@ -328,11 +388,12 @@ llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs)
void
llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen);
struct lp_fragment_shader *shader = fs;
struct lp_fragment_shader_variant *variant;
struct llvmpipe_screen *screen = shader->screen;
assert(fs != llvmpipe_context(pipe)->fs);
assert(fs != llvmpipe->fs);
variant = shader->variants;
while(variant) {
@@ -436,19 +497,23 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
/**
 * Make shader->current the fragment shader variant that matches the
 * currently bound depth and alpha state, generating it if no existing
 * variant has the same key.
 *
 * \param lp  context whose bound fragment shader (lp->fs) is updated
 */
void llvmpipe_update_fs(struct llvmpipe_context *lp)
{
   struct lp_fragment_shader *shader = lp->fs;
   struct lp_fragment_shader_variant_key key;
   struct lp_fragment_shader_variant *variant;

   /* Zero the whole key first: keys are compared with memcmp(), so any
    * bytes not covered by the copies below must have a known value.
    */
   memset(&key, 0, sizeof key);

   /* Use the size of the member itself.  "sizeof &key.depth" is the size
    * of a pointer, which copies a pointer-sized number of bytes regardless
    * of how large the state struct actually is.
    */
   memcpy(&key.depth, &lp->depth_stencil->depth, sizeof key.depth);
   memcpy(&key.alpha, &lp->depth_stencil->alpha, sizeof key.alpha);

   /* look for an already generated variant with an identical key */
   variant = shader->variants;
   while(variant) {
      if(memcmp(&variant->key, &key, sizeof key) == 0)
         break;

      variant = variant->next;
   }

   /* generate only when nothing matched */
   if(!variant)
      variant = shader_generate(lp, shader, &key);

   shader->current = variant;
}

View File

@@ -67,15 +67,9 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe,
/* zbuf changing? */
if (lp->framebuffer.zsbuf != fb->zsbuf) {
/* flush old */
lp_flush_tile_cache(lp->zsbuf_cache);
/* assign new */
lp->framebuffer.zsbuf = fb->zsbuf;
/* update cache */
lp_tile_cache_set_surface(lp->zsbuf_cache, fb->zsbuf);
/* Tell draw module how deep the Z/depth buffer is */
if (lp->framebuffer.zsbuf) {
int depth_bits;

View File

@@ -133,6 +133,12 @@ llvmpipe_texture_create(struct pipe_screen *screen,
pipe_reference_init(&lpt->base.reference, 1);
lpt->base.screen = screen;
/* XXX: The xlib state tracker is brain-dead and will request
* PIPE_FORMAT_Z16_UNORM no matter how much we tell it we don't support it.
*/
if(lpt->base.format == PIPE_FORMAT_Z16_UNORM)
lpt->base.format = PIPE_FORMAT_Z32_UNORM;
if (lpt->base.tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) {
if (!llvmpipe_displaytarget_layout(screen, lpt))
goto fail;