llvmpipe: Compute interpolation coeffs directly into SoA layout.

This commit is contained in:
José Fonseca
2009-08-14 11:33:26 +01:00
parent b6f43b445b
commit 17aec9304c
4 changed files with 244 additions and 168 deletions

View File

@@ -88,6 +88,17 @@ struct quad_header_output
};
/**
* Input interpolation coefficients
*/
struct quad_interp_coef
{
float ALIGN16_ATTRIB a0[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
float ALIGN16_ATTRIB dadx[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
float ALIGN16_ATTRIB dady[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
};
/**
* Encodes everything we need to know about a 2x2 pixel block. Uses
* "Channel-Serial" or "SoA" layout.
@@ -100,7 +111,7 @@ struct quad_header {
/* Redundant/duplicated:
*/
const struct tgsi_interp_coef *posCoef;
const struct tgsi_interp_coef *coef;
const struct quad_interp_coef *coef;
};
#endif /* LP_QUAD_H */

View File

@@ -54,9 +54,6 @@ struct quad_shade_stage
struct quad_stage stage; /**< base class */
union tgsi_exec_channel ALIGN16_ATTRIB pos[NUM_CHANNELS];
float ALIGN16_ATTRIB a0[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
float ALIGN16_ATTRIB dadx[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
float ALIGN16_ATTRIB dady[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
struct tgsi_exec_vector ALIGN16_ATTRIB outputs[PIPE_MAX_ATTRIBS];
};
@@ -102,23 +99,6 @@ setup_pos_vector(struct quad_shade_stage *qss,
}
static void
setup_coef_vector(struct quad_shade_stage *qss,
const struct tgsi_interp_coef *coef)
{
unsigned num_inputs = qss->stage.llvmpipe->fs->info.num_inputs;
unsigned attrib, chan, i;
for (attrib = 0; attrib < num_inputs; ++attrib) {
for (chan = 0; chan < NUM_CHANNELS; ++chan) {
qss->a0[attrib][chan] = coef[attrib].a0[chan];
qss->dadx[attrib][chan] = coef[attrib].dadx[chan];
qss->dady[attrib][chan] = coef[attrib].dady[chan];
}
}
}
/**
* Execute fragment shader for the four fragments in the quad.
*/
@@ -142,7 +122,9 @@ shade_quad(struct quad_stage *qs, struct quad_header *quad)
/* run shader */
llvmpipe->fs->jit_function( qss->pos,
qss->a0, qss->dadx, qss->dady,
quad->coef->a0,
quad->coef->dadx,
quad->coef->dady,
constants,
qss->outputs,
samplers);
@@ -217,9 +199,6 @@ shade_quads(struct quad_stage *qs,
struct quad_shade_stage *qss = quad_shade_stage( qs );
unsigned i, pass = 0;
setup_coef_vector(qss,
quads[0]->coef);
for (i = 0; i < nr; i++) {
if (!shade_quad(qs, quads[i]))
continue;

View File

@@ -92,7 +92,7 @@ struct setup_context {
struct quad_header *quad_ptrs[MAX_QUADS];
unsigned count;
struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS];
struct quad_interp_coef coef;
struct tgsi_interp_coef posCoef; /* For Z, W */
struct {
@@ -382,35 +382,12 @@ static boolean setup_sort_vertices( struct setup_context *setup,
}
/**
* Compute a0 for a constant-valued coefficient (GL_FLAT shading).
* The value value comes from vertex[slot][i].
* The result will be put into setup->coef[slot].a0[i].
* \param slot which attribute slot
* \param i which component of the slot (0..3)
*/
static void const_coeff( struct setup_context *setup,
struct tgsi_interp_coef *coef,
uint vertSlot, uint i)
{
assert(i <= 3);
coef->dadx[i] = 0;
coef->dady[i] = 0;
/* need provoking vertex info!
*/
coef->a0[i] = setup->vprovoke[vertSlot][i];
}
/**
* Compute a0, dadx and dady for a linearly interpolated coefficient,
* for a triangle.
*/
static void tri_linear_coeff( struct setup_context *setup,
struct tgsi_interp_coef *coef,
uint vertSlot, uint i)
static void tri_pos_coeff( struct setup_context *setup,
uint vertSlot, unsigned i)
{
float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i];
float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
@@ -421,8 +398,8 @@ static void tri_linear_coeff( struct setup_context *setup,
assert(i <= 3);
coef->dadx[i] = dadx;
coef->dady[i] = dady;
setup->posCoef.dadx[i] = dadx;
setup->posCoef.dady[i] = dady;
/* calculate a0 as the value which would be sampled for the
* fragment at (0,0), taking into account that we want to sample at
@@ -436,20 +413,111 @@ static void tri_linear_coeff( struct setup_context *setup,
* to define a0 as the sample at a pixel center somewhere near vmin
* instead - i'll switch to this later.
*/
coef->a0[i] = (setup->vmin[vertSlot][i] -
(dadx * (setup->vmin[0][0] - 0.5f) +
dady * (setup->vmin[0][1] - 0.5f)));
setup->posCoef.a0[i] = (setup->vmin[vertSlot][i] -
(dadx * (setup->vmin[0][0] - 0.5f) +
dady * (setup->vmin[0][1] - 0.5f)));
/*
debug_printf("attr[%d].%c: %f dx:%f dy:%f\n",
slot, "xyzw"[i],
setup->coef[slot].a0[i],
setup->coef[slot].dadx[i],
setup->coef[slot].dady[i]);
slot, "xyzw"[i],
setup->coef[slot].a0[i],
setup->coef[slot].dadx[i],
setup->coef[slot].dady[i]);
*/
}
/**
* Compute a0 for a constant-valued coefficient (GL_FLAT shading).
* The value value comes from vertex[slot][i].
* The result will be put into setup->coef[slot].a0[i].
* \param slot which attribute slot
* \param i which component of the slot (0..3)
*/
static void const_pos_coeff( struct setup_context *setup,
uint vertSlot, unsigned i)
{
setup->posCoef.dadx[i] = 0;
setup->posCoef.dady[i] = 0;
/* need provoking vertex info!
*/
setup->posCoef.a0[i] = setup->vprovoke[vertSlot][i];
}
/**
* Compute a0 for a constant-valued coefficient (GL_FLAT shading).
* The value value comes from vertex[slot][i].
* The result will be put into setup->coef[slot].a0[i].
* \param slot which attribute slot
* \param i which component of the slot (0..3)
*/
static void const_coeff( struct setup_context *setup,
unsigned attrib,
uint vertSlot)
{
unsigned i;
for (i = 0; i < NUM_CHANNELS; ++i) {
setup->coef.dadx[attrib][i] = 0;
setup->coef.dady[attrib][i] = 0;
/* need provoking vertex info!
*/
setup->coef.a0[attrib][i] = setup->vprovoke[vertSlot][i];
}
}
/**
* Compute a0, dadx and dady for a linearly interpolated coefficient,
* for a triangle.
*/
static void tri_linear_coeff( struct setup_context *setup,
unsigned attrib,
uint vertSlot)
{
unsigned i;
for (i = 0; i < NUM_CHANNELS; ++i) {
float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i];
float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
float dadx = a * setup->oneoverarea;
float dady = b * setup->oneoverarea;
assert(i <= 3);
setup->coef.dadx[attrib][i] = dadx;
setup->coef.dady[attrib][i] = dady;
/* calculate a0 as the value which would be sampled for the
* fragment at (0,0), taking into account that we want to sample at
* pixel centers, in other words (0.5, 0.5).
*
* this is neat but unfortunately not a good way to do things for
* triangles with very large values of dadx or dady as it will
* result in the subtraction and re-addition from a0 of a very
* large number, which means we'll end up loosing a lot of the
* fractional bits and precision from a0. the way to fix this is
* to define a0 as the sample at a pixel center somewhere near vmin
* instead - i'll switch to this later.
*/
setup->coef.a0[attrib][i] = (setup->vmin[vertSlot][i] -
(dadx * (setup->vmin[0][0] - 0.5f) +
dady * (setup->vmin[0][1] - 0.5f)));
/*
debug_printf("attr[%d].%c: %f dx:%f dy:%f\n",
slot, "xyzw"[i],
setup->coef[slot].a0[i],
setup->coef[slot].dadx[i],
setup->coef[slot].dady[i]);
*/
}
}
/**
* Compute a0, dadx and dady for a perspective-corrected interpolant,
* for a triangle.
@@ -459,35 +527,38 @@ static void tri_linear_coeff( struct setup_context *setup,
* divide the interpolated value by the interpolated W at that fragment.
*/
static void tri_persp_coeff( struct setup_context *setup,
struct tgsi_interp_coef *coef,
uint vertSlot, uint i)
unsigned attrib,
uint vertSlot)
{
/* premultiply by 1/w (v[0][3] is always W):
*/
float mina = setup->vmin[vertSlot][i] * setup->vmin[0][3];
float mida = setup->vmid[vertSlot][i] * setup->vmid[0][3];
float maxa = setup->vmax[vertSlot][i] * setup->vmax[0][3];
float botda = mida - mina;
float majda = maxa - mina;
float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
float dadx = a * setup->oneoverarea;
float dady = b * setup->oneoverarea;
unsigned i;
for (i = 0; i < NUM_CHANNELS; ++i) {
/* premultiply by 1/w (v[0][3] is always W):
*/
float mina = setup->vmin[vertSlot][i] * setup->vmin[0][3];
float mida = setup->vmid[vertSlot][i] * setup->vmid[0][3];
float maxa = setup->vmax[vertSlot][i] * setup->vmax[0][3];
float botda = mida - mina;
float majda = maxa - mina;
float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
float dadx = a * setup->oneoverarea;
float dady = b * setup->oneoverarea;
/*
debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i,
setup->vmin[vertSlot][i],
setup->vmid[vertSlot][i],
setup->vmax[vertSlot][i]
);
*/
assert(i <= 3);
/*
debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i,
setup->vmin[vertSlot][i],
setup->vmid[vertSlot][i],
setup->vmax[vertSlot][i]
);
*/
assert(i <= 3);
coef->dadx[i] = dadx;
coef->dady[i] = dady;
coef->a0[i] = (mina -
(dadx * (setup->vmin[0][0] - 0.5f) +
dady * (setup->vmin[0][1] - 0.5f)));
setup->coef.dadx[attrib][i] = dadx;
setup->coef.dady[attrib][i] = dady;
setup->coef.a0[attrib][i] = (mina -
(dadx * (setup->vmin[0][0] - 0.5f) +
dady * (setup->vmin[0][1] - 0.5f)));
}
}
@@ -501,21 +572,21 @@ static void
setup_fragcoord_coeff(struct setup_context *setup, uint slot)
{
/*X*/
setup->coef[slot].a0[0] = 0;
setup->coef[slot].dadx[0] = 1.0;
setup->coef[slot].dady[0] = 0.0;
setup->coef.a0[slot][0] = 0;
setup->coef.dadx[slot][0] = 1.0;
setup->coef.dady[slot][0] = 0.0;
/*Y*/
setup->coef[slot].a0[1] = 0.0;
setup->coef[slot].dadx[1] = 0.0;
setup->coef[slot].dady[1] = 1.0;
setup->coef.a0[slot][1] = 0.0;
setup->coef.dadx[slot][1] = 0.0;
setup->coef.dady[slot][1] = 1.0;
/*Z*/
setup->coef[slot].a0[2] = setup->posCoef.a0[2];
setup->coef[slot].dadx[2] = setup->posCoef.dadx[2];
setup->coef[slot].dady[2] = setup->posCoef.dady[2];
setup->coef.a0[slot][2] = setup->posCoef.a0[2];
setup->coef.dadx[slot][2] = setup->posCoef.dadx[2];
setup->coef.dady[slot][2] = setup->posCoef.dady[2];
/*W*/
setup->coef[slot].a0[3] = setup->posCoef.a0[3];
setup->coef[slot].dadx[3] = setup->posCoef.dadx[3];
setup->coef[slot].dady[3] = setup->posCoef.dady[3];
setup->coef.a0[slot][3] = setup->posCoef.a0[3];
setup->coef.dadx[slot][3] = setup->posCoef.dadx[3];
setup->coef.dady[slot][3] = setup->posCoef.dady[3];
}
@@ -533,27 +604,23 @@ static void setup_tri_coefficients( struct setup_context *setup )
/* z and w are done by linear interpolation:
*/
tri_linear_coeff(setup, &setup->posCoef, 0, 2);
tri_linear_coeff(setup, &setup->posCoef, 0, 3);
tri_pos_coeff(setup, 0, 2);
tri_pos_coeff(setup, 0, 3);
/* setup interpolation for all the remaining attributes:
*/
for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) {
const uint vertSlot = vinfo->attrib[fragSlot].src_index;
uint j;
switch (vinfo->attrib[fragSlot].interp_mode) {
case INTERP_CONSTANT:
for (j = 0; j < NUM_CHANNELS; j++)
const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
const_coeff(setup, fragSlot, vertSlot);
break;
case INTERP_LINEAR:
for (j = 0; j < NUM_CHANNELS; j++)
tri_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
tri_linear_coeff(setup, fragSlot, vertSlot);
break;
case INTERP_PERSPECTIVE:
for (j = 0; j < NUM_CHANNELS; j++)
tri_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
tri_persp_coeff(setup, fragSlot, vertSlot);
break;
case INTERP_POS:
setup_fragcoord_coeff(setup, fragSlot);
@@ -563,9 +630,9 @@ static void setup_tri_coefficients( struct setup_context *setup )
}
if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
setup->coef[fragSlot].a0[0] = 1.0f - setup->facing;
setup->coef[fragSlot].dadx[0] = 0.0;
setup->coef[fragSlot].dady[0] = 0.0;
setup->coef.a0[fragSlot][0] = 1.0f - setup->facing;
setup->coef.dadx[fragSlot][0] = 0.0;
setup->coef.dady[fragSlot][0] = 0.0;
}
}
}
@@ -769,18 +836,40 @@ void setup_tri( struct setup_context *setup,
* for a line.
*/
static void
line_linear_coeff(const struct setup_context *setup,
struct tgsi_interp_coef *coef,
uint vertSlot, uint i)
linear_pos_coeff(struct setup_context *setup,
uint vertSlot, uint i)
{
const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
const float dadx = da * setup->emaj.dx * setup->oneoverarea;
const float dady = da * setup->emaj.dy * setup->oneoverarea;
coef->dadx[i] = dadx;
coef->dady[i] = dady;
coef->a0[i] = (setup->vmin[vertSlot][i] -
(dadx * (setup->vmin[0][0] - 0.5f) +
dady * (setup->vmin[0][1] - 0.5f)));
setup->posCoef.dadx[i] = dadx;
setup->posCoef.dady[i] = dady;
setup->posCoef.a0[i] = (setup->vmin[vertSlot][i] -
(dadx * (setup->vmin[0][0] - 0.5f) +
dady * (setup->vmin[0][1] - 0.5f)));
}
/**
* Compute a0, dadx and dady for a linearly interpolated coefficient,
* for a line.
*/
static void
line_linear_coeff(struct setup_context *setup,
unsigned attrib,
uint vertSlot)
{
unsigned i;
for (i = 0; i < NUM_CHANNELS; ++i) {
const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i];
const float dadx = da * setup->emaj.dx * setup->oneoverarea;
const float dady = da * setup->emaj.dy * setup->oneoverarea;
setup->coef.dadx[attrib][i] = dadx;
setup->coef.dady[attrib][i] = dady;
setup->coef.a0[attrib][i] = (setup->vmin[vertSlot][i] -
(dadx * (setup->vmin[0][0] - 0.5f) +
dady * (setup->vmin[0][1] - 0.5f)));
}
}
@@ -789,21 +878,24 @@ line_linear_coeff(const struct setup_context *setup,
* for a line.
*/
static void
line_persp_coeff(const struct setup_context *setup,
struct tgsi_interp_coef *coef,
uint vertSlot, uint i)
line_persp_coeff(struct setup_context *setup,
unsigned attrib,
uint vertSlot)
{
/* XXX double-check/verify this arithmetic */
const float a0 = setup->vmin[vertSlot][i] * setup->vmin[0][3];
const float a1 = setup->vmax[vertSlot][i] * setup->vmax[0][3];
const float da = a1 - a0;
const float dadx = da * setup->emaj.dx * setup->oneoverarea;
const float dady = da * setup->emaj.dy * setup->oneoverarea;
coef->dadx[i] = dadx;
coef->dady[i] = dady;
coef->a0[i] = (setup->vmin[vertSlot][i] -
(dadx * (setup->vmin[0][0] - 0.5f) +
dady * (setup->vmin[0][1] - 0.5f)));
unsigned i;
for (i = 0; i < NUM_CHANNELS; ++i) {
/* XXX double-check/verify this arithmetic */
const float a0 = setup->vmin[vertSlot][i] * setup->vmin[0][3];
const float a1 = setup->vmax[vertSlot][i] * setup->vmax[0][3];
const float da = a1 - a0;
const float dadx = da * setup->emaj.dx * setup->oneoverarea;
const float dady = da * setup->emaj.dy * setup->oneoverarea;
setup->coef.dadx[attrib][i] = dadx;
setup->coef.dady[attrib][i] = dady;
setup->coef.a0[attrib][i] = (setup->vmin[vertSlot][i] -
(dadx * (setup->vmin[0][0] - 0.5f) +
dady * (setup->vmin[0][1] - 0.5f)));
}
}
@@ -841,27 +933,23 @@ setup_line_coefficients(struct setup_context *setup,
/* z and w are done by linear interpolation:
*/
line_linear_coeff(setup, &setup->posCoef, 0, 2);
line_linear_coeff(setup, &setup->posCoef, 0, 3);
linear_pos_coeff(setup, 0, 2);
linear_pos_coeff(setup, 0, 3);
/* setup interpolation for all the remaining attributes:
*/
for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) {
const uint vertSlot = vinfo->attrib[fragSlot].src_index;
uint j;
switch (vinfo->attrib[fragSlot].interp_mode) {
case INTERP_CONSTANT:
for (j = 0; j < NUM_CHANNELS; j++)
const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
const_coeff(setup, fragSlot, vertSlot);
break;
case INTERP_LINEAR:
for (j = 0; j < NUM_CHANNELS; j++)
line_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
line_linear_coeff(setup, fragSlot, vertSlot);
break;
case INTERP_PERSPECTIVE:
for (j = 0; j < NUM_CHANNELS; j++)
line_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
line_persp_coeff(setup, fragSlot, vertSlot);
break;
case INTERP_POS:
setup_fragcoord_coeff(setup, fragSlot);
@@ -871,9 +959,9 @@ setup_line_coefficients(struct setup_context *setup,
}
if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
setup->coef[fragSlot].a0[0] = 1.0f - setup->facing;
setup->coef[fragSlot].dadx[0] = 0.0;
setup->coef[fragSlot].dady[0] = 0.0;
setup->coef.a0[fragSlot][0] = 1.0f - setup->facing;
setup->coef.dadx[fragSlot][0] = 0.0;
setup->coef.dady[fragSlot][0] = 0.0;
}
}
return TRUE;
@@ -1027,15 +1115,17 @@ setup_line(struct setup_context *setup,
static void
point_persp_coeff(const struct setup_context *setup,
point_persp_coeff(struct setup_context *setup,
const float (*vert)[4],
struct tgsi_interp_coef *coef,
uint vertSlot, uint i)
unsigned attrib,
uint vertSlot)
{
assert(i <= 3);
coef->dadx[i] = 0.0F;
coef->dady[i] = 0.0F;
coef->a0[i] = vert[vertSlot][i] * vert[0][3];
unsigned i;
for(i = 0; i < NUM_CHANNELS; ++i) {
setup->coef.dadx[attrib][i] = 0.0F;
setup->coef.dady[attrib][i] = 0.0F;
setup->coef.a0[attrib][i] = vert[vertSlot][i] * vert[0][3];
}
}
@@ -1090,24 +1180,20 @@ setup_point( struct setup_context *setup,
setup->vprovoke = v0;
/* setup Z, W */
const_coeff(setup, &setup->posCoef, 0, 2);
const_coeff(setup, &setup->posCoef, 0, 3);
const_pos_coeff(setup, 0, 2);
const_pos_coeff(setup, 0, 3);
for (fragSlot = 0; fragSlot < lpfs->info.num_inputs; fragSlot++) {
const uint vertSlot = vinfo->attrib[fragSlot].src_index;
uint j;
switch (vinfo->attrib[fragSlot].interp_mode) {
case INTERP_CONSTANT:
/* fall-through */
case INTERP_LINEAR:
for (j = 0; j < NUM_CHANNELS; j++)
const_coeff(setup, &setup->coef[fragSlot], vertSlot, j);
const_coeff(setup, fragSlot, vertSlot);
break;
case INTERP_PERSPECTIVE:
for (j = 0; j < NUM_CHANNELS; j++)
point_persp_coeff(setup, setup->vprovoke,
&setup->coef[fragSlot], vertSlot, j);
point_persp_coeff(setup, setup->vprovoke, fragSlot, vertSlot);
break;
case INTERP_POS:
setup_fragcoord_coeff(setup, fragSlot);
@@ -1117,9 +1203,9 @@ setup_point( struct setup_context *setup,
}
if (lpfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FACE) {
setup->coef[fragSlot].a0[0] = 1.0f - setup->facing;
setup->coef[fragSlot].dadx[0] = 0.0;
setup->coef[fragSlot].dady[0] = 0.0;
setup->coef.a0[fragSlot][0] = 1.0f - setup->facing;
setup->coef.dadx[fragSlot][0] = 0.0;
setup->coef.dady[fragSlot][0] = 0.0;
}
}
@@ -1287,7 +1373,7 @@ struct setup_context *setup_create_context( struct llvmpipe_context *llvmpipe )
setup->llvmpipe = llvmpipe;
for (i = 0; i < MAX_QUADS; i++) {
setup->quad[i].coef = setup->coef;
setup->quad[i].coef = &setup->coef;
setup->quad[i].posCoef = &setup->posCoef;
}

View File

@@ -59,11 +59,11 @@ struct vertex_info;
typedef void
(*lp_shader_fs_func)(void *pos,
void *a0,
void *dadx,
void *dady,
void *consts,
(*lp_shader_fs_func)(const void *pos,
const void *a0,
const void *dadx,
const void *dady,
const void *consts,
void *outputs,
struct tgsi_sampler **samplers);