mesa: avoid generating constant vertex attributes in fixedfunc programs
Keep track of enabled/active vertex attributes and of potential vertex program outputs. When generating the fragment program, replace references to fragment attributes that are effectively non-varying, non-computed passthrough attributes with references to the new CURRENT_ATTRIB tracked state value. The only downside is some slight ugliness in the VBO code, where we need to validate state twice in succession.
This commit is contained in:
@@ -3073,6 +3073,8 @@ struct __GLcontextRec
|
||||
GLenum RenderMode; /**< either GL_RENDER, GL_SELECT, GL_FEEDBACK */
|
||||
GLbitfield NewState; /**< bitwise-or of _NEW_* flags */
|
||||
|
||||
GLuint varying_vp_inputs;
|
||||
|
||||
/** \name Derived state */
|
||||
/*@{*/
|
||||
GLbitfield _TriangleCaps; /**< bitwise-or of DD_* flags */
|
||||
|
@@ -465,7 +465,8 @@ _mesa_update_state_locked( GLcontext *ctx )
|
||||
_mesa_update_tnl_spaces( ctx, new_state );
|
||||
|
||||
if (ctx->FragmentProgram._MaintainTexEnvProgram) {
|
||||
prog_flags |= (_NEW_TEXTURE | _NEW_FOG | _DD_NEW_SEPARATE_SPECULAR);
|
||||
prog_flags |= (_NEW_ARRAY | _NEW_TEXTURE_MATRIX | _NEW_LIGHT |
|
||||
_NEW_TEXTURE | _NEW_FOG | _DD_NEW_SEPARATE_SPECULAR);
|
||||
}
|
||||
if (ctx->VertexProgram._MaintainTnlProgram) {
|
||||
prog_flags |= (_NEW_ARRAY | _NEW_TEXTURE | _NEW_TEXTURE_MATRIX |
|
||||
@@ -504,3 +505,38 @@ _mesa_update_state( GLcontext *ctx )
|
||||
_mesa_update_state_locked(ctx);
|
||||
_mesa_unlock_context_textures(ctx);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/* Want to figure out which fragment program inputs are actually
|
||||
* constant/current values from ctx->Current. These should be
|
||||
* referenced as a tracked state variable rather than a fragment
|
||||
* program input, to save the overhead of putting a constant value in
|
||||
* every submitted vertex, transferring it to hardware, interpolating
|
||||
* it across the triangle, etc...
|
||||
*
|
||||
* When there is a VP bound, just use vp->outputs. But when we're
|
||||
* generating vp from fixed function state, basically want to
|
||||
* calculate:
|
||||
*
|
||||
* vp_out_2_fp_in( vp_in_2_vp_out( varying_inputs ) |
|
||||
* potential_vp_outputs )
|
||||
*
|
||||
* Where potential_vp_outputs is calculated by looking at enabled
|
||||
* texgen, etc.
|
||||
*
|
||||
* The generated fragment program should then only declare inputs that
|
||||
* may vary or otherwise differ from the ctx->Current values.
|
||||
* Otherwise, the fp should track them as state values instead.
|
||||
*/
|
||||
void
|
||||
_mesa_set_varying_vp_inputs( GLcontext *ctx,
|
||||
unsigned varying_inputs )
|
||||
{
|
||||
if (ctx->varying_vp_inputs != varying_inputs) {
|
||||
ctx->varying_vp_inputs = varying_inputs;
|
||||
ctx->NewState |= _NEW_ARRAY;
|
||||
//_mesa_printf("%s %x\n", __FUNCTION__, varying_inputs);
|
||||
}
|
||||
}
|
||||
|
@@ -37,5 +37,8 @@ _mesa_update_state( GLcontext *ctx );
|
||||
extern void
|
||||
_mesa_update_state_locked( GLcontext *ctx );
|
||||
|
||||
void
|
||||
_mesa_set_varying_vp_inputs( GLcontext *ctx,
|
||||
unsigned varying_inputs );
|
||||
|
||||
#endif
|
||||
|
@@ -189,6 +189,63 @@ static GLuint translate_tex_src_bit( GLbitfield bit )
|
||||
}
|
||||
}
|
||||
|
||||
#define VERT_BIT_TEX_ANY (0xff << VERT_ATTRIB_TEX0)
|
||||
#define VERT_RESULT_TEX_ANY (0xff << VERT_RESULT_TEX0)
|
||||
|
||||
/* Identify all possible varying inputs. The fragment program will
|
||||
* never reference non-varying inputs, but will track them via state
|
||||
* constants instead.
|
||||
*
|
||||
* This function figures out all the inputs that the fragment program
|
||||
* has access to. The bitmask is later reduced to just those which
|
||||
* are actually referenced.
|
||||
*/
|
||||
static GLuint get_fp_input_mask( GLcontext *ctx )
|
||||
{
|
||||
GLuint fp_inputs = 0;
|
||||
|
||||
if (1) {
|
||||
GLuint varying_inputs = ctx->varying_vp_inputs;
|
||||
|
||||
/* First look at what values may be computed by the generated
|
||||
* vertex program:
|
||||
*/
|
||||
if (ctx->Light.Enabled) {
|
||||
fp_inputs |= FRAG_BIT_COL0;
|
||||
|
||||
if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR)
|
||||
fp_inputs |= FRAG_BIT_COL1;
|
||||
}
|
||||
|
||||
fp_inputs |= (ctx->Texture._TexGenEnabled |
|
||||
ctx->Texture._TexMatEnabled) << FRAG_ATTRIB_TEX0;
|
||||
|
||||
/* Then look at what might be varying as a result of enabled
|
||||
* arrays, etc:
|
||||
*/
|
||||
if (varying_inputs & VERT_BIT_COLOR0) fp_inputs |= FRAG_BIT_COL0;
|
||||
if (varying_inputs & VERT_BIT_COLOR1) fp_inputs |= FRAG_BIT_COL1;
|
||||
|
||||
fp_inputs |= (((varying_inputs & VERT_BIT_TEX_ANY) >> VERT_ATTRIB_TEX0)
|
||||
<< FRAG_ATTRIB_TEX0);
|
||||
|
||||
}
|
||||
else {
|
||||
/* calculate from vp->outputs */
|
||||
GLuint vp_outputs = 0;
|
||||
|
||||
if (vp_outputs & (1 << VERT_RESULT_COL0)) fp_inputs |= FRAG_BIT_COL0;
|
||||
if (vp_outputs & (1 << VERT_RESULT_COL1)) fp_inputs |= FRAG_BIT_COL1;
|
||||
|
||||
fp_inputs |= (((vp_outputs & VERT_RESULT_TEX_ANY)
|
||||
<< VERT_RESULT_TEX0)
|
||||
>> FRAG_ATTRIB_TEX0);
|
||||
}
|
||||
|
||||
return fp_inputs;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Examine current texture environment state and generate a unique
|
||||
* key to identify it.
|
||||
@@ -196,17 +253,21 @@ static GLuint translate_tex_src_bit( GLbitfield bit )
|
||||
static void make_state_key( GLcontext *ctx, struct state_key *key )
|
||||
{
|
||||
GLuint i, j;
|
||||
|
||||
GLuint inputs_referenced = FRAG_BIT_COL0;
|
||||
GLuint inputs_available = get_fp_input_mask( ctx );
|
||||
|
||||
memset(key, 0, sizeof(*key));
|
||||
|
||||
for (i=0;i<MAX_TEXTURE_UNITS;i++) {
|
||||
const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
|
||||
|
||||
if (!texUnit->_ReallyEnabled)
|
||||
if (!texUnit->_ReallyEnabled)
|
||||
continue;
|
||||
|
||||
key->unit[i].enabled = 1;
|
||||
key->enabled_units |= (1<<i);
|
||||
key->nr_enabled_units = i+1;
|
||||
inputs_referenced |= FRAG_BIT_TEX(i);
|
||||
|
||||
key->unit[i].source_index =
|
||||
translate_tex_src_bit(texUnit->_ReallyEnabled);
|
||||
@@ -234,13 +295,18 @@ static void make_state_key( GLcontext *ctx, struct state_key *key )
|
||||
}
|
||||
}
|
||||
|
||||
if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR)
|
||||
if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) {
|
||||
key->separate_specular = 1;
|
||||
inputs_referenced |= FRAG_BIT_COL1;
|
||||
}
|
||||
|
||||
if (ctx->Fog.Enabled) {
|
||||
key->fog_enabled = 1;
|
||||
key->fog_mode = translate_fog_mode(ctx->Fog.Mode);
|
||||
inputs_referenced |= FRAG_BIT_FOGC; /* maybe */
|
||||
}
|
||||
|
||||
key->inputs_available = (inputs_available & inputs_referenced);
|
||||
}
|
||||
|
||||
/* Use uregs to represent registers internally, translate to Mesa's
|
||||
@@ -446,11 +512,29 @@ static struct ureg register_param5( struct texenv_fragment_program *p,
|
||||
#define register_param3(p,s0,s1,s2) register_param5(p,s0,s1,s2,0,0)
|
||||
#define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0)
|
||||
|
||||
/**
 * Translate a fragment attribute index into the matching vertex
 * attribute index.
 *
 * The two colors map to their vertex counterparts; any other value
 * must lie in FRAG_ATTRIB_TEX0..FRAG_ATTRIB_TEX7 (asserted) and maps to
 * the vertex texcoord attribute with the same unit number.
 */
static GLuint frag_to_vert_attrib( GLuint attrib )
{
   if (attrib == FRAG_ATTRIB_COL0)
      return VERT_ATTRIB_COLOR0;

   if (attrib == FRAG_ATTRIB_COL1)
      return VERT_ATTRIB_COLOR1;

   /* Everything else has to be a texture coordinate. */
   assert(attrib >= FRAG_ATTRIB_TEX0);
   assert(attrib <= FRAG_ATTRIB_TEX7);

   return VERT_ATTRIB_TEX0 + (attrib - FRAG_ATTRIB_TEX0);
}
|
||||
|
||||
|
||||
/**
 * Return the register to read for fragment attribute \p input.
 *
 * If the state key marks the input as available, it is recorded in the
 * program's InputsRead mask and returned as a PROGRAM_INPUT register.
 * Otherwise the input is replaced by a STATE_INTERNAL /
 * STATE_CURRENT_ATTRIB parameter for the corresponding vertex
 * attribute, and InputsRead is left untouched.
 *
 * \param p      fragment program under construction
 * \param input  FRAG_ATTRIB_x index being referenced
 */
static struct ureg register_input( struct texenv_fragment_program *p, GLuint input )
{
   const GLuint input_bit = 1u << input;

   if (p->state->inputs_available & input_bit) {
      p->program->Base.InputsRead |= input_bit;
      return make_ureg(PROGRAM_INPUT, input);
   }
   else {
      /* Not a real input: read the current value as a state constant. */
      GLuint idx = frag_to_vert_attrib( input );
      return register_param3( p, STATE_INTERNAL, STATE_CURRENT_ATTRIB, idx );
   }
}
|
||||
|
||||
|
||||
|
@@ -127,6 +127,7 @@ static void recalculate_input_bindings( GLcontext *ctx )
|
||||
struct vbo_context *vbo = vbo_context(ctx);
|
||||
struct vbo_exec_context *exec = &vbo->exec;
|
||||
const struct gl_client_array **inputs = &exec->array.inputs[0];
|
||||
GLuint const_inputs = 0;
|
||||
GLuint i;
|
||||
|
||||
exec->array.program_mode = get_program_mode(ctx);
|
||||
@@ -141,19 +142,24 @@ static void recalculate_input_bindings( GLcontext *ctx )
|
||||
for (i = 0; i <= VERT_ATTRIB_TEX7; i++) {
|
||||
if (exec->array.legacy_array[i]->Enabled)
|
||||
inputs[i] = exec->array.legacy_array[i];
|
||||
else
|
||||
else {
|
||||
inputs[i] = &vbo->legacy_currval[i];
|
||||
const_inputs |= 1 << i;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < MAT_ATTRIB_MAX; i++) {
|
||||
inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->mat_currval[i];
|
||||
const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i);
|
||||
}
|
||||
|
||||
/* Could use just about anything, just to fill in the empty
|
||||
* slots:
|
||||
*/
|
||||
for (i = MAT_ATTRIB_MAX; i < VERT_ATTRIB_MAX - VERT_ATTRIB_GENERIC0; i++)
|
||||
for (i = MAT_ATTRIB_MAX; i < VERT_ATTRIB_MAX - VERT_ATTRIB_GENERIC0; i++) {
|
||||
inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->generic_currval[i];
|
||||
const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i);
|
||||
}
|
||||
|
||||
break;
|
||||
case VP_NV:
|
||||
@@ -166,15 +172,19 @@ static void recalculate_input_bindings( GLcontext *ctx )
|
||||
inputs[i] = exec->array.generic_array[i];
|
||||
else if (exec->array.legacy_array[i]->Enabled)
|
||||
inputs[i] = exec->array.legacy_array[i];
|
||||
else
|
||||
else {
|
||||
inputs[i] = &vbo->legacy_currval[i];
|
||||
const_inputs |= 1 << i;
|
||||
}
|
||||
}
|
||||
|
||||
/* Could use just about anything, just to fill in the empty
|
||||
* slots:
|
||||
*/
|
||||
for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++)
|
||||
for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++) {
|
||||
inputs[i] = &vbo->generic_currval[i - VERT_ATTRIB_GENERIC0];
|
||||
const_inputs |= 1 << i;
|
||||
}
|
||||
|
||||
break;
|
||||
case VP_ARB:
|
||||
@@ -189,25 +199,34 @@ static void recalculate_input_bindings( GLcontext *ctx )
|
||||
inputs[0] = exec->array.generic_array[0];
|
||||
else if (exec->array.legacy_array[0]->Enabled)
|
||||
inputs[0] = exec->array.legacy_array[0];
|
||||
else
|
||||
else {
|
||||
inputs[0] = &vbo->legacy_currval[0];
|
||||
const_inputs |= 1 << 0;
|
||||
}
|
||||
|
||||
|
||||
for (i = 1; i <= VERT_ATTRIB_TEX7; i++) {
|
||||
if (exec->array.legacy_array[i]->Enabled)
|
||||
inputs[i] = exec->array.legacy_array[i];
|
||||
else
|
||||
else {
|
||||
inputs[i] = &vbo->legacy_currval[i];
|
||||
const_inputs |= 1 << i;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 16; i++) {
|
||||
if (exec->array.generic_array[i]->Enabled)
|
||||
inputs[VERT_ATTRIB_GENERIC0 + i] = exec->array.generic_array[i];
|
||||
else
|
||||
else {
|
||||
inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->generic_currval[i];
|
||||
const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i);
|
||||
}
|
||||
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
_mesa_set_varying_vp_inputs( ctx, ~const_inputs );
|
||||
}
|
||||
|
||||
static void bind_arrays( GLcontext *ctx )
|
||||
@@ -257,6 +276,11 @@ vbo_exec_DrawArrays(GLenum mode, GLint start, GLsizei count)
|
||||
|
||||
bind_arrays( ctx );
|
||||
|
||||
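/* Validate state again: per the commit notes, binding the arrays can update the varying vertex inputs and flag new state.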
|
||||
*/
|
||||
if (ctx->NewState)
|
||||
_mesa_update_state( ctx );
|
||||
|
||||
prim[0].begin = 1;
|
||||
prim[0].end = 1;
|
||||
prim[0].weak = 0;
|
||||
@@ -297,6 +321,9 @@ vbo_exec_DrawRangeElements(GLenum mode,
|
||||
|
||||
bind_arrays( ctx );
|
||||
|
||||
if (ctx->NewState)
|
||||
_mesa_update_state( ctx );
|
||||
|
||||
ib.count = count;
|
||||
ib.type = type;
|
||||
ib.obj = ctx->Array.ElementArrayBufferObj;
|
||||
|
@@ -150,6 +150,7 @@ static void vbo_exec_bind_arrays( GLcontext *ctx )
|
||||
GLubyte *data = exec->vtx.buffer_map;
|
||||
const GLuint *map;
|
||||
GLuint attr;
|
||||
GLuint varying_inputs = 0;
|
||||
|
||||
/* Install the default (ie Current) attributes first, then overlay
|
||||
* all active ones.
|
||||
@@ -211,8 +212,11 @@ static void vbo_exec_bind_arrays( GLcontext *ctx )
|
||||
arrays[attr]._MaxElement = count; /* ??? */
|
||||
|
||||
data += exec->vtx.attrsz[src] * sizeof(GLfloat);
|
||||
varying_inputs |= 1<<attr;
|
||||
}
|
||||
}
|
||||
|
||||
_mesa_set_varying_vp_inputs( ctx, varying_inputs );
|
||||
}
|
||||
|
||||
|
||||
@@ -242,6 +246,10 @@ void vbo_exec_vtx_flush( struct vbo_exec_context *exec )
|
||||
*/
|
||||
vbo_exec_bind_arrays( ctx );
|
||||
|
||||
if (ctx->NewState)
|
||||
_mesa_update_state( ctx );
|
||||
|
||||
|
||||
ctx->Driver.UnmapBuffer(ctx, target, exec->vtx.bufferobj);
|
||||
exec->vtx.buffer_map = NULL;
|
||||
|
||||
|
@@ -118,6 +118,7 @@ static void vbo_bind_vertex_list( GLcontext *ctx,
|
||||
GLuint data = node->buffer_offset;
|
||||
const GLuint *map;
|
||||
GLuint attr;
|
||||
GLuint varying_inputs = 0;
|
||||
|
||||
/* Install the default (ie Current) attributes first, then overlay
|
||||
* all active ones.
|
||||
@@ -167,8 +168,11 @@ static void vbo_bind_vertex_list( GLcontext *ctx,
|
||||
assert(arrays[attr].BufferObj->Name);
|
||||
|
||||
data += node->attrsz[src] * sizeof(GLfloat);
|
||||
varying_inputs |= 1<<attr;
|
||||
}
|
||||
}
|
||||
|
||||
_mesa_set_varying_vp_inputs( ctx, varying_inputs );
|
||||
}
|
||||
|
||||
static void vbo_save_loopback_vertex_list( GLcontext *ctx,
|
||||
|
Reference in New Issue
Block a user