mesa: add LodBias quantization from st/mesa
Apparently the quantization math isn't cheap. This further reduces overhead by 2% for drawoverhead/8 textures. The improvement is measured by looking at the sysprof percentage delta and multiplying by 2 (because we have the frontend and gallium threads with equal overhead, so the benefit is doubled compared to 1 thread). Both per-sampler and per-unit lod bias values are quantized. The difference in behavior is that both values are quantized separately and then added up, instead of first added up and then quantized. The worst case error is +- 1/256 in the reduced precision, i.e. off by one in a fixed-point representation, which should be fine. Reviewed-By: Mike Blumenkrantz <michael.blumenkrantz@gmail.com> Reviewed-by: Emma Anholt <emma@anholt.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11339>
This commit is contained in:
@@ -277,6 +277,7 @@ _mesa_PushAttrib(GLbitfield mask)
|
|||||||
unsigned num_tex_used = ctx->Texture.NumCurrentTexUsed;
|
unsigned num_tex_used = ctx->Texture.NumCurrentTexUsed;
|
||||||
for (u = 0; u < num_tex_used; u++) {
|
for (u = 0; u < num_tex_used; u++) {
|
||||||
head->Texture.LodBias[u] = ctx->Texture.Unit[u].LodBias;
|
head->Texture.LodBias[u] = ctx->Texture.Unit[u].LodBias;
|
||||||
|
head->Texture.LodBiasQuantized[u] = ctx->Texture.Unit[u].LodBiasQuantized;
|
||||||
|
|
||||||
for (tex = 0; tex < NUM_TEXTURE_TARGETS; tex++) {
|
for (tex = 0; tex < NUM_TEXTURE_TARGETS; tex++) {
|
||||||
struct gl_texture_object *dst = &head->Texture.SavedObj[u][tex];
|
struct gl_texture_object *dst = &head->Texture.SavedObj[u][tex];
|
||||||
@@ -618,6 +619,7 @@ pop_texture_group(struct gl_context *ctx, struct gl_texture_attrib_node *texstat
|
|||||||
memcpy(destUnit, unit, sizeof(*unit));
|
memcpy(destUnit, unit, sizeof(*unit));
|
||||||
destUnit->_CurrentCombine = NULL;
|
destUnit->_CurrentCombine = NULL;
|
||||||
ctx->Texture.Unit[u].LodBias = texstate->LodBias[u];
|
ctx->Texture.Unit[u].LodBias = texstate->LodBias[u];
|
||||||
|
ctx->Texture.Unit[u].LodBiasQuantized = texstate->LodBiasQuantized[u];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -1180,6 +1180,7 @@ struct gl_texgen
|
|||||||
struct gl_texture_unit
|
struct gl_texture_unit
|
||||||
{
|
{
|
||||||
GLfloat LodBias; /**< for biasing mipmap levels */
|
GLfloat LodBias; /**< for biasing mipmap levels */
|
||||||
|
float LodBiasQuantized; /**< to reduce pipe_sampler_state variants */
|
||||||
|
|
||||||
/** Texture targets that have a non-default texture bound */
|
/** Texture targets that have a non-default texture bound */
|
||||||
GLbitfield _BoundTextures;
|
GLbitfield _BoundTextures;
|
||||||
@@ -5094,6 +5095,7 @@ struct gl_texture_attrib_node
|
|||||||
GLuint NumTexSaved;
|
GLuint NumTexSaved;
|
||||||
struct gl_fixedfunc_texture_unit FixedFuncUnit[MAX_TEXTURE_COORD_UNITS];
|
struct gl_fixedfunc_texture_unit FixedFuncUnit[MAX_TEXTURE_COORD_UNITS];
|
||||||
GLfloat LodBias[MAX_TEXTURE_UNITS];
|
GLfloat LodBias[MAX_TEXTURE_UNITS];
|
||||||
|
float LodBiasQuantized[MAX_TEXTURE_UNITS];
|
||||||
|
|
||||||
/** Saved default texture object state. */
|
/** Saved default texture object state. */
|
||||||
struct gl_texture_object SavedDefaultObj[NUM_TEXTURE_TARGETS];
|
struct gl_texture_object SavedDefaultObj[NUM_TEXTURE_TARGETS];
|
||||||
|
@@ -692,7 +692,7 @@ set_sampler_lod_bias(struct gl_context *ctx, struct gl_sampler_object *samp,
|
|||||||
|
|
||||||
flush(ctx);
|
flush(ctx);
|
||||||
samp->Attrib.LodBias = param;
|
samp->Attrib.LodBias = param;
|
||||||
samp->Attrib.state.lod_bias = param;
|
samp->Attrib.state.lod_bias = util_quantize_lod_bias(param);
|
||||||
return GL_TRUE;
|
return GL_TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -471,6 +471,7 @@ _mesa_texenvfv_indexed( struct gl_context* ctx, GLuint texunit, GLenum target,
|
|||||||
return;
|
return;
|
||||||
FLUSH_VERTICES(ctx, _NEW_TEXTURE_OBJECT, GL_TEXTURE_BIT);
|
FLUSH_VERTICES(ctx, _NEW_TEXTURE_OBJECT, GL_TEXTURE_BIT);
|
||||||
texUnit->LodBias = param[0];
|
texUnit->LodBias = param[0];
|
||||||
|
texUnit->LodBiasQuantized = util_quantize_lod_bias(param[0]);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
TE_ERROR(GL_INVALID_ENUM, "glTexEnv(pname=%s)", pname);
|
TE_ERROR(GL_INVALID_ENUM, "glTexEnv(pname=%s)", pname);
|
||||||
|
@@ -786,7 +786,7 @@ set_tex_parameterf(struct gl_context *ctx,
|
|||||||
if (texObj->Sampler.Attrib.LodBias != params[0]) {
|
if (texObj->Sampler.Attrib.LodBias != params[0]) {
|
||||||
flush(ctx);
|
flush(ctx);
|
||||||
texObj->Sampler.Attrib.LodBias = params[0];
|
texObj->Sampler.Attrib.LodBias = params[0];
|
||||||
texObj->Sampler.Attrib.state.lod_bias = params[0];
|
texObj->Sampler.Attrib.state.lod_bias = util_quantize_lod_bias(params[0]);
|
||||||
return GL_TRUE;
|
return GL_TRUE;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@@ -76,6 +76,7 @@ _mesa_copy_texture_state( const struct gl_context *src, struct gl_context *dst )
|
|||||||
/* per-unit state */
|
/* per-unit state */
|
||||||
for (u = 0; u < src->Const.MaxCombinedTextureImageUnits; u++) {
|
for (u = 0; u < src->Const.MaxCombinedTextureImageUnits; u++) {
|
||||||
dst->Texture.Unit[u].LodBias = src->Texture.Unit[u].LodBias;
|
dst->Texture.Unit[u].LodBias = src->Texture.Unit[u].LodBias;
|
||||||
|
dst->Texture.Unit[u].LodBiasQuantized = src->Texture.Unit[u].LodBiasQuantized;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* XXX strictly speaking, we should compare texture names/ids and
|
* XXX strictly speaking, we should compare texture names/ids and
|
||||||
|
@@ -94,12 +94,6 @@ st_convert_sampler(const struct st_context *st,
|
|||||||
sampler->normalized_coords = 1;
|
sampler->normalized_coords = 1;
|
||||||
|
|
||||||
sampler->lod_bias += tex_unit_lod_bias;
|
sampler->lod_bias += tex_unit_lod_bias;
|
||||||
/* Reduce the number of states by allowing only the values that AMD GCN
|
|
||||||
* can represent. Apps use lod_bias for smooth transitions to bigger mipmap
|
|
||||||
* levels.
|
|
||||||
*/
|
|
||||||
sampler->lod_bias = CLAMP(sampler->lod_bias, -16, 16);
|
|
||||||
sampler->lod_bias = roundf(sampler->lod_bias * 256) / 256;
|
|
||||||
|
|
||||||
/* Check that only wrap modes using the border color have the first bit
|
/* Check that only wrap modes using the border color have the first bit
|
||||||
* set.
|
* set.
|
||||||
@@ -184,7 +178,7 @@ st_convert_sampler_from_unit(const struct st_context *st,
|
|||||||
|
|
||||||
msamp = _mesa_get_samplerobj(ctx, texUnit);
|
msamp = _mesa_get_samplerobj(ctx, texUnit);
|
||||||
|
|
||||||
st_convert_sampler(st, texobj, msamp, ctx->Texture.Unit[texUnit].LodBias,
|
st_convert_sampler(st, texobj, msamp, ctx->Texture.Unit[texUnit].LodBiasQuantized,
|
||||||
sampler);
|
sampler);
|
||||||
|
|
||||||
sampler->seamless_cube_map |= ctx->Texture.CubeMapSeamless;
|
sampler->seamless_cube_map |= ctx->Texture.CubeMapSeamless;
|
||||||
|
@@ -780,6 +780,20 @@ util_is_vbo_upload_ratio_too_large(unsigned draw_vertex_count,
|
|||||||
|
|
||||||
bool util_invert_mat4x4(float *out, const float *m);
|
bool util_invert_mat4x4(float *out, const float *m);
|
||||||
|
|
||||||
|
/* Quantize the lod bias value to reduce the number of sampler state
|
||||||
|
* variants in gallium because apps use it for smooth mipmap transitions,
|
||||||
|
* thrashing cso_cache and degrading performance.
|
||||||
|
*
|
||||||
|
* This quantization matches the AMD hw specification, so having more
|
||||||
|
* precision would have no effect anyway.
|
||||||
|
*/
|
||||||
|
static inline float
|
||||||
|
util_quantize_lod_bias(float lod)
|
||||||
|
{
|
||||||
|
/* Restrict the bias to [-16, 16] before rounding.
 * NOTE(review): CLAMP is defined elsewhere; assumed to be the usual
 * min/max clamp macro - confirm.
 */
lod = CLAMP(lod, -16, 16);
|
||||||
|
/* Snap to the nearest multiple of 1/256: scale by 256, round to the
 * nearest integer with roundf(), then scale back down.
 */
return roundf(lod * 256) / 256;
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
Reference in New Issue
Block a user