i965: Use 0 for the number of binding table entries in 3DSTATE_(VS|WM).

These fields control how many entries the hardware prefetches into the
state cache, so they only impact performance, not correctness.  However,
it's not clear how to use this in a way that's beneficial.

According to the documentation, kernels "using a large number" of
entries may wish to program this to zero to avoid thrashing the cache;
it's unclear how many is too many.  Also, Ironlake's WM was missing this
feature entirely---the count had to be zero.

The dirty bit tracking to handle this complicates the surface state
and binding table setup; removing it should simplify things and make
future refactoring easier.  So just set 0 for the number of entries
rather than trying to compute and track it.

Appears to have no impact on Nexuiz and OpenArena on Sandybridge.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Paul Berry <stereotype441@gmail.com>
This commit is contained in:
Kenneth Graunke
2011-11-01 10:54:08 -07:00
parent 0983c6869b
commit 6ba9090ea0
10 changed files with 6 additions and 50 deletions

View File

@@ -169,8 +169,6 @@ enum brw_state_id {
*/
#define BRW_NEW_BATCH (1 << BRW_STATE_BATCH)
/** \see brw.state.depth_region */
#define BRW_NEW_NR_WM_SURFACES (1 << BRW_STATE_NR_WM_SURFACES)
#define BRW_NEW_NR_VS_SURFACES (1 << BRW_STATE_NR_VS_SURFACES)
#define BRW_NEW_INDEX_BUFFER (1 << BRW_STATE_INDEX_BUFFER)
#define BRW_NEW_VS_CONSTBUF (1 << BRW_STATE_VS_CONSTBUF)
#define BRW_NEW_WM_CONSTBUF (1 << BRW_STATE_WM_CONSTBUF)
@@ -739,7 +737,6 @@ struct brw_context
/** Binding table of pointers to surf_bo entries */
uint32_t bind_bo_offset;
uint32_t surf_offset[BRW_VS_MAX_SURF];
GLuint nr_surfaces;
uint32_t push_const_offset; /* Offset in the batchbuffer */
int push_const_size; /* in 256-bit register increments */
@@ -810,7 +807,6 @@ struct brw_context
uint32_t sdc_offset[BRW_MAX_TEX_UNIT];
GLuint render_surf;
GLuint nr_surfaces;
drm_intel_bo *scratch_bo;

View File

@@ -357,8 +357,6 @@ static struct dirty_bit_map brw_bits[] = {
DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
DEFINE_BIT(BRW_NEW_VERTICES),
DEFINE_BIT(BRW_NEW_BATCH),
DEFINE_BIT(BRW_NEW_NR_WM_SURFACES),
DEFINE_BIT(BRW_NEW_NR_VS_SURFACES),
DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
DEFINE_BIT(BRW_NEW_WM_CONSTBUF),
DEFINE_BIT(BRW_NEW_VS_BINDING_TABLE),

View File

@@ -71,11 +71,7 @@ brw_upload_vs_unit(struct brw_context *brw)
*/
vs->thread1.single_program_flow = (intel->gen == 5);
/* BRW_NEW_NR_VS_SURFACES */
if (intel->gen == 5)
vs->thread1.binding_table_entry_count = 0; /* hardware requirement */
else
vs->thread1.binding_table_entry_count = brw->vs.nr_surfaces;
vs->thread1.binding_table_entry_count = 0;
if (brw->vs.prog_data->total_scratch != 0) {
vs->thread2.scratch_space_base_pointer =
@@ -176,7 +172,6 @@ const struct brw_tracked_state brw_vs_unit = {
.brw = (BRW_NEW_BATCH |
BRW_NEW_PROGRAM_CACHE |
BRW_NEW_CURBE_OFFSETS |
BRW_NEW_NR_VS_SURFACES |
BRW_NEW_URB_FENCE |
BRW_NEW_VERTEX_PROGRAM),
.cache = CACHE_NEW_VS_PROG

View File

@@ -136,9 +136,6 @@ brw_update_vs_constant_surface( struct gl_context *ctx,
/**
* Vertex shader surfaces (constant buffer).
*
* This consumes the state updates for the constant buffer needing
* to be updated, and produces BRW_NEW_NR_VS_SURFACES for the VS unit.
*/
static void
brw_upload_vs_surfaces(struct brw_context *brw)
@@ -170,11 +167,6 @@ brw_upload_vs_surfaces(struct brw_context *brw)
brw->vs.bind_bo_offset = 0;
}
}
if (brw->vs.nr_surfaces != nr_surfaces) {
brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES;
brw->vs.nr_surfaces = nr_surfaces;
}
}
const struct brw_tracked_state brw_vs_surfaces = {

View File

@@ -113,12 +113,7 @@ brw_upload_wm_unit(struct brw_context *brw)
wm->thread1.depth_coef_urb_read_offset = 1;
wm->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
if (intel->gen == 5)
wm->thread1.binding_table_entry_count = 0; /* hardware requirement */
else {
/* BRW_NEW_NR_SURFACES */
wm->thread1.binding_table_entry_count = brw->wm.nr_surfaces;
}
wm->thread1.binding_table_entry_count = 0;
if (brw->wm.prog_data->total_scratch != 0) {
wm->thread2.scratch_space_base_pointer =
@@ -263,8 +258,7 @@ const struct brw_tracked_state brw_wm_unit = {
.brw = (BRW_NEW_BATCH |
BRW_NEW_PROGRAM_CACHE |
BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_CURBE_OFFSETS |
BRW_NEW_NR_WM_SURFACES),
BRW_NEW_CURBE_OFFSETS),
.cache = (CACHE_NEW_WM_PROG |
CACHE_NEW_SAMPLER)

View File

@@ -624,11 +624,6 @@ brw_upload_wm_surfaces(struct brw_context *brw)
}
}
if (brw->wm.nr_surfaces != nr_surfaces) {
brw->wm.nr_surfaces = nr_surfaces;
brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES;
}
brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
}

View File

@@ -167,9 +167,7 @@ upload_vs_state(struct brw_context *brw)
BEGIN_BATCH(6);
OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
OUT_BATCH(brw->vs.prog_offset);
OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
floating_point_mode |
(brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
OUT_BATCH(floating_point_mode | (0 << GEN6_VS_SAMPLER_COUNT_SHIFT));
if (brw->vs.prog_data->total_scratch) {
OUT_RELOC(brw->vs.scratch_bo,
@@ -220,8 +218,7 @@ upload_vs_state(struct brw_context *brw)
const struct brw_tracked_state gen6_vs_state = {
.dirty = {
.mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
.brw = (BRW_NEW_NR_VS_SURFACES |
BRW_NEW_URB_FENCE |
.brw = (BRW_NEW_URB_FENCE |
BRW_NEW_CONTEXT |
BRW_NEW_VERTEX_PROGRAM |
BRW_NEW_BATCH),

View File

@@ -140,9 +140,6 @@ upload_wm_state(struct brw_context *brw)
if (ctx->Shader.CurrentFragmentProgram == NULL)
dw2 |= GEN6_WM_FLOATING_POINT_MODE_ALT;
/* BRW_NEW_NR_WM_SURFACES */
dw2 |= brw->wm.nr_surfaces << GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT;
/* CACHE_NEW_SAMPLER */
dw2 |= (ALIGN(brw->wm.sampler_count, 4) / 4) << GEN6_WM_SAMPLER_COUNT_SHIFT;
dw4 |= (brw->wm.prog_data->first_curbe_grf <<
@@ -217,7 +214,6 @@ const struct brw_tracked_state gen6_wm_state = {
_NEW_PROGRAM_CONSTANTS |
_NEW_POLYGON),
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_NR_WM_SURFACES |
BRW_NEW_URB_FENCE |
BRW_NEW_BATCH),
.cache = (CACHE_NEW_SAMPLER |

View File

@@ -75,9 +75,7 @@ upload_vs_state(struct brw_context *brw)
BEGIN_BATCH(6);
OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
OUT_BATCH(brw->vs.prog_offset);
OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
floating_point_mode |
(brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | floating_point_mode);
if (brw->vs.prog_data->total_scratch) {
OUT_RELOC(brw->vs.scratch_bo,
@@ -101,7 +99,6 @@ const struct brw_tracked_state gen7_vs_state = {
.dirty = {
.mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
.brw = (BRW_NEW_CURBE_OFFSETS |
BRW_NEW_NR_VS_SURFACES |
BRW_NEW_URB_FENCE |
BRW_NEW_CONTEXT |
BRW_NEW_VERTEX_PROGRAM |

View File

@@ -146,9 +146,6 @@ upload_ps_state(struct brw_context *brw)
dw2 |= (ALIGN(brw->wm.sampler_count, 4) / 4) << GEN7_PS_SAMPLER_COUNT_SHIFT;
/* BRW_NEW_NR_WM_SURFACES */
dw2 |= brw->wm.nr_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT;
/* Use ALT floating point mode for ARB fragment programs, because they
* require 0^0 == 1.
*/
@@ -198,7 +195,6 @@ const struct brw_tracked_state gen7_ps_state = {
.mesa = _NEW_PROGRAM_CONSTANTS,
.brw = (BRW_NEW_CURBE_OFFSETS |
BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_NR_WM_SURFACES |
BRW_NEW_PS_BINDING_TABLE |
BRW_NEW_URB_FENCE |
BRW_NEW_BATCH),