i965: Use 0 for the number of binding table entries in 3DSTATE_(VS|WM).
These fields control how many entries the hardware prefetches into the state cache, so they only impact performance, not correctness.

However, it's not clear how to use this in a way that's beneficial. According to the documentation, kernels "using a large number" of entries may wish to program this to zero to avoid thrashing the cache; it's unclear how many is too many.

Also, Ironlake's WM was missing this feature entirely---the count had to be zero.

The dirty bit tracking to handle this complicates the surface state and binding table setup; removing it should simplify things and make future refactoring easier.

So just set 0 for the number of entries rather than trying to compute and track it.

Appears to have no impact on Nexuiz and OpenArena on Sandybridge.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Paul Berry <stereotype441@gmail.com>
This commit is contained in:
@@ -169,8 +169,6 @@ enum brw_state_id {
|
||||
*/
|
||||
#define BRW_NEW_BATCH (1 << BRW_STATE_BATCH)
|
||||
/** \see brw.state.depth_region */
|
||||
#define BRW_NEW_NR_WM_SURFACES (1 << BRW_STATE_NR_WM_SURFACES)
|
||||
#define BRW_NEW_NR_VS_SURFACES (1 << BRW_STATE_NR_VS_SURFACES)
|
||||
#define BRW_NEW_INDEX_BUFFER (1 << BRW_STATE_INDEX_BUFFER)
|
||||
#define BRW_NEW_VS_CONSTBUF (1 << BRW_STATE_VS_CONSTBUF)
|
||||
#define BRW_NEW_WM_CONSTBUF (1 << BRW_STATE_WM_CONSTBUF)
|
||||
@@ -739,7 +737,6 @@ struct brw_context
|
||||
/** Binding table of pointers to surf_bo entries */
|
||||
uint32_t bind_bo_offset;
|
||||
uint32_t surf_offset[BRW_VS_MAX_SURF];
|
||||
GLuint nr_surfaces;
|
||||
|
||||
uint32_t push_const_offset; /* Offset in the batchbuffer */
|
||||
int push_const_size; /* in 256-bit register increments */
|
||||
@@ -810,7 +807,6 @@ struct brw_context
|
||||
uint32_t sdc_offset[BRW_MAX_TEX_UNIT];
|
||||
|
||||
GLuint render_surf;
|
||||
GLuint nr_surfaces;
|
||||
|
||||
drm_intel_bo *scratch_bo;
|
||||
|
||||
|
@@ -357,8 +357,6 @@ static struct dirty_bit_map brw_bits[] = {
|
||||
DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
|
||||
DEFINE_BIT(BRW_NEW_VERTICES),
|
||||
DEFINE_BIT(BRW_NEW_BATCH),
|
||||
DEFINE_BIT(BRW_NEW_NR_WM_SURFACES),
|
||||
DEFINE_BIT(BRW_NEW_NR_VS_SURFACES),
|
||||
DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
|
||||
DEFINE_BIT(BRW_NEW_WM_CONSTBUF),
|
||||
DEFINE_BIT(BRW_NEW_VS_BINDING_TABLE),
|
||||
|
@@ -71,11 +71,7 @@ brw_upload_vs_unit(struct brw_context *brw)
|
||||
*/
|
||||
vs->thread1.single_program_flow = (intel->gen == 5);
|
||||
|
||||
/* BRW_NEW_NR_VS_SURFACES */
|
||||
if (intel->gen == 5)
|
||||
vs->thread1.binding_table_entry_count = 0; /* hardware requirement */
|
||||
else
|
||||
vs->thread1.binding_table_entry_count = brw->vs.nr_surfaces;
|
||||
vs->thread1.binding_table_entry_count = 0;
|
||||
|
||||
if (brw->vs.prog_data->total_scratch != 0) {
|
||||
vs->thread2.scratch_space_base_pointer =
|
||||
@@ -176,7 +172,6 @@ const struct brw_tracked_state brw_vs_unit = {
|
||||
.brw = (BRW_NEW_BATCH |
|
||||
BRW_NEW_PROGRAM_CACHE |
|
||||
BRW_NEW_CURBE_OFFSETS |
|
||||
BRW_NEW_NR_VS_SURFACES |
|
||||
BRW_NEW_URB_FENCE |
|
||||
BRW_NEW_VERTEX_PROGRAM),
|
||||
.cache = CACHE_NEW_VS_PROG
|
||||
|
@@ -136,9 +136,6 @@ brw_update_vs_constant_surface( struct gl_context *ctx,
|
||||
|
||||
/**
|
||||
* Vertex shader surfaces (constant buffer).
|
||||
*
|
||||
* This consumes the state updates for the constant buffer needing
|
||||
* to be updated, and produces BRW_NEW_NR_VS_SURFACES for the VS unit.
|
||||
*/
|
||||
static void
|
||||
brw_upload_vs_surfaces(struct brw_context *brw)
|
||||
@@ -170,11 +167,6 @@ brw_upload_vs_surfaces(struct brw_context *brw)
|
||||
brw->vs.bind_bo_offset = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (brw->vs.nr_surfaces != nr_surfaces) {
|
||||
brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES;
|
||||
brw->vs.nr_surfaces = nr_surfaces;
|
||||
}
|
||||
}
|
||||
|
||||
const struct brw_tracked_state brw_vs_surfaces = {
|
||||
|
@@ -113,12 +113,7 @@ brw_upload_wm_unit(struct brw_context *brw)
|
||||
wm->thread1.depth_coef_urb_read_offset = 1;
|
||||
wm->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
|
||||
|
||||
if (intel->gen == 5)
|
||||
wm->thread1.binding_table_entry_count = 0; /* hardware requirement */
|
||||
else {
|
||||
/* BRW_NEW_NR_SURFACES */
|
||||
wm->thread1.binding_table_entry_count = brw->wm.nr_surfaces;
|
||||
}
|
||||
wm->thread1.binding_table_entry_count = 0;
|
||||
|
||||
if (brw->wm.prog_data->total_scratch != 0) {
|
||||
wm->thread2.scratch_space_base_pointer =
|
||||
@@ -263,8 +258,7 @@ const struct brw_tracked_state brw_wm_unit = {
|
||||
.brw = (BRW_NEW_BATCH |
|
||||
BRW_NEW_PROGRAM_CACHE |
|
||||
BRW_NEW_FRAGMENT_PROGRAM |
|
||||
BRW_NEW_CURBE_OFFSETS |
|
||||
BRW_NEW_NR_WM_SURFACES),
|
||||
BRW_NEW_CURBE_OFFSETS),
|
||||
|
||||
.cache = (CACHE_NEW_WM_PROG |
|
||||
CACHE_NEW_SAMPLER)
|
||||
|
@@ -624,11 +624,6 @@ brw_upload_wm_surfaces(struct brw_context *brw)
|
||||
}
|
||||
}
|
||||
|
||||
if (brw->wm.nr_surfaces != nr_surfaces) {
|
||||
brw->wm.nr_surfaces = nr_surfaces;
|
||||
brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES;
|
||||
}
|
||||
|
||||
brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
|
||||
}
|
||||
|
||||
|
@@ -167,9 +167,7 @@ upload_vs_state(struct brw_context *brw)
|
||||
BEGIN_BATCH(6);
|
||||
OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
|
||||
OUT_BATCH(brw->vs.prog_offset);
|
||||
OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
|
||||
floating_point_mode |
|
||||
(brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
|
||||
OUT_BATCH(floating_point_mode | (0 << GEN6_VS_SAMPLER_COUNT_SHIFT));
|
||||
|
||||
if (brw->vs.prog_data->total_scratch) {
|
||||
OUT_RELOC(brw->vs.scratch_bo,
|
||||
@@ -220,8 +218,7 @@ upload_vs_state(struct brw_context *brw)
|
||||
const struct brw_tracked_state gen6_vs_state = {
|
||||
.dirty = {
|
||||
.mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
|
||||
.brw = (BRW_NEW_NR_VS_SURFACES |
|
||||
BRW_NEW_URB_FENCE |
|
||||
.brw = (BRW_NEW_URB_FENCE |
|
||||
BRW_NEW_CONTEXT |
|
||||
BRW_NEW_VERTEX_PROGRAM |
|
||||
BRW_NEW_BATCH),
|
||||
|
@@ -140,9 +140,6 @@ upload_wm_state(struct brw_context *brw)
|
||||
if (ctx->Shader.CurrentFragmentProgram == NULL)
|
||||
dw2 |= GEN6_WM_FLOATING_POINT_MODE_ALT;
|
||||
|
||||
/* BRW_NEW_NR_WM_SURFACES */
|
||||
dw2 |= brw->wm.nr_surfaces << GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT;
|
||||
|
||||
/* CACHE_NEW_SAMPLER */
|
||||
dw2 |= (ALIGN(brw->wm.sampler_count, 4) / 4) << GEN6_WM_SAMPLER_COUNT_SHIFT;
|
||||
dw4 |= (brw->wm.prog_data->first_curbe_grf <<
|
||||
@@ -217,7 +214,6 @@ const struct brw_tracked_state gen6_wm_state = {
|
||||
_NEW_PROGRAM_CONSTANTS |
|
||||
_NEW_POLYGON),
|
||||
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
|
||||
BRW_NEW_NR_WM_SURFACES |
|
||||
BRW_NEW_URB_FENCE |
|
||||
BRW_NEW_BATCH),
|
||||
.cache = (CACHE_NEW_SAMPLER |
|
||||
|
@@ -75,9 +75,7 @@ upload_vs_state(struct brw_context *brw)
|
||||
BEGIN_BATCH(6);
|
||||
OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
|
||||
OUT_BATCH(brw->vs.prog_offset);
|
||||
OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
|
||||
floating_point_mode |
|
||||
(brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
|
||||
OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | floating_point_mode);
|
||||
|
||||
if (brw->vs.prog_data->total_scratch) {
|
||||
OUT_RELOC(brw->vs.scratch_bo,
|
||||
@@ -101,7 +99,6 @@ const struct brw_tracked_state gen7_vs_state = {
|
||||
.dirty = {
|
||||
.mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
|
||||
.brw = (BRW_NEW_CURBE_OFFSETS |
|
||||
BRW_NEW_NR_VS_SURFACES |
|
||||
BRW_NEW_URB_FENCE |
|
||||
BRW_NEW_CONTEXT |
|
||||
BRW_NEW_VERTEX_PROGRAM |
|
||||
|
@@ -146,9 +146,6 @@ upload_ps_state(struct brw_context *brw)
|
||||
|
||||
dw2 |= (ALIGN(brw->wm.sampler_count, 4) / 4) << GEN7_PS_SAMPLER_COUNT_SHIFT;
|
||||
|
||||
/* BRW_NEW_NR_WM_SURFACES */
|
||||
dw2 |= brw->wm.nr_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT;
|
||||
|
||||
/* Use ALT floating point mode for ARB fragment programs, because they
|
||||
* require 0^0 == 1.
|
||||
*/
|
||||
@@ -198,7 +195,6 @@ const struct brw_tracked_state gen7_ps_state = {
|
||||
.mesa = _NEW_PROGRAM_CONSTANTS,
|
||||
.brw = (BRW_NEW_CURBE_OFFSETS |
|
||||
BRW_NEW_FRAGMENT_PROGRAM |
|
||||
BRW_NEW_NR_WM_SURFACES |
|
||||
BRW_NEW_PS_BINDING_TABLE |
|
||||
BRW_NEW_URB_FENCE |
|
||||
BRW_NEW_BATCH),
|
||||
|
Reference in New Issue
Block a user