i965: Use a single binding table for all pipeline stages.

Although the hardware supports separate binding tables for each pipeline
stage, we don't see much advantage over a single shared table.

Consider the contents of the binding table:
- Textures (16)
- Draw buffers (8)
- Pull constant buffers (1 for VS, 1 for WM)

OpenGL's texture bindings are global: the same set of textures is
available to all shader targets.  So our binding table entries for
textures would be exactly the same in every table.

There are only two pull constant buffers (not many), and although draw
buffers aren't interesting to the VS, it shouldn't hurt to have them in
the table.  The hardware supports up to 254 binding table entries, and
we currently only use 26 (8 draw buffers + 2 pull constant buffers +
16 textures).

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Paul Berry <stereotype441@gmail.com>
This commit is contained in:
Kenneth Graunke
2011-10-30 16:03:13 -07:00
parent 4a42bd3931
commit e7c29c5de8
9 changed files with 80 additions and 85 deletions

View File

@@ -404,31 +404,48 @@ struct brw_vs_ouput_sizes {
#define BRW_MAX_DRAW_BUFFERS 8
/**
* Size of our surface binding table for the WM.
* This contains pointers to the drawing surfaces and current texture
* objects and shader constant buffers (+2).
*/
#define BRW_WM_MAX_SURF (BRW_MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1)
/**
* Helpers to convert drawing buffers, textures and constant buffers
* to surface binding table indexes, for WM.
* Helpers to create Surface Binding Table indexes for draw buffers,
* textures, and constant buffers.
*
* Shader threads access surfaces via numeric handles, rather than directly
* using pointers. The binding table maps these numeric handles to the
* address of the actual buffer.
*
* For example, a shader might ask to sample from "surface 7." In this case,
* bind[7] would contain a pointer to a texture.
*
* Although the hardware supports separate binding tables per pipeline stage
* (VS, HS, DS, GS, PS), we currently share a single binding table for all of
* them. This is purely for convenience.
*
* Currently our binding tables are (arbitrarily) programmed as follows:
*
* +-------------------------------+
* | 0 | Draw buffer 0 | .
* | . | . | \
* | : | : | > Only relevant to the WM.
* | 7 | Draw buffer 7 | /
* |-----|-------------------------| `
* | 8 | VS Pull Constant Buffer |
* | 9 | WM Pull Constant Buffer |
* |-----|-------------------------|
* | 10 | Texture 0 |
* | . | . |
* | : | : |
* | 25 | Texture 15 |
* +-------------------------------+
*
* Note that nothing actually uses the SURF_INDEX_DRAW macro (the renderbuffer
* surface code indexes the table with the raw unit number instead), so it has
* to be the identity function or things will break. We do want to keep draw
* buffers first so we can use headerless render target writes for RT 0.
*/
#define SURF_INDEX_DRAW(d) (d)
#define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS)
#define SURF_INDEX_TEXTURE(t) (BRW_MAX_DRAW_BUFFERS + 1 + (t))
/**
* Size of surface binding table for the VS.
* Only one constant buffer for now.
*/
#define BRW_VS_MAX_SURF 1
/**
* Only a VS constant buffer
*/
#define SURF_INDEX_VERT_CONST_BUFFER 0
#define SURF_INDEX_VERT_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 0)
#define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 1)
#define SURF_INDEX_TEXTURE(t) (BRW_MAX_DRAW_BUFFERS + 2 + (t))
/** Maximum size of the binding table. */
#define BRW_MAX_SURFACES (BRW_MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 2)
enum brw_cache_id {
BRW_BLEND_STATE,
@@ -722,6 +739,12 @@ struct brw_context
GLuint last_bufsz;
} curbe;
struct {
/** Binding table of pointers to surf_bo entries */
uint32_t bo_offset;
uint32_t surf_offset[BRW_MAX_SURFACES];
} bind;
struct {
struct brw_vs_prog_data *prog_data;
int8_t *constant_map; /* variable array following prog_data */
@@ -732,10 +755,6 @@ struct brw_context
uint32_t prog_offset;
uint32_t state_offset;
/** Binding table of pointers to surf_bo entries */
uint32_t bind_bo_offset;
uint32_t surf_offset[BRW_VS_MAX_SURF];
uint32_t push_const_offset; /* Offset in the batchbuffer */
int push_const_size; /* in 256-bit register increments */
@@ -814,9 +833,6 @@ struct brw_context
/** Offset in the program cache to the WM program */
uint32_t prog_offset;
/** Binding table of pointers to surf_bo entries */
uint32_t bind_bo_offset;
uint32_t surf_offset[BRW_WM_MAX_SURF];
uint32_t state_offset; /* offset in batchbuffer to pre-gen6 WM state */
drm_intel_bo *const_bo; /* pull constant buffer. */

View File

@@ -76,11 +76,11 @@ static void upload_binding_table_pointers(struct brw_context *brw)
BEGIN_BATCH(6);
OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2));
OUT_BATCH(brw->vs.bind_bo_offset);
OUT_BATCH(brw->bind.bo_offset);
OUT_BATCH(0); /* gs */
OUT_BATCH(0); /* clip */
OUT_BATCH(0); /* sf */
OUT_BATCH(brw->wm.bind_bo_offset);
OUT_BATCH(brw->bind.bo_offset);
ADVANCE_BATCH();
}
@@ -114,9 +114,9 @@ static void upload_gen6_binding_table_pointers(struct brw_context *brw)
GEN6_BINDING_TABLE_MODIFY_GS |
GEN6_BINDING_TABLE_MODIFY_PS |
(4 - 2));
OUT_BATCH(brw->vs.bind_bo_offset); /* vs */
OUT_BATCH(brw->bind.bo_offset); /* vs */
OUT_BATCH(0); /* gs */
OUT_BATCH(brw->wm.bind_bo_offset); /* wm/ps */
OUT_BATCH(brw->bind.bo_offset); /* wm/ps */
ADVANCE_BATCH();
}

View File

@@ -71,7 +71,7 @@ extern const struct brw_tracked_state brw_wm_prog;
extern const struct brw_tracked_state brw_wm_samplers;
extern const struct brw_tracked_state brw_renderbuffer_surfaces;
extern const struct brw_tracked_state brw_texture_surfaces;
extern const struct brw_tracked_state brw_wm_binding_table;
extern const struct brw_tracked_state brw_binding_table;
extern const struct brw_tracked_state brw_wm_unit;
extern const struct brw_tracked_state brw_psp_urb_cbs;

View File

@@ -69,7 +69,7 @@ static const struct brw_tracked_state *gen4_atoms[] =
&brw_vs_surfaces, /* must do before unit */
&brw_renderbuffer_surfaces, /* must do before unit */
&brw_texture_surfaces, /* must do before unit */
&brw_wm_binding_table,
&brw_binding_table,
&brw_wm_samplers,
/* These set up state for brw_psp_urb_cbs */
@@ -141,7 +141,7 @@ static const struct brw_tracked_state *gen6_atoms[] =
&brw_vs_surfaces, /* must do before unit */
&brw_renderbuffer_surfaces, /* must do before unit */
&brw_texture_surfaces, /* must do before unit */
&brw_wm_binding_table,
&brw_binding_table,
&brw_wm_samplers,
&gen6_sampler_state,
@@ -206,7 +206,7 @@ const struct brw_tracked_state *gen7_atoms[] =
&brw_vs_surfaces, /* must do before unit */
&brw_renderbuffer_surfaces, /* must do before unit */
&brw_texture_surfaces, /* must do before unit */
&brw_wm_binding_table,
&brw_binding_table,
&gen7_samplers,

View File

@@ -119,19 +119,17 @@ brw_update_vs_constant_surface( struct gl_context *ctx,
(struct brw_vertex_program *) brw->vertex_program;
const struct gl_program_parameter_list *params = vp->program.Base.Parameters;
assert(surf == 0);
/* If there's no constant buffer, then no surface BO is needed to point at
* it.
*/
if (brw->vs.const_bo == NULL) {
brw->vs.surf_offset[surf] = 0;
brw->bind.surf_offset[surf] = 0;
return;
}
intel->vtbl.create_constant_surface(brw, brw->vs.const_bo,
params->NumParameters,
&brw->vs.surf_offset[surf]);
&brw->bind.surf_offset[surf]);
}
/**
@@ -141,32 +139,11 @@ static void
brw_upload_vs_surfaces(struct brw_context *brw)
{
struct gl_context *ctx = &brw->intel.ctx;
uint32_t *bind;
int i;
int nr_surfaces = 0;
/* BRW_NEW_VS_CONSTBUF */
if (brw->vs.const_bo) {
nr_surfaces = 1;
brw_update_vs_constant_surface(ctx, SURF_INDEX_VERT_CONST_BUFFER);
}
if (nr_surfaces != 0) {
bind = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
sizeof(uint32_t) * nr_surfaces,
32, &brw->vs.bind_bo_offset);
for (i = 0; i < nr_surfaces; i++) {
/* BRW_NEW_VS_CONSTBUF */
bind[i] = brw->vs.surf_offset[i];
}
brw->state.dirty.brw |= BRW_NEW_VS_BINDING_TABLE;
} else {
if (brw->vs.bind_bo_offset) {
brw->state.dirty.brw |= BRW_NEW_VS_BINDING_TABLE;
brw->vs.bind_bo_offset = 0;
}
}
}
const struct brw_tracked_state brw_vs_surfaces = {

View File

@@ -271,7 +271,7 @@ brw_update_texture_surface( struct gl_context *ctx, GLuint unit )
intel_miptree_get_dimensions_for_image(firstImage, &width, &height, &depth);
surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
6 * 4, 32, &brw->wm.surf_offset[surf_index]);
6 * 4, 32, &brw->bind.surf_offset[surf_index]);
surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
@@ -298,7 +298,7 @@ brw_update_texture_surface( struct gl_context *ctx, GLuint unit )
/* Emit relocation to surface contents */
drm_intel_bo_emit_reloc(brw->intel.batch.bo,
brw->wm.surf_offset[surf_index] + 4,
brw->bind.surf_offset[surf_index] + 4,
intelObj->mt->region->bo, 0,
I915_GEM_DOMAIN_SAMPLER, 0);
}
@@ -375,7 +375,7 @@ brw_upload_wm_pull_constants(struct brw_context *brw)
if (brw->wm.const_bo) {
drm_intel_bo_unreference(brw->wm.const_bo);
brw->wm.const_bo = NULL;
brw->wm.surf_offset[surf_index] = 0;
brw->bind.surf_offset[surf_index] = 0;
brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
}
return;
@@ -396,7 +396,7 @@ brw_upload_wm_pull_constants(struct brw_context *brw)
intel->vtbl.create_constant_surface(brw, brw->wm.const_bo,
params->NumParameters,
&brw->wm.surf_offset[surf_index]);
&brw->bind.surf_offset[surf_index]);
brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
}
@@ -417,7 +417,7 @@ brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
uint32_t *surf;
surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
6 * 4, 32, &brw->wm.surf_offset[unit]);
6 * 4, 32, &brw->bind.surf_offset[unit]);
surf[0] = (BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
@@ -453,7 +453,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
uint32_t format = 0;
surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
6 * 4, 32, &brw->wm.surf_offset[unit]);
6 * 4, 32, &brw->bind.surf_offset[unit]);
switch (irb->Base.Format) {
case MESA_FORMAT_XRGB8888:
@@ -534,7 +534,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
}
drm_intel_bo_emit_reloc(brw->intel.batch.bo,
brw->wm.surf_offset[unit] + 4,
brw->bind.surf_offset[unit] + 4,
region->bo,
surf[1] - region->bo->offset,
I915_GEM_DOMAIN_RENDER,
@@ -593,7 +593,7 @@ brw_update_texture_surfaces(struct brw_context *brw)
if (texUnit->_ReallyEnabled) {
brw->intel.vtbl.update_texture_surface(ctx, i);
} else {
brw->wm.surf_offset[surf] = 0;
brw->bind.surf_offset[surf] = 0;
}
}
@@ -614,7 +614,7 @@ const struct brw_tracked_state brw_texture_surfaces = {
* numbers to surface state objects.
*/
static void
brw_wm_upload_binding_table(struct brw_context *brw)
brw_upload_binding_table(struct brw_context *brw)
{
uint32_t *bind;
int i;
@@ -623,25 +623,27 @@ brw_wm_upload_binding_table(struct brw_context *brw)
* space for the binding table.
*/
bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
sizeof(uint32_t) * BRW_WM_MAX_SURF,
32, &brw->wm.bind_bo_offset);
sizeof(uint32_t) * BRW_MAX_SURFACES,
32, &brw->bind.bo_offset);
for (i = 0; i < BRW_WM_MAX_SURF; i++) {
/* BRW_NEW_WM_SURFACES */
bind[i] = brw->wm.surf_offset[i];
/* BRW_NEW_WM_SURFACES and BRW_NEW_VS_CONSTBUF */
for (i = 0; i < BRW_MAX_SURFACES; i++) {
bind[i] = brw->bind.surf_offset[i];
}
brw->state.dirty.brw |= BRW_NEW_VS_BINDING_TABLE;
brw->state.dirty.brw |= BRW_NEW_PS_BINDING_TABLE;
}
const struct brw_tracked_state brw_wm_binding_table = {
const struct brw_tracked_state brw_binding_table = {
.dirty = {
.mesa = 0,
.brw = (BRW_NEW_BATCH |
BRW_NEW_VS_CONSTBUF |
BRW_NEW_WM_SURFACES),
.cache = 0
},
.emit = brw_wm_upload_binding_table,
.emit = brw_upload_binding_table,
};
void

View File

@@ -37,7 +37,7 @@ upload_vs_state(struct brw_context *brw)
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_VS << 16 | (2 - 2));
OUT_BATCH(brw->vs.bind_bo_offset);
OUT_BATCH(brw->bind.bo_offset);
ADVANCE_BATCH();
if (brw->vs.push_const_size == 0) {

View File

@@ -104,7 +104,7 @@ upload_ps_state(struct brw_context *brw)
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_PS << 16 | (2 - 2));
OUT_BATCH(brw->wm.bind_bo_offset);
OUT_BATCH(brw->bind.bo_offset);
ADVANCE_BATCH();
/* CACHE_NEW_SAMPLER */

View File

@@ -68,7 +68,7 @@ gen7_update_texture_surface(struct gl_context *ctx, GLuint unit)
intel_miptree_get_dimensions_for_image(firstImage, &width, &height, &depth);
surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
sizeof(*surf), 32, &brw->wm.surf_offset[surf_index]);
sizeof(*surf), 32, &brw->bind.surf_offset[surf_index]);
memset(surf, 0, sizeof(*surf));
surf->ss0.surface_type = translate_tex_target(tObj->Target);
@@ -118,7 +118,7 @@ gen7_update_texture_surface(struct gl_context *ctx, GLuint unit)
/* Emit relocation to surface contents */
drm_intel_bo_emit_reloc(brw->intel.batch.bo,
brw->wm.surf_offset[surf_index] +
brw->bind.surf_offset[surf_index] +
offsetof(struct gen7_surface_state, ss1),
intelObj->mt->region->bo, 0,
I915_GEM_DOMAIN_SAMPLER, 0);
@@ -172,7 +172,7 @@ gen7_update_null_renderbuffer_surface(struct brw_context *brw, unsigned unit)
struct gen7_surface_state *surf;
surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
sizeof(*surf), 32, &brw->wm.surf_offset[unit]);
sizeof(*surf), 32, &brw->bind.surf_offset[unit]);
memset(surf, 0, sizeof(*surf));
surf->ss0.surface_type = BRW_SURFACE_NULL;
@@ -197,7 +197,7 @@ gen7_update_renderbuffer_surface(struct brw_context *brw,
uint32_t tile_x, tile_y;
surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
sizeof(*surf), 32, &brw->wm.surf_offset[unit]);
sizeof(*surf), 32, &brw->bind.surf_offset[unit]);
memset(surf, 0, sizeof(*surf));
switch (irb->Base.Format) {
@@ -252,7 +252,7 @@ gen7_update_renderbuffer_surface(struct brw_context *brw,
surf->ss3.pitch = (region->pitch * region->cpp) - 1;
drm_intel_bo_emit_reloc(brw->intel.batch.bo,
brw->wm.surf_offset[unit] +
brw->bind.surf_offset[unit] +
offsetof(struct gen7_surface_state, ss1),
region->bo,
surf->ss1.base_addr - region->bo->offset,