i965: Store image_param in brw_context instead of prog_data

This burns an extra 10k of memory or so in the case where you don't have
any images.  However, if you have several shaders which use images, this
should be much less memory.  It also gets rid of a part of prog_data
that really has nothing to do with the compiler.

Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
Jason Ekstrand
2017-09-28 19:04:31 -07:00
parent 6ee4b352c9
commit cfc7ed75eb
14 changed files with 17 additions and 49 deletions

View File

@@ -574,7 +574,6 @@ struct brw_stage_prog_data {
GLuint nr_params; /**< number of float params/constants */
GLuint nr_pull_params;
unsigned nr_image_params;
unsigned curb_read_length;
unsigned total_scratch;
@@ -596,9 +595,6 @@ struct brw_stage_prog_data {
*/
uint32_t *param;
uint32_t *pull_param;
/** Image metadata passed to the shader as uniforms. */
struct brw_image_param *image_param;
};
static inline void

View File

@@ -37,7 +37,7 @@ anv_shader_bin_create(struct anv_device *device,
struct anv_shader_bin *shader;
struct anv_shader_bin_key *key;
struct brw_stage_prog_data *prog_data;
const union gl_constant_value **prog_data_param;
uint32_t *prog_data_param;
struct anv_pipeline_binding *surface_to_descriptor, *sampler_to_descriptor;
ANV_MULTIALLOC(ma);
@@ -169,9 +169,7 @@ anv_shader_bin_create_from_blob(struct anv_device *device,
*
* - Review prog_data struct for size and cacheability: struct
* brw_stage_prog_data has binding_table which uses a lot of uint32_t for 8
* bit quantities etc; param, pull_param, and image_params are pointers, we
* just need the compation map. use bit fields for all bools, eg
* dual_src_blend.
* bit quantities etc; use bit fields for all bools, eg dual_src_blend.
*/
static uint32_t

View File

@@ -580,6 +580,8 @@ struct brw_stage_state
uint32_t sampler_count;
uint32_t sampler_offset;
struct brw_image_param image_param[BRW_MAX_IMAGES];
/** Need to re-emit 3DSTATE_CONSTANT_XS? */
bool push_constants_dirty;
};

View File

@@ -91,11 +91,7 @@ brw_codegen_cs_prog(struct brw_context *brw,
param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
prog_data.base.param = rzalloc_array(NULL, uint32_t, param_count);
prog_data.base.pull_param = rzalloc_array(NULL, uint32_t, param_count);
prog_data.base.image_param =
rzalloc_array(NULL, struct brw_image_param,
cp->program.info.num_images);
prog_data.base.nr_params = param_count;
prog_data.base.nr_image_params = cp->program.info.num_images;
brw_nir_setup_glsl_uniforms(cp->program.nir, &cp->program,&prog_data.base,
true);

View File

@@ -227,7 +227,7 @@ brw_upload_constant_buffer(struct brw_context *brw)
GLuint offset = brw->curbe.wm_start * 16;
/* BRW_NEW_FS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform values */
brw_populate_constant_data(brw, fp, brw->wm.base.prog_data, &buf[offset],
brw_populate_constant_data(brw, fp, &brw->wm.base, &buf[offset],
brw->wm.base.prog_data->param,
brw->wm.base.prog_data->nr_params);
}
@@ -268,7 +268,7 @@ brw_upload_constant_buffer(struct brw_context *brw)
GLuint offset = brw->curbe.vs_start * 16;
/* BRW_NEW_VS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform values */
brw_populate_constant_data(brw, vp, brw->vs.base.prog_data, &buf[offset],
brw_populate_constant_data(brw, vp, &brw->vs.base, &buf[offset],
brw->vs.base.prog_data->param,
brw->vs.base.prog_data->nr_params);
}

View File

@@ -101,11 +101,7 @@ brw_codegen_gs_prog(struct brw_context *brw,
prog_data.base.base.param = rzalloc_array(NULL, uint32_t, param_count);
prog_data.base.base.pull_param = rzalloc_array(NULL, uint32_t, param_count);
prog_data.base.base.image_param =
rzalloc_array(NULL, struct brw_image_param,
gp->program.info.num_images);
prog_data.base.base.nr_params = param_count;
prog_data.base.base.nr_image_params = gp->program.info.num_images;
brw_nir_setup_glsl_uniforms(gp->program.nir, &gp->program,
&prog_data.base.base,

View File

@@ -618,7 +618,6 @@ brw_stage_prog_data_free(const void *p)
ralloc_free(prog_data->param);
ralloc_free(prog_data->pull_param);
ralloc_free(prog_data->image_param);
}
void

View File

@@ -237,7 +237,7 @@ void brw_emit_sampler_state(struct brw_context *brw,
void
brw_populate_constant_data(struct brw_context *brw,
const struct gl_program *prog,
const struct brw_stage_prog_data *prog_data,
const struct brw_stage_state *stage_state,
void *dst,
const uint32_t *param,
unsigned nr_params);

View File

@@ -196,11 +196,6 @@ brw_codegen_tcs_prog(struct brw_context *brw, struct brw_program *tcp,
brw_assign_common_binding_table_offsets(devinfo, &tcp->program,
&prog_data.base.base, 0);
prog_data.base.base.image_param =
rzalloc_array(NULL, struct brw_image_param,
tcp->program.info.num_images);
prog_data.base.base.nr_image_params = tcp->program.info.num_images;
brw_nir_setup_glsl_uniforms(nir, &tcp->program, &prog_data.base.base,
compiler->scalar_stage[MESA_SHADER_TESS_CTRL]);
brw_nir_analyze_ubo_ranges(compiler, tcp->program.nir,

View File

@@ -92,11 +92,7 @@ brw_codegen_tes_prog(struct brw_context *brw,
prog_data.base.base.param = rzalloc_array(NULL, uint32_t, param_count);
prog_data.base.base.pull_param = rzalloc_array(NULL, uint32_t, param_count);
prog_data.base.base.image_param =
rzalloc_array(NULL, struct brw_image_param,
tep->program.info.num_images);
prog_data.base.base.nr_params = param_count;
prog_data.base.base.nr_image_params = tep->program.info.num_images;
brw_nir_setup_glsl_uniforms(nir, &tep->program, &prog_data.base.base,
compiler->scalar_stage[MESA_SHADER_TESS_EVAL]);

View File

@@ -184,8 +184,6 @@ brw_codegen_vs_prog(struct brw_context *brw,
*/
int param_count = vp->program.nir->num_uniforms / 4;
prog_data.base.base.nr_image_params = vp->program.info.num_images;
/* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip
* planes as uniforms.
*/
@@ -193,9 +191,6 @@ brw_codegen_vs_prog(struct brw_context *brw,
stage_prog_data->param = rzalloc_array(NULL, uint32_t, param_count);
stage_prog_data->pull_param = rzalloc_array(NULL, uint32_t, param_count);
stage_prog_data->image_param =
rzalloc_array(NULL, struct brw_image_param,
stage_prog_data->nr_image_params);
stage_prog_data->nr_params = param_count;
if (!vp->program.is_arb_asm) {

View File

@@ -150,14 +150,10 @@ brw_codegen_wm_prog(struct brw_context *brw,
* by the state cache.
*/
int param_count = fp->program.nir->num_uniforms / 4;
prog_data.base.nr_image_params = fp->program.info.num_images;
/* The backend also sometimes adds params for texture size. */
param_count += 2 * ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits;
prog_data.base.param = rzalloc_array(NULL, uint32_t, param_count);
prog_data.base.pull_param = rzalloc_array(NULL, uint32_t, param_count);
prog_data.base.image_param =
rzalloc_array(NULL, struct brw_image_param,
prog_data.base.nr_image_params);
prog_data.base.nr_params = param_count;
if (!fp->program.is_arb_asm) {

View File

@@ -1630,7 +1630,7 @@ brw_upload_image_surfaces(struct brw_context *brw,
update_image_surface(brw, u, prog->sh.ImageAccess[i],
surf_idx,
&stage_state->surf_offset[surf_idx],
&prog_data->image_param[i]);
&stage_state->image_param[i]);
}
brw->ctx.NewDriverState |= BRW_NEW_SURFACES;

View File

@@ -38,7 +38,7 @@ f_as_u32(float f)
static uint32_t
brw_param_value(struct brw_context *brw,
const struct gl_program *prog,
const struct brw_stage_prog_data *prog_data,
const struct brw_stage_state *stage_state,
uint32_t param)
{
struct gl_context *ctx = &brw->ctx;
@@ -80,9 +80,8 @@ brw_param_value(struct brw_context *brw,
case BRW_PARAM_DOMAIN_IMAGE: {
unsigned idx = BRW_PARAM_IMAGE_IDX(param);
unsigned offset = BRW_PARAM_IMAGE_OFFSET(param);
assert(idx < prog_data->nr_image_params);
assert(offset < sizeof(struct brw_image_param));
return ((uint32_t *)&prog_data->image_param[idx])[offset];
assert(offset < ARRAY_SIZE(stage_state->image_param));
return ((uint32_t *)&stage_state->image_param[idx])[offset];
}
default:
@@ -94,14 +93,14 @@ brw_param_value(struct brw_context *brw,
void
brw_populate_constant_data(struct brw_context *brw,
const struct gl_program *prog,
const struct brw_stage_prog_data *prog_data,
const struct brw_stage_state *stage_state,
void *void_dst,
const uint32_t *param,
unsigned nr_params)
{
uint32_t *dst = void_dst;
for (unsigned i = 0; i < nr_params; i++)
dst[i] = brw_param_value(brw, prog, prog_data, param[i]);
dst[i] = brw_param_value(brw, prog, stage_state, param[i]);
}
@@ -159,7 +158,7 @@ gen6_upload_push_constants(struct brw_context *brw,
* side effect of dereferencing uniforms, so _NEW_PROGRAM_CONSTANTS
* wouldn't be set for them.
*/
brw_populate_constant_data(brw, prog, prog_data, param,
brw_populate_constant_data(brw, prog, stage_state, param,
prog_data->param,
prog_data->nr_params);
@@ -246,7 +245,7 @@ brw_upload_pull_constants(struct brw_context *brw,
STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
brw_populate_constant_data(brw, prog, prog_data, constants,
brw_populate_constant_data(brw, prog, stage_state, constants,
prog_data->pull_param,
prog_data->nr_pull_params);
@@ -312,7 +311,7 @@ brw_upload_cs_push_constants(struct brw_context *brw,
for (unsigned i = 0;
i < cs_prog_data->push.cross_thread.dwords;
i++) {
param_copy[i] = brw_param_value(brw, prog, prog_data,
param_copy[i] = brw_param_value(brw, prog, stage_state,
prog_data->param[i]);
}
}
@@ -325,7 +324,7 @@ brw_upload_cs_push_constants(struct brw_context *brw,
unsigned src = cs_prog_data->push.cross_thread.dwords;
for ( ; src < prog_data->nr_params; src++, dst++) {
if (src != cs_prog_data->thread_local_id_index) {
param[dst] = brw_param_value(brw, prog, prog_data,
param[dst] = brw_param_value(brw, prog, stage_state,
prog_data->param[src]);
} else {
param[dst] = t * cs_prog_data->simd_size;