intel/vec4: Set up push ranges before we emit any code
In order to avoid switching pull constants to push constants and then having to go back to pull, compute the push ranges up-front. This way we know by the time we emit code exactly what ranges are pushable. This is a bit inefficient in the case where the "normal" push constants get compacted. However, most apps don't use giant piles of dead uniforms combined with substantial UBO use so this should be ok. Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10571>
This commit is contained in:

committed by
Marge Bot

parent
c35501ffe8
commit
a881f2295f
@@ -920,19 +920,18 @@ vec4_visitor::move_push_constants_to_pull_constants()
|
||||
{
|
||||
int pull_constant_loc[this->uniforms];
|
||||
|
||||
/* Only allow 32 registers (256 uniform components) as push constants,
|
||||
* which is the limit on gfx6.
|
||||
*
|
||||
* If changing this value, note the limitation about total_regs in
|
||||
* brw_curbe.c.
|
||||
*/
|
||||
int max_uniform_components = 32 * 8;
|
||||
const int max_uniform_components = push_length * 8;
|
||||
|
||||
if (this->uniforms * 4 <= max_uniform_components)
|
||||
return;
|
||||
|
||||
assert(compiler->supports_pull_constants);
|
||||
assert(compiler->compact_params);
|
||||
|
||||
/* If we got here, we also can't have any push ranges */
|
||||
for (unsigned i = 0; i < 4; i++)
|
||||
assert(prog_data->base.ubo_ranges[i].length == 0);
|
||||
|
||||
/* Make some sort of choice as to which uniforms get sent to pull
|
||||
* constants. We could potentially do something clever here like
|
||||
* look for the most infrequently used uniform vec4s, but leave
|
||||
@@ -1811,34 +1810,64 @@ vec4_vs_visitor::setup_attributes(int payload_reg)
|
||||
return payload_reg + vs_prog_data->nr_attribute_slots;
|
||||
}
|
||||
|
||||
/**
 * Decide the push-constant budget up front.
 *
 * Sets push_length to the number of registers needed for the regular
 * params (nr_params rounded up to whole groups of 8 components), capped at
 * max_push_length.  It then walks the four UBO push ranges in order and
 * truncates each one so that the running total never exceeds
 * max_push_length.  On return, push_length is the total of the capped
 * param registers plus the (possibly shrunk) UBO range lengths.
 *
 * Note that this modifies prog_data->base.ubo_ranges[i].length in place.
 */
void
|
||||
vec4_visitor::setup_push_ranges()
|
||||
{
|
||||
/* Only allow 32 registers (256 uniform components) as push constants,
|
||||
* which is the limit on gfx6.
|
||||
*
|
||||
* If changing this value, note the limitation about total_regs in
|
||||
* brw_curbe.c.
|
||||
*/
|
||||
const unsigned max_push_length = 32;
|
||||
|
||||
push_length = DIV_ROUND_UP(prog_data->base.nr_params, 8);
|
||||
push_length = MIN2(push_length, max_push_length);
|
||||
|
||||
/* Shrink UBO push ranges so it all fits in max_push_length */
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
struct brw_ubo_range *range = &prog_data->base.ubo_ranges[i];
|
||||
|
||||
/* Give this range whatever budget is left; once push_length has reached
 * max_push_length, every remaining range is cut down to length 0.
 */
if (push_length + range->length > max_push_length)
|
||||
range->length = max_push_length - push_length;
|
||||
|
||||
push_length += range->length;
|
||||
}
|
||||
assert(push_length <= max_push_length);
|
||||
}
|
||||
|
||||
int
|
||||
vec4_visitor::setup_uniforms(int reg)
|
||||
{
|
||||
prog_data->base.dispatch_grf_start_reg = reg;
|
||||
/* It's possible that uniform compaction will shrink further than expected
|
||||
* so we re-compute the layout and set up our UBO push starts.
|
||||
*/
|
||||
const unsigned old_push_length = push_length;
|
||||
push_length = DIV_ROUND_UP(prog_data->base.nr_params, 8);
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
ubo_push_start[i] = push_length;
|
||||
push_length += stage_prog_data->ubo_ranges[i].length;
|
||||
}
|
||||
assert(push_length <= old_push_length);
|
||||
if (push_length < old_push_length)
|
||||
assert(compiler->compact_params);
|
||||
|
||||
/* The pre-gfx6 VS requires that some push constants get loaded no
|
||||
* matter what, or the GPU would hang.
|
||||
*/
|
||||
if (devinfo->ver < 6 && this->uniforms == 0) {
|
||||
if (devinfo->ver < 6 && push_length == 0) {
|
||||
brw_stage_prog_data_add_params(stage_prog_data, 4);
|
||||
for (unsigned int i = 0; i < 4; i++) {
|
||||
unsigned int slot = this->uniforms * 4 + i;
|
||||
stage_prog_data->param[slot] = BRW_PARAM_BUILTIN_ZERO;
|
||||
}
|
||||
|
||||
this->uniforms++;
|
||||
reg++;
|
||||
} else {
|
||||
reg += ALIGN(uniforms, 2) / 2;
|
||||
push_length = 1;
|
||||
}
|
||||
|
||||
for (int i = 0; i < 4; i++)
|
||||
reg += stage_prog_data->ubo_ranges[i].length;
|
||||
prog_data->base.dispatch_grf_start_reg = reg;
|
||||
prog_data->base.curb_read_length = push_length;
|
||||
|
||||
prog_data->base.curb_read_length =
|
||||
reg - prog_data->base.dispatch_grf_start_reg;
|
||||
|
||||
return reg;
|
||||
return reg + push_length;
|
||||
}
|
||||
|
||||
void
|
||||
@@ -2667,6 +2696,8 @@ vec4_visitor::run()
|
||||
if (shader_time_index >= 0)
|
||||
emit_shader_time_begin();
|
||||
|
||||
setup_push_ranges();
|
||||
|
||||
emit_prolog();
|
||||
|
||||
emit_nir_code();
|
||||
|
@@ -108,6 +108,8 @@ public:
|
||||
const char *current_annotation;
|
||||
|
||||
int first_non_payload_grf;
|
||||
unsigned ubo_push_start[4];
|
||||
unsigned push_length;
|
||||
unsigned int max_grf;
|
||||
brw_analysis<brw::vec4_live_variables, backend_shader> live_analysis;
|
||||
brw_analysis<brw::performance, vec4_visitor> performance_analysis;
|
||||
@@ -139,6 +141,7 @@ public:
|
||||
void move_push_constants_to_pull_constants();
|
||||
void split_uniform_registers();
|
||||
void pack_uniform_registers();
|
||||
void setup_push_ranges();
|
||||
virtual void invalidate_analysis(brw::analysis_dependency_class c);
|
||||
void split_virtual_grfs();
|
||||
bool opt_vector_float();
|
||||
|
@@ -1772,6 +1772,8 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
|
||||
prog_data(prog_data),
|
||||
fail_msg(NULL),
|
||||
first_non_payload_grf(0),
|
||||
ubo_push_start(),
|
||||
push_length(0),
|
||||
live_analysis(this), performance_analysis(this),
|
||||
need_all_constants_in_pull_buffer(false),
|
||||
no_spills(no_spills),
|
||||
|
Reference in New Issue
Block a user