intel/vec4: Add support for UBO pushing
Shader-db results on Haswell (vec4 only):

total instructions in shared programs: 2853928 -> 2726576 (-4.46%)
instructions in affected programs: 855840 -> 728488 (-14.88%)
helped: 9500
HURT: 18
helped stats (abs) min: 1 max: 359 x̄: 13.54 x̃: 11
helped stats (rel) min: 0.44% max: 53.33% x̄: 19.13% x̃: 17.44%
HURT stats (abs) min: 4 max: 124 x̄: 71.00 x̃: 92
HURT stats (rel) min: 3.64% max: 77.86% x̄: 46.43% x̃: 52.12%
95% mean confidence interval for instructions value: -13.78 -12.98
95% mean confidence interval for instructions %-change: -19.21% -18.81%
Instructions are helped.

total cycles in shared programs: 101822616 -> 60245580 (-40.83%)
cycles in affected programs: 93312382 -> 51735346 (-44.56%)
helped: 13292
HURT: 4506
helped stats (abs) min: 2 max: 1229260 x̄: 3370.82 x̃: 776
helped stats (rel) min: 0.04% max: 96.70% x̄: 47.56% x̃: 43.76%
HURT stats (abs) min: 2 max: 17644 x̄: 716.37 x̃: 82
HURT stats (rel) min: 0.02% max: 491.80% x̄: 41.00% x̃: 11.11%
95% mean confidence interval for cycles value: -3037.07 -1635.03
95% mean confidence interval for cycles %-change: -26.03% -24.25%
Cycles are helped.

total spills in shared programs: 1080 -> 1314 (21.67%)
spills in affected programs: 74 -> 308 (316.22%)
helped: 0
HURT: 47

total fills in shared programs: 310 -> 497 (60.32%)
fills in affected programs: 71 -> 258 (263.38%)
helped: 0
HURT: 47

total sends in shared programs: 239884 -> 151799 (-36.72%)
sends in affected programs: 129302 -> 41217 (-68.12%)
helped: 9547
HURT: 0
helped stats (abs) min: 1 max: 226 x̄: 9.23 x̃: 8
helped stats (rel) min: 3.12% max: 98.15% x̄: 72.38% x̃: 80.00%
95% mean confidence interval for sends value: -9.48 -8.98
95% mean confidence interval for sends %-change: -72.80% -71.97%
Sends are helped.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10571>
This commit is contained in:

committed by
Marge Bot

parent
89fd196f6b
commit
ebba3cad81
@@ -78,8 +78,6 @@ offset(const fs_reg &reg, const brw::fs_builder &bld, unsigned delta)
|
||||
return offset(reg, bld.dispatch_width(), delta);
|
||||
}
|
||||
|
||||
#define UBO_START ((1 << 16) - 4)
|
||||
|
||||
struct shader_stats {
|
||||
const char *scheduler_mode;
|
||||
unsigned promoted_constants;
|
||||
|
@@ -202,8 +202,7 @@ brw_nir_analyze_ubo_ranges(const struct brw_compiler *compiler,
|
||||
{
|
||||
const struct intel_device_info *devinfo = compiler->devinfo;
|
||||
|
||||
if ((devinfo->verx10 <= 70) ||
|
||||
!compiler->scalar_stage[nir->info.stage]) {
|
||||
if (devinfo->verx10 <= 70) {
|
||||
memset(out_ranges, 0, 4 * sizeof(struct brw_ubo_range));
|
||||
return;
|
||||
}
|
||||
|
@@ -40,6 +40,8 @@ enum instruction_scheduler_mode {
|
||||
SCHEDULE_POST,
|
||||
};
|
||||
|
||||
#define UBO_START ((1 << 16) - 4)
|
||||
|
||||
struct backend_shader {
|
||||
protected:
|
||||
|
||||
|
@@ -593,7 +593,7 @@ vec4_visitor::split_uniform_registers()
|
||||
*/
|
||||
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
|
||||
for (int i = 0 ; i < 3; i++) {
|
||||
if (inst->src[i].file != UNIFORM)
|
||||
if (inst->src[i].file != UNIFORM || inst->src[i].nr >= UBO_START)
|
||||
continue;
|
||||
|
||||
assert(!inst->src[i].reladdr);
|
||||
@@ -672,7 +672,7 @@ vec4_visitor::pack_uniform_registers()
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < 3; i++) {
|
||||
if (inst->src[i].file != UNIFORM)
|
||||
if (inst->src[i].file != UNIFORM || inst->src[i].nr >= UBO_START)
|
||||
continue;
|
||||
|
||||
assert(type_sz(inst->src[i].type) % 4 == 0);
|
||||
@@ -782,7 +782,7 @@ vec4_visitor::pack_uniform_registers()
|
||||
for (int i = 0 ; i < 3; i++) {
|
||||
int src = inst->src[i].nr;
|
||||
|
||||
if (inst->src[i].file != UNIFORM)
|
||||
if (inst->src[i].file != UNIFORM || inst->src[i].nr >= UBO_START)
|
||||
continue;
|
||||
|
||||
int chan = new_chan[src] / channel_sizes[src];
|
||||
@@ -977,7 +977,7 @@ vec4_visitor::move_push_constants_to_pull_constants()
|
||||
*/
|
||||
foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
|
||||
for (int i = 0 ; i < 3; i++) {
|
||||
if (inst->src[i].file != UNIFORM ||
|
||||
if (inst->src[i].file != UNIFORM || inst->src[i].nr >= UBO_START ||
|
||||
pull_constant_loc[inst->src[i].nr] == -1)
|
||||
continue;
|
||||
|
||||
@@ -2078,11 +2078,19 @@ vec4_visitor::convert_to_hw_regs()
|
||||
}
|
||||
|
||||
case UNIFORM: {
|
||||
reg = stride(byte_offset(brw_vec4_grf(
|
||||
prog_data->base.dispatch_grf_start_reg +
|
||||
src.nr / 2, src.nr % 2 * 4),
|
||||
src.offset),
|
||||
0, 4, 1);
|
||||
if (src.nr >= UBO_START) {
|
||||
reg = byte_offset(brw_vec4_grf(
|
||||
prog_data->base.dispatch_grf_start_reg +
|
||||
ubo_push_start[src.nr - UBO_START] +
|
||||
src.offset / 32, 0),
|
||||
src.offset % 32);
|
||||
} else {
|
||||
reg = byte_offset(brw_vec4_grf(
|
||||
prog_data->base.dispatch_grf_start_reg +
|
||||
src.nr / 2, src.nr % 2 * 4),
|
||||
src.offset);
|
||||
}
|
||||
reg = stride(reg, 0, 4, 1);
|
||||
reg.type = src.type;
|
||||
reg.abs = src.abs;
|
||||
reg.negate = src.negate;
|
||||
|
@@ -624,8 +624,6 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
|
||||
case nir_intrinsic_load_ubo: {
|
||||
src_reg surf_index;
|
||||
|
||||
prog_data->base.has_ubo_pull = true;
|
||||
|
||||
dest = get_nir_dest(instr->dest);
|
||||
|
||||
if (nir_src_is_const(instr->src[0])) {
|
||||
@@ -647,10 +645,31 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
|
||||
surf_index = emit_uniformize(surf_index);
|
||||
}
|
||||
|
||||
src_reg push_reg;
|
||||
src_reg offset_reg;
|
||||
if (nir_src_is_const(instr->src[1])) {
|
||||
unsigned load_offset = nir_src_as_uint(instr->src[1]);
|
||||
offset_reg = brw_imm_ud(load_offset & ~15);
|
||||
unsigned aligned_offset = load_offset & ~15;
|
||||
offset_reg = brw_imm_ud(aligned_offset);
|
||||
|
||||
/* See if we've selected this as a push constant candidate */
|
||||
if (nir_src_is_const(instr->src[0])) {
|
||||
const unsigned ubo_block = nir_src_as_uint(instr->src[0]);
|
||||
const unsigned offset_256b = aligned_offset / 32;
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
const struct brw_ubo_range *range = &prog_data->base.ubo_ranges[i];
|
||||
if (range->block == ubo_block &&
|
||||
offset_256b >= range->start &&
|
||||
offset_256b < range->start + range->length) {
|
||||
|
||||
push_reg = src_reg(dst_reg(UNIFORM, UBO_START + i));
|
||||
push_reg.type = dest.type;
|
||||
push_reg.offset = aligned_offset - 32 * range->start;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
offset_reg = src_reg(this, glsl_type::uint_type);
|
||||
emit(MOV(dst_reg(offset_reg),
|
||||
@@ -658,12 +677,15 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
|
||||
}
|
||||
|
||||
src_reg packed_consts;
|
||||
if (nir_dest_bit_size(instr->dest) == 32) {
|
||||
if (push_reg.file != BAD_FILE) {
|
||||
packed_consts = push_reg;
|
||||
} else if (nir_dest_bit_size(instr->dest) == 32) {
|
||||
packed_consts = src_reg(this, glsl_type::vec4_type);
|
||||
emit_pull_constant_load_reg(dst_reg(packed_consts),
|
||||
surf_index,
|
||||
offset_reg,
|
||||
NULL, NULL /* before_block/inst */);
|
||||
prog_data->base.has_ubo_pull = true;
|
||||
} else {
|
||||
src_reg temp = src_reg(this, glsl_type::dvec4_type);
|
||||
src_reg temp_float = retype(temp, BRW_REGISTER_TYPE_F);
|
||||
@@ -676,6 +698,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
|
||||
emit(ADD(dst_reg(offset_reg), offset_reg, brw_imm_ud(16u)));
|
||||
emit_pull_constant_load_reg(dst_reg(byte_offset(temp_float, REG_SIZE)),
|
||||
surf_index, offset_reg, NULL, NULL);
|
||||
prog_data->base.has_ubo_pull = true;
|
||||
|
||||
packed_consts = src_reg(this, glsl_type::dvec4_type);
|
||||
shuffle_64bit_data(dst_reg(packed_consts), temp, false);
|
||||
|
Reference in New Issue
Block a user