intel/compiler: Move spill/fill tracking to the register allocator
Originally, we had virtual opcodes for scratch access, and let the generator count spills/fills separately from other sends. Later, we started using the generic SHADER_OPCODE_SEND for spills/fills on some generations of hardware, and simply detected stateless messages there. But then we started using stateless messages for other things: - anv uses stateless messages for the buffer device address feature. - nir_opt_large_constants generates stateless messages. - XeHP curbe setup can generate stateless messages. So counting stateless messages is not accurate. Instead, we move the spill/fill accounting to the register allocator, as it generates such things, as well as the load/store_scratch intrinsic handling, as those are basically spill/fills, just at a higher level. Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16691>
This commit is contained in:

committed by
Marge Bot

parent
4896e136b6
commit
9886615958
@@ -81,6 +81,8 @@ offset(const fs_reg ®, const brw::fs_builder &bld, unsigned delta)
|
|||||||
struct shader_stats {
|
struct shader_stats {
|
||||||
const char *scheduler_mode;
|
const char *scheduler_mode;
|
||||||
unsigned promoted_constants;
|
unsigned promoted_constants;
|
||||||
|
unsigned spill_count;
|
||||||
|
unsigned fill_count;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@@ -1826,13 +1826,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|||||||
|
|
||||||
int start_offset = p->next_insn_offset;
|
int start_offset = p->next_insn_offset;
|
||||||
|
|
||||||
/* `send_count` explicitly does not include spills or fills, as we'd
|
|
||||||
* like to use it as a metric for intentional memory access or other
|
|
||||||
* shared function use. Otherwise, subtle changes to scheduling or
|
|
||||||
* register allocation could cause it to fluctuate wildly - and that
|
|
||||||
* effect is already counted in spill/fill counts.
|
|
||||||
*/
|
|
||||||
int spill_count = 0, fill_count = 0;
|
|
||||||
int loop_count = 0, send_count = 0, nop_count = 0;
|
int loop_count = 0, send_count = 0, nop_count = 0;
|
||||||
bool is_accum_used = false;
|
bool is_accum_used = false;
|
||||||
|
|
||||||
@@ -2265,15 +2258,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|||||||
case SHADER_OPCODE_SEND:
|
case SHADER_OPCODE_SEND:
|
||||||
generate_send(inst, dst, src[0], src[1], src[2],
|
generate_send(inst, dst, src[0], src[1], src[2],
|
||||||
inst->ex_mlen > 0 ? src[3] : brw_null_reg());
|
inst->ex_mlen > 0 ? src[3] : brw_null_reg());
|
||||||
if ((inst->desc & 0xff) == BRW_BTI_STATELESS ||
|
send_count++;
|
||||||
(inst->desc & 0xff) == GFX8_BTI_STATELESS_NON_COHERENT) {
|
|
||||||
if (inst->size_written)
|
|
||||||
fill_count++;
|
|
||||||
else
|
|
||||||
spill_count++;
|
|
||||||
} else {
|
|
||||||
send_count++;
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SHADER_OPCODE_GET_BUFFER_SIZE:
|
case SHADER_OPCODE_GET_BUFFER_SIZE:
|
||||||
@@ -2306,17 +2291,17 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|||||||
|
|
||||||
case SHADER_OPCODE_GFX4_SCRATCH_WRITE:
|
case SHADER_OPCODE_GFX4_SCRATCH_WRITE:
|
||||||
generate_scratch_write(inst, src[0]);
|
generate_scratch_write(inst, src[0]);
|
||||||
spill_count++;
|
send_count++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SHADER_OPCODE_GFX4_SCRATCH_READ:
|
case SHADER_OPCODE_GFX4_SCRATCH_READ:
|
||||||
generate_scratch_read(inst, dst);
|
generate_scratch_read(inst, dst);
|
||||||
fill_count++;
|
send_count++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SHADER_OPCODE_GFX7_SCRATCH_READ:
|
case SHADER_OPCODE_GFX7_SCRATCH_READ:
|
||||||
generate_scratch_read_gfx7(inst, dst);
|
generate_scratch_read_gfx7(inst, dst);
|
||||||
fill_count++;
|
send_count++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SHADER_OPCODE_SCRATCH_HEADER:
|
case SHADER_OPCODE_SCRATCH_HEADER:
|
||||||
@@ -2630,6 +2615,15 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|||||||
/* end of program sentinel */
|
/* end of program sentinel */
|
||||||
disasm_new_inst_group(disasm_info, p->next_insn_offset);
|
disasm_new_inst_group(disasm_info, p->next_insn_offset);
|
||||||
|
|
||||||
|
/* `send_count` explicitly does not include spills or fills, as we'd
|
||||||
|
* like to use it as a metric for intentional memory access or other
|
||||||
|
* shared function use. Otherwise, subtle changes to scheduling or
|
||||||
|
* register allocation could cause it to fluctuate wildly - and that
|
||||||
|
* effect is already counted in spill/fill counts.
|
||||||
|
*/
|
||||||
|
send_count -= shader_stats.spill_count;
|
||||||
|
send_count -= shader_stats.fill_count;
|
||||||
|
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
bool validated =
|
bool validated =
|
||||||
#else
|
#else
|
||||||
@@ -2661,7 +2655,9 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|||||||
shader_name, sha1buf,
|
shader_name, sha1buf,
|
||||||
dispatch_width, before_size / 16,
|
dispatch_width, before_size / 16,
|
||||||
loop_count, perf.latency,
|
loop_count, perf.latency,
|
||||||
spill_count, fill_count, send_count,
|
shader_stats.spill_count,
|
||||||
|
shader_stats.fill_count,
|
||||||
|
send_count,
|
||||||
shader_stats.scheduler_mode,
|
shader_stats.scheduler_mode,
|
||||||
shader_stats.promoted_constants,
|
shader_stats.promoted_constants,
|
||||||
before_size, after_size,
|
before_size, after_size,
|
||||||
@@ -2693,7 +2689,9 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|||||||
_mesa_shader_stage_to_abbrev(stage),
|
_mesa_shader_stage_to_abbrev(stage),
|
||||||
dispatch_width, before_size / 16 - nop_count,
|
dispatch_width, before_size / 16 - nop_count,
|
||||||
loop_count, perf.latency,
|
loop_count, perf.latency,
|
||||||
spill_count, fill_count, send_count,
|
shader_stats.spill_count,
|
||||||
|
shader_stats.fill_count,
|
||||||
|
send_count,
|
||||||
shader_stats.scheduler_mode,
|
shader_stats.scheduler_mode,
|
||||||
shader_stats.promoted_constants,
|
shader_stats.promoted_constants,
|
||||||
before_size, after_size);
|
before_size, after_size);
|
||||||
@@ -2703,8 +2701,8 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|||||||
stats->sends = send_count;
|
stats->sends = send_count;
|
||||||
stats->loops = loop_count;
|
stats->loops = loop_count;
|
||||||
stats->cycles = perf.latency;
|
stats->cycles = perf.latency;
|
||||||
stats->spills = spill_count;
|
stats->spills = shader_stats.spill_count;
|
||||||
stats->fills = fill_count;
|
stats->fills = shader_stats.fill_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
return start_offset;
|
return start_offset;
|
||||||
|
@@ -5177,6 +5177,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
|||||||
read_result, srcs, SURFACE_LOGICAL_NUM_SRCS);
|
read_result, srcs, SURFACE_LOGICAL_NUM_SRCS);
|
||||||
bld.MOV(dest, read_result);
|
bld.MOV(dest, read_result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
shader_stats.fill_count += DIV_ROUND_UP(dispatch_width, 16);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -5250,6 +5252,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
|||||||
bld.emit(SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL,
|
bld.emit(SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL,
|
||||||
fs_reg(), srcs, SURFACE_LOGICAL_NUM_SRCS);
|
fs_reg(), srcs, SURFACE_LOGICAL_NUM_SRCS);
|
||||||
}
|
}
|
||||||
|
shader_stats.spill_count += DIV_ROUND_UP(dispatch_width, 16);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -348,10 +348,10 @@ private:
|
|||||||
void build_interference_graph(bool allow_spilling);
|
void build_interference_graph(bool allow_spilling);
|
||||||
void discard_interference_graph();
|
void discard_interference_graph();
|
||||||
|
|
||||||
void emit_unspill(const fs_builder &bld, fs_reg dst,
|
void emit_unspill(const fs_builder &bld, struct shader_stats *stats,
|
||||||
uint32_t spill_offset, unsigned count);
|
fs_reg dst, uint32_t spill_offset, unsigned count);
|
||||||
void emit_spill(const fs_builder &bld, fs_reg src,
|
void emit_spill(const fs_builder &bld, struct shader_stats *stats,
|
||||||
uint32_t spill_offset, unsigned count);
|
fs_reg src, uint32_t spill_offset, unsigned count);
|
||||||
|
|
||||||
void set_spill_costs();
|
void set_spill_costs();
|
||||||
int choose_spill_reg();
|
int choose_spill_reg();
|
||||||
@@ -738,7 +738,9 @@ fs_reg_alloc::discard_interference_graph()
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
fs_reg_alloc::emit_unspill(const fs_builder &bld, fs_reg dst,
|
fs_reg_alloc::emit_unspill(const fs_builder &bld,
|
||||||
|
struct shader_stats *stats,
|
||||||
|
fs_reg dst,
|
||||||
uint32_t spill_offset, unsigned count)
|
uint32_t spill_offset, unsigned count)
|
||||||
{
|
{
|
||||||
const intel_device_info *devinfo = bld.shader->devinfo;
|
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||||
@@ -747,6 +749,8 @@ fs_reg_alloc::emit_unspill(const fs_builder &bld, fs_reg dst,
|
|||||||
assert(count % reg_size == 0);
|
assert(count % reg_size == 0);
|
||||||
|
|
||||||
for (unsigned i = 0; i < count / reg_size; i++) {
|
for (unsigned i = 0; i < count / reg_size; i++) {
|
||||||
|
++stats->fill_count;
|
||||||
|
|
||||||
fs_inst *unspill_inst;
|
fs_inst *unspill_inst;
|
||||||
if (devinfo->ver >= 9) {
|
if (devinfo->ver >= 9) {
|
||||||
fs_reg header = this->scratch_header;
|
fs_reg header = this->scratch_header;
|
||||||
@@ -803,7 +807,9 @@ fs_reg_alloc::emit_unspill(const fs_builder &bld, fs_reg dst,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
fs_reg_alloc::emit_spill(const fs_builder &bld, fs_reg src,
|
fs_reg_alloc::emit_spill(const fs_builder &bld,
|
||||||
|
struct shader_stats *stats,
|
||||||
|
fs_reg src,
|
||||||
uint32_t spill_offset, unsigned count)
|
uint32_t spill_offset, unsigned count)
|
||||||
{
|
{
|
||||||
const intel_device_info *devinfo = bld.shader->devinfo;
|
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||||
@@ -812,6 +818,8 @@ fs_reg_alloc::emit_spill(const fs_builder &bld, fs_reg src,
|
|||||||
assert(count % reg_size == 0);
|
assert(count % reg_size == 0);
|
||||||
|
|
||||||
for (unsigned i = 0; i < count / reg_size; i++) {
|
for (unsigned i = 0; i < count / reg_size; i++) {
|
||||||
|
++stats->spill_count;
|
||||||
|
|
||||||
fs_inst *spill_inst;
|
fs_inst *spill_inst;
|
||||||
if (devinfo->ver >= 9) {
|
if (devinfo->ver >= 9) {
|
||||||
fs_reg header = this->scratch_header;
|
fs_reg header = this->scratch_header;
|
||||||
@@ -1098,8 +1106,8 @@ fs_reg_alloc::spill_reg(unsigned spill_reg)
|
|||||||
* 32 bit channels. It shouldn't hurt in any case because the
|
* 32 bit channels. It shouldn't hurt in any case because the
|
||||||
* unspill destination is a block-local temporary.
|
* unspill destination is a block-local temporary.
|
||||||
*/
|
*/
|
||||||
emit_unspill(ibld.exec_all().group(width, 0), unspill_dst,
|
emit_unspill(ibld.exec_all().group(width, 0), &fs->shader_stats,
|
||||||
subset_spill_offset, count);
|
unspill_dst, subset_spill_offset, count);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1153,10 +1161,10 @@ fs_reg_alloc::spill_reg(unsigned spill_reg)
|
|||||||
*/
|
*/
|
||||||
if (inst->is_partial_write() ||
|
if (inst->is_partial_write() ||
|
||||||
(!inst->force_writemask_all && !per_channel))
|
(!inst->force_writemask_all && !per_channel))
|
||||||
emit_unspill(ubld, spill_src, subset_spill_offset,
|
emit_unspill(ubld, &fs->shader_stats, spill_src,
|
||||||
regs_written(inst));
|
subset_spill_offset, regs_written(inst));
|
||||||
|
|
||||||
emit_spill(ubld.at(block, inst->next), spill_src,
|
emit_spill(ubld.at(block, inst->next), &fs->shader_stats, spill_src,
|
||||||
subset_spill_offset, regs_written(inst));
|
subset_spill_offset, regs_written(inst));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -1172,6 +1172,8 @@ fs_visitor::init()
|
|||||||
|
|
||||||
this->shader_stats.scheduler_mode = NULL;
|
this->shader_stats.scheduler_mode = NULL;
|
||||||
this->shader_stats.promoted_constants = 0,
|
this->shader_stats.promoted_constants = 0,
|
||||||
|
this->shader_stats.spill_count = 0,
|
||||||
|
this->shader_stats.fill_count = 0,
|
||||||
|
|
||||||
this->grf_used = 0;
|
this->grf_used = 0;
|
||||||
this->spilled_any_registers = false;
|
this->spilled_any_registers = false;
|
||||||
|
Reference in New Issue
Block a user