intel/compiler: Move spill/fill tracking to the register allocator

Originally, we had virtual opcodes for scratch access, and let the
generator count spills/fills separately from other sends.  Later, we
started using the generic SHADER_OPCODE_SEND for spills/fills on some
generations of hardware, and simply detected stateless messages there.

But then we started using stateless messages for other things:
- anv uses stateless messages for the buffer device address feature.
- nir_opt_large_constants generates stateless messages.
- XeHP curbe setup can generate stateless messages.

So counting stateless messages is not accurate.  Instead, we move the
spill/fill accounting to the places that actually generate them: the
register allocator, and the load/store_scratch intrinsic handling,
since those intrinsics are basically spills/fills at a higher level.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16691>
This commit is contained in:
Kenneth Graunke
2022-05-24 02:44:53 -07:00
committed by Marge Bot
parent 4896e136b6
commit 9886615958
5 changed files with 47 additions and 34 deletions

View File

@@ -348,10 +348,10 @@ private:
void build_interference_graph(bool allow_spilling);
void discard_interference_graph();
void emit_unspill(const fs_builder &bld, fs_reg dst,
uint32_t spill_offset, unsigned count);
void emit_spill(const fs_builder &bld, fs_reg src,
uint32_t spill_offset, unsigned count);
void emit_unspill(const fs_builder &bld, struct shader_stats *stats,
fs_reg dst, uint32_t spill_offset, unsigned count);
void emit_spill(const fs_builder &bld, struct shader_stats *stats,
fs_reg src, uint32_t spill_offset, unsigned count);
void set_spill_costs();
int choose_spill_reg();
@@ -738,7 +738,9 @@ fs_reg_alloc::discard_interference_graph()
}
void
fs_reg_alloc::emit_unspill(const fs_builder &bld, fs_reg dst,
fs_reg_alloc::emit_unspill(const fs_builder &bld,
struct shader_stats *stats,
fs_reg dst,
uint32_t spill_offset, unsigned count)
{
const intel_device_info *devinfo = bld.shader->devinfo;
@@ -747,6 +749,8 @@ fs_reg_alloc::emit_unspill(const fs_builder &bld, fs_reg dst,
assert(count % reg_size == 0);
for (unsigned i = 0; i < count / reg_size; i++) {
++stats->fill_count;
fs_inst *unspill_inst;
if (devinfo->ver >= 9) {
fs_reg header = this->scratch_header;
@@ -803,7 +807,9 @@ fs_reg_alloc::emit_unspill(const fs_builder &bld, fs_reg dst,
}
void
fs_reg_alloc::emit_spill(const fs_builder &bld, fs_reg src,
fs_reg_alloc::emit_spill(const fs_builder &bld,
struct shader_stats *stats,
fs_reg src,
uint32_t spill_offset, unsigned count)
{
const intel_device_info *devinfo = bld.shader->devinfo;
@@ -812,6 +818,8 @@ fs_reg_alloc::emit_spill(const fs_builder &bld, fs_reg src,
assert(count % reg_size == 0);
for (unsigned i = 0; i < count / reg_size; i++) {
++stats->spill_count;
fs_inst *spill_inst;
if (devinfo->ver >= 9) {
fs_reg header = this->scratch_header;
@@ -1098,8 +1106,8 @@ fs_reg_alloc::spill_reg(unsigned spill_reg)
* 32 bit channels. It shouldn't hurt in any case because the
* unspill destination is a block-local temporary.
*/
emit_unspill(ibld.exec_all().group(width, 0), unspill_dst,
subset_spill_offset, count);
emit_unspill(ibld.exec_all().group(width, 0), &fs->shader_stats,
unspill_dst, subset_spill_offset, count);
}
}
@@ -1153,10 +1161,10 @@ fs_reg_alloc::spill_reg(unsigned spill_reg)
*/
if (inst->is_partial_write() ||
(!inst->force_writemask_all && !per_channel))
emit_unspill(ubld, spill_src, subset_spill_offset,
regs_written(inst));
emit_unspill(ubld, &fs->shader_stats, spill_src,
subset_spill_offset, regs_written(inst));
emit_spill(ubld.at(block, inst->next), spill_src,
emit_spill(ubld.at(block, inst->next), &fs->shader_stats, spill_src,
subset_spill_offset, regs_written(inst));
}