anv, iris: Disable prefetching of binding table entries on DG2
On DG2 the HW fetches the binding table entries into the cache for every single thread when a compute walker is dispatched, wiping out the advantages of the cache prefetch. The spec also advises against doing a cache prefetch when there are more than 31 binding table entries, but most real-world applications will never hit that limit. Signed-off-by: Rohan Garg <rohan.garg@intel.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18498>
This commit is contained in:
@@ -4741,7 +4741,9 @@ iris_store_cs_state(const struct intel_device_info *devinfo,
|
||||
assert(cs_prog_data->push.cross_thread.regs == 0);
|
||||
#endif
|
||||
desc.BarrierEnable = cs_prog_data->uses_barrier;
|
||||
desc.BindingTableEntryCount = MIN2(shader->bt.size_bytes / 4, 31);
|
||||
/* Typically set to 0 to avoid prefetching on every thread dispatch. */
|
||||
desc.BindingTableEntryCount = devinfo->verx10 == 125 ?
|
||||
0 : MIN2(shader->bt.size_bytes / 4, 31);
|
||||
desc.SamplerCount = encode_sampler_count(shader);
|
||||
#if GFX_VER >= 12
|
||||
/* TODO: Check if we are missing workarounds and enable mid-thread
|
||||
@@ -7212,7 +7214,9 @@ iris_upload_compute_walker(struct iris_context *ice,
|
||||
.SamplerStatePointer = shs->sampler_table.offset,
|
||||
.SamplerCount = encode_sampler_count(shader),
|
||||
.BindingTablePointer = binder->bt_offset[MESA_SHADER_COMPUTE],
|
||||
.BindingTableEntryCount = MIN2(shader->bt.size_bytes / 4, 31),
|
||||
/* Typically set to 0 to avoid prefetching on every thread dispatch. */
|
||||
.BindingTableEntryCount = devinfo->verx10 == 125 ?
|
||||
0 : MIN2(shader->bt.size_bytes / 4, 31),
|
||||
};
|
||||
|
||||
assert(brw_cs_push_const_total_size(cs_prog_data, dispatch.threads) == 0);
|
||||
|
@@ -5096,8 +5096,9 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer,
|
||||
cmd_buffer->state.samplers[MESA_SHADER_COMPUTE].offset,
|
||||
.BindingTablePointer =
|
||||
cmd_buffer->state.binding_tables[MESA_SHADER_COMPUTE].offset,
|
||||
.BindingTableEntryCount =
|
||||
1 + MIN2(pipeline->cs->bind_map.surface_count, 30),
|
||||
/* Typically set to 0 to avoid prefetching on every thread dispatch. */
|
||||
.BindingTableEntryCount = devinfo->verx10 == 125 ?
|
||||
0 : 1 + MIN2(pipeline->cs->bind_map.surface_count, 30),
|
||||
.NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
|
||||
.SharedLocalMemorySize = encode_slm_size(GFX_VER,
|
||||
prog_data->base.total_shared),
|
||||
|
@@ -2169,8 +2169,11 @@ genX(compute_pipeline_emit)(struct anv_compute_pipeline *pipeline)
|
||||
.SamplerCount = GFX_VER == 11 ? 0 : get_sampler_count(cs_bin),
|
||||
/* We add 1 because the CS indirect parameters buffer isn't accounted
|
||||
* for in bind_map.surface_count.
|
||||
*
|
||||
* Typically set to 0 to avoid prefetching on every thread dispatch.
|
||||
*/
|
||||
.BindingTableEntryCount = 1 + MIN2(cs_bin->bind_map.surface_count, 30),
|
||||
.BindingTableEntryCount = devinfo->verx10 == 125 ?
|
||||
0 : 1 + MIN2(pipeline->cs->bind_map.surface_count, 30),
|
||||
.BarrierEnable = cs_prog_data->uses_barrier,
|
||||
.SharedLocalMemorySize =
|
||||
encode_slm_size(GFX_VER, cs_prog_data->base.total_shared),
|
||||
|
Reference in New Issue
Block a user