anv, iris: Disable prefetching of binding table entries on DG2
On DG2, the HW fetches the binding table entries into the cache for every single thread when a compute walker is dispatched, wiping out the advantages of the cache prefetch.

The spec also advises against doing a cache prefetch when there are more than 31 binding table entries, but most real-world applications will never hit that limit.

Signed-off-by: Rohan Garg <rohan.garg@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18498>
This commit is contained in:
@@ -4741,7 +4741,9 @@ iris_store_cs_state(const struct intel_device_info *devinfo,
|
|||||||
assert(cs_prog_data->push.cross_thread.regs == 0);
|
assert(cs_prog_data->push.cross_thread.regs == 0);
|
||||||
#endif
|
#endif
|
||||||
desc.BarrierEnable = cs_prog_data->uses_barrier;
|
desc.BarrierEnable = cs_prog_data->uses_barrier;
|
||||||
desc.BindingTableEntryCount = MIN2(shader->bt.size_bytes / 4, 31);
|
/* Typically set to 0 to avoid prefetching on every thread dispatch. */
|
||||||
|
desc.BindingTableEntryCount = devinfo->verx10 == 125 ?
|
||||||
|
0 : MIN2(shader->bt.size_bytes / 4, 31);
|
||||||
desc.SamplerCount = encode_sampler_count(shader);
|
desc.SamplerCount = encode_sampler_count(shader);
|
||||||
#if GFX_VER >= 12
|
#if GFX_VER >= 12
|
||||||
/* TODO: Check if we are missing workarounds and enable mid-thread
|
/* TODO: Check if we are missing workarounds and enable mid-thread
|
||||||
@@ -7212,7 +7214,9 @@ iris_upload_compute_walker(struct iris_context *ice,
|
|||||||
.SamplerStatePointer = shs->sampler_table.offset,
|
.SamplerStatePointer = shs->sampler_table.offset,
|
||||||
.SamplerCount = encode_sampler_count(shader),
|
.SamplerCount = encode_sampler_count(shader),
|
||||||
.BindingTablePointer = binder->bt_offset[MESA_SHADER_COMPUTE],
|
.BindingTablePointer = binder->bt_offset[MESA_SHADER_COMPUTE],
|
||||||
.BindingTableEntryCount = MIN2(shader->bt.size_bytes / 4, 31),
|
/* Typically set to 0 to avoid prefetching on every thread dispatch. */
|
||||||
|
.BindingTableEntryCount = devinfo->verx10 == 125 ?
|
||||||
|
0 : MIN2(shader->bt.size_bytes / 4, 31),
|
||||||
};
|
};
|
||||||
|
|
||||||
assert(brw_cs_push_const_total_size(cs_prog_data, dispatch.threads) == 0);
|
assert(brw_cs_push_const_total_size(cs_prog_data, dispatch.threads) == 0);
|
||||||
|
@@ -5096,8 +5096,9 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer,
|
|||||||
cmd_buffer->state.samplers[MESA_SHADER_COMPUTE].offset,
|
cmd_buffer->state.samplers[MESA_SHADER_COMPUTE].offset,
|
||||||
.BindingTablePointer =
|
.BindingTablePointer =
|
||||||
cmd_buffer->state.binding_tables[MESA_SHADER_COMPUTE].offset,
|
cmd_buffer->state.binding_tables[MESA_SHADER_COMPUTE].offset,
|
||||||
.BindingTableEntryCount =
|
/* Typically set to 0 to avoid prefetching on every thread dispatch. */
|
||||||
1 + MIN2(pipeline->cs->bind_map.surface_count, 30),
|
.BindingTableEntryCount = devinfo->verx10 == 125 ?
|
||||||
|
0 : 1 + MIN2(pipeline->cs->bind_map.surface_count, 30),
|
||||||
.NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
|
.NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
|
||||||
.SharedLocalMemorySize = encode_slm_size(GFX_VER,
|
.SharedLocalMemorySize = encode_slm_size(GFX_VER,
|
||||||
prog_data->base.total_shared),
|
prog_data->base.total_shared),
|
||||||
|
@@ -2169,8 +2169,11 @@ genX(compute_pipeline_emit)(struct anv_compute_pipeline *pipeline)
|
|||||||
.SamplerCount = GFX_VER == 11 ? 0 : get_sampler_count(cs_bin),
|
.SamplerCount = GFX_VER == 11 ? 0 : get_sampler_count(cs_bin),
|
||||||
/* We add 1 because the CS indirect parameters buffer isn't accounted
|
/* We add 1 because the CS indirect parameters buffer isn't accounted
|
||||||
* for in bind_map.surface_count.
|
* for in bind_map.surface_count.
|
||||||
|
*
|
||||||
|
* Typically set to 0 to avoid prefetching on every thread dispatch.
|
||||||
*/
|
*/
|
||||||
.BindingTableEntryCount = 1 + MIN2(cs_bin->bind_map.surface_count, 30),
|
.BindingTableEntryCount = devinfo->verx10 == 125 ?
|
||||||
|
0 : 1 + MIN2(pipeline->cs->bind_map.surface_count, 30),
|
||||||
.BarrierEnable = cs_prog_data->uses_barrier,
|
.BarrierEnable = cs_prog_data->uses_barrier,
|
||||||
.SharedLocalMemorySize =
|
.SharedLocalMemorySize =
|
||||||
encode_slm_size(GFX_VER, cs_prog_data->base.total_shared),
|
encode_slm_size(GFX_VER, cs_prog_data->base.total_shared),
|
||||||
|
Reference in New Issue
Block a user