intel: Add driver support for hardware generated local invocation IDs
This adds a few new fields in the brw_cs_prog_data struct and then uses them to fill in the relevant COMPUTE_WALKER fields.

Although the Tile Layout field theoretically has different settings for 32/64/128bpe, it appears that the recommended programming is to always pick either TileY 32bpe or Linear. It's not very practical to look at the surface formats involved, anyway.

Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27167>
This commit is contained in:

committed by
Marge Bot

parent
10ed4f1cab
commit
5e7f4ff97f
@@ -8677,6 +8677,14 @@ iris_upload_compute_walker(struct iris_context *ice,
|
|||||||
cw.PostSync.MOCS = iris_mocs(NULL, &screen->isl_dev, 0);
|
cw.PostSync.MOCS = iris_mocs(NULL, &screen->isl_dev, 0);
|
||||||
cw.InterfaceDescriptor = idd;
|
cw.InterfaceDescriptor = idd;
|
||||||
|
|
||||||
|
#if GFX_VERx10 >= 125
|
||||||
|
cw.GenerateLocalID = cs_prog_data->generate_local_id != 0;
|
||||||
|
cw.EmitLocal = cs_prog_data->generate_local_id;
|
||||||
|
cw.WalkOrder = cs_prog_data->walk_order;
|
||||||
|
cw.TileLayout = cs_prog_data->walk_order == BRW_WALK_ORDER_YXZ ?
|
||||||
|
TileY32bpe : Linear;
|
||||||
|
#endif
|
||||||
|
|
||||||
assert(brw_cs_push_const_total_size(cs_prog_data, dispatch.threads) == 0);
|
assert(brw_cs_push_const_total_size(cs_prog_data, dispatch.threads) == 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -2191,6 +2191,14 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
|
|||||||
cw.IndirectDataStartAddress = push_const_offset;
|
cw.IndirectDataStartAddress = push_const_offset;
|
||||||
cw.IndirectDataLength = push_const_size;
|
cw.IndirectDataLength = push_const_size;
|
||||||
|
|
||||||
|
#if GFX_VERx10 >= 125
|
||||||
|
cw.GenerateLocalID = cs_prog_data->generate_local_id != 0;
|
||||||
|
cw.EmitLocal = cs_prog_data->generate_local_id;
|
||||||
|
cw.WalkOrder = cs_prog_data->walk_order;
|
||||||
|
cw.TileLayout = cs_prog_data->walk_order == BRW_WALK_ORDER_YXZ ?
|
||||||
|
TileY32bpe : Linear;
|
||||||
|
#endif
|
||||||
|
|
||||||
cw.InterfaceDescriptor = (struct GENX(INTERFACE_DESCRIPTOR_DATA)) {
|
cw.InterfaceDescriptor = (struct GENX(INTERFACE_DESCRIPTOR_DATA)) {
|
||||||
.KernelStartPointer = params->cs_prog_kernel,
|
.KernelStartPointer = params->cs_prog_kernel,
|
||||||
.SamplerStatePointer = samplers_offset,
|
.SamplerStatePointer = samplers_offset,
|
||||||
|
@@ -1322,6 +1322,15 @@ struct brw_push_const_block {
|
|||||||
unsigned size; /* Bytes, register aligned */
|
unsigned size; /* Bytes, register aligned */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
 * Walk-order selector for compute dispatch.
 *
 * The value is stored in brw_cs_prog_data::walk_order and is copied
 * unmodified into the COMPUTE_WALKER WalkOrder field by the driver code in
 * this change. The same code selects the TileY32bpe tile layout when the
 * order is BRW_WALK_ORDER_YXZ and Linear for every other order.
 *
 * NOTE(review): the numeric values presumably mirror the hardware WalkOrder
 * encoding, and the XYZ-style suffix presumably names the axis ordering --
 * confirm against the COMPUTE_WALKER definition before renumbering.
 */
enum PACKED brw_compute_walk_order {
|
||||||
|
BRW_WALK_ORDER_XYZ = 0,
|
||||||
|
BRW_WALK_ORDER_XZY = 1,
|
||||||
|
BRW_WALK_ORDER_YXZ = 2,
|
||||||
|
BRW_WALK_ORDER_YZX = 3,
|
||||||
|
BRW_WALK_ORDER_ZXY = 4,
|
||||||
|
BRW_WALK_ORDER_ZYX = 5,
|
||||||
|
};
|
||||||
|
|
||||||
struct brw_cs_prog_data {
|
struct brw_cs_prog_data {
|
||||||
struct brw_stage_prog_data base;
|
struct brw_stage_prog_data base;
|
||||||
|
|
||||||
@@ -1344,6 +1353,8 @@ struct brw_cs_prog_data {
|
|||||||
bool uses_inline_data;
|
bool uses_inline_data;
|
||||||
bool uses_btd_stack_ids;
|
bool uses_btd_stack_ids;
|
||||||
bool uses_systolic;
|
bool uses_systolic;
|
||||||
|
uint8_t generate_local_id;
|
||||||
|
enum brw_compute_walk_order walk_order;
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
struct brw_push_const_block cross_thread;
|
struct brw_push_const_block cross_thread;
|
||||||
|
@@ -359,6 +359,11 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer,
|
|||||||
#if GFX_VERx10 == 125
|
#if GFX_VERx10 == 125
|
||||||
.SystolicModeEnable = prog_data->uses_systolic,
|
.SystolicModeEnable = prog_data->uses_systolic,
|
||||||
#endif
|
#endif
|
||||||
|
.GenerateLocalID = prog_data->generate_local_id != 0,
|
||||||
|
.EmitLocal = prog_data->generate_local_id,
|
||||||
|
.WalkOrder = prog_data->walk_order,
|
||||||
|
.TileLayout = prog_data->walk_order == BRW_WALK_ORDER_YXZ ?
|
||||||
|
TileY32bpe : Linear,
|
||||||
.LocalXMaximum = prog_data->local_size[0] - 1,
|
.LocalXMaximum = prog_data->local_size[0] - 1,
|
||||||
.LocalYMaximum = prog_data->local_size[1] - 1,
|
.LocalYMaximum = prog_data->local_size[1] - 1,
|
||||||
.LocalZMaximum = prog_data->local_size[2] - 1,
|
.LocalZMaximum = prog_data->local_size[2] - 1,
|
||||||
|
@@ -557,6 +557,14 @@ genX(emit_simple_shader_dispatch)(struct anv_simple_shader *state,
|
|||||||
cw.ExecutionMask = dispatch.right_mask;
|
cw.ExecutionMask = dispatch.right_mask;
|
||||||
cw.PostSync.MOCS = anv_mocs(device, NULL, 0);
|
cw.PostSync.MOCS = anv_mocs(device, NULL, 0);
|
||||||
|
|
||||||
|
#if GFX_VERx10 >= 125
|
||||||
|
cw.GenerateLocalID = prog_data->generate_local_id != 0;
|
||||||
|
cw.EmitLocal = prog_data->generate_local_id;
|
||||||
|
cw.WalkOrder = prog_data->walk_order;
|
||||||
|
cw.TileLayout = prog_data->walk_order == BRW_WALK_ORDER_YXZ ?
|
||||||
|
TileY32bpe : Linear;
|
||||||
|
#endif
|
||||||
|
|
||||||
cw.InterfaceDescriptor = (struct GENX(INTERFACE_DESCRIPTOR_DATA)) {
|
cw.InterfaceDescriptor = (struct GENX(INTERFACE_DESCRIPTOR_DATA)) {
|
||||||
.KernelStartPointer = state->kernel->kernel.offset +
|
.KernelStartPointer = state->kernel->kernel.offset +
|
||||||
brw_cs_prog_data_prog_offset(prog_data,
|
brw_cs_prog_data_prog_offset(prog_data,
|
||||||
|
Reference in New Issue
Block a user