intel: Switch to COMPUTE_WALKER_BODY

Stuff COMPUTE_WALKER_BODY in COMPUTE_WALKER in both iris and anv.

This also fixes the tracepoint for ray dispatches. Stuffing
COMPUTE_WALKER_BODY allows us to set
cmd_buffer->state.last_compute_walker.

Signed-off-by: Sagar Ghuge <sagar.ghuge@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31822>
This commit is contained in:
Sagar Ghuge
2024-10-24 05:10:18 -07:00
committed by Marge Bot
parent 938f5ec7ce
commit 17096f87c1
8 changed files with 218 additions and 296 deletions

View File

@@ -660,9 +660,9 @@ iris_rewrite_compute_walker_pc(struct iris_batch *batch,
uint32_t dwords[GENX(COMPUTE_WALKER_length)];
_iris_pack_command(batch, GENX(COMPUTE_WALKER), dwords, cw) {
cw.PostSync.Operation = WriteTimestamp;
cw.PostSync.DestinationAddress = addr;
cw.PostSync.MOCS = iris_mocs(NULL, &screen->isl_dev, 0);
cw.body.PostSync.Operation = WriteTimestamp;
cw.body.PostSync.DestinationAddress = addr;
cw.body.PostSync.MOCS = iris_mocs(NULL, &screen->isl_dev, 0);
}
for (uint32_t i = 0; i < GENX(COMPUTE_WALKER_length); i++)
@@ -9012,29 +9012,33 @@ iris_upload_compute_walker(struct iris_context *ice,
ice->utrace.last_compute_walker =
iris_emit_dwords(batch, GENX(COMPUTE_WALKER_length));
struct GENX(COMPUTE_WALKER_BODY) body = {
.SIMDSize = dispatch.simd_size / 16,
.MessageSIMD = dispatch.simd_size / 16,
.LocalXMaximum = grid->block[0] - 1,
.LocalYMaximum = grid->block[1] - 1,
.LocalZMaximum = grid->block[2] - 1,
.ThreadGroupIDXDimension = grid->grid[0],
.ThreadGroupIDYDimension = grid->grid[1],
.ThreadGroupIDZDimension = grid->grid[2],
.ExecutionMask = dispatch.right_mask,
.PostSync.MOCS = iris_mocs(NULL, &screen->isl_dev, 0),
.InterfaceDescriptor = idd,
#if GFX_VERx10 >= 125
.GenerateLocalID = cs_data->generate_local_id != 0,
.EmitLocal = cs_data->generate_local_id,
.WalkOrder = cs_data->walk_order,
.TileLayout = cs_data->walk_order == INTEL_WALK_ORDER_YXZ ?
TileY32bpe : Linear,
#endif
};
_iris_pack_command(batch, GENX(COMPUTE_WALKER),
ice->utrace.last_compute_walker, cw) {
cw.IndirectParameterEnable = grid->indirect;
cw.SIMDSize = dispatch.simd_size / 16;
cw.MessageSIMD = dispatch.simd_size / 16;
cw.LocalXMaximum = grid->block[0] - 1;
cw.LocalYMaximum = grid->block[1] - 1;
cw.LocalZMaximum = grid->block[2] - 1;
cw.ThreadGroupIDXDimension = grid->grid[0];
cw.ThreadGroupIDYDimension = grid->grid[1];
cw.ThreadGroupIDZDimension = grid->grid[2];
cw.ExecutionMask = dispatch.right_mask;
cw.PostSync.MOCS = iris_mocs(NULL, &screen->isl_dev, 0);
cw.InterfaceDescriptor = idd;
#if GFX_VERx10 >= 125
cw.GenerateLocalID = cs_data->generate_local_id != 0;
cw.EmitLocal = cs_data->generate_local_id;
cw.WalkOrder = cs_data->walk_order;
cw.TileLayout = cs_data->walk_order == INTEL_WALK_ORDER_YXZ ?
TileY32bpe : Linear;
#endif
cw.body = body;
assert(iris_cs_push_const_total_size(shader, dispatch.threads) == 0);
}
}

View File

@@ -1653,43 +1653,42 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
assert(cs_prog_data->local_size[2] == 1);
#if GFX_VERx10 >= 125
assert(cs_prog_data->push.per_thread.regs == 0);
blorp_emit(batch, GENX(COMPUTE_WALKER), cw) {
cw.SIMDSize = dispatch.simd_size / 16;
cw.MessageSIMD = dispatch.simd_size / 16,
cw.LocalXMaximum = cs_prog_data->local_size[0] - 1;
cw.LocalYMaximum = cs_prog_data->local_size[1] - 1;
cw.LocalZMaximum = cs_prog_data->local_size[2] - 1;
cw.ThreadGroupIDStartingX = group_x0;
cw.ThreadGroupIDStartingY = group_y0;
cw.ThreadGroupIDStartingZ = group_z0;
cw.ThreadGroupIDXDimension = group_x1;
cw.ThreadGroupIDYDimension = group_y1;
cw.ThreadGroupIDZDimension = group_z1;
cw.ExecutionMask = 0xffffffff;
cw.PostSync.MOCS = isl_mocs(batch->blorp->isl_dev, 0, false);
uint32_t surfaces_offset = blorp_setup_binding_table(batch, params);
uint32_t surfaces_offset = blorp_setup_binding_table(batch, params);
uint32_t samplers_offset =
params->src.enabled ? blorp_emit_sampler_state(batch) : 0;
uint32_t samplers_offset =
params->src.enabled ? blorp_emit_sampler_state(batch) : 0;
uint32_t push_const_offset;
unsigned push_const_size;
blorp_get_compute_push_const(batch, params, dispatch.threads,
&push_const_offset, &push_const_size);
struct GENX(COMPUTE_WALKER_BODY) body = {
.SIMDSize = dispatch.simd_size / 16,
.MessageSIMD = dispatch.simd_size / 16,
.LocalXMaximum = cs_prog_data->local_size[0] - 1,
.LocalYMaximum = cs_prog_data->local_size[1] - 1,
.LocalZMaximum = cs_prog_data->local_size[2] - 1,
.ThreadGroupIDStartingX = group_x0,
.ThreadGroupIDStartingY = group_y0,
.ThreadGroupIDStartingZ = group_z0,
.ThreadGroupIDXDimension = group_x1,
.ThreadGroupIDYDimension = group_y1,
.ThreadGroupIDZDimension = group_z1,
.ExecutionMask = 0xffffffff,
.PostSync.MOCS = isl_mocs(batch->blorp->isl_dev, 0, false),
uint32_t push_const_offset;
unsigned push_const_size;
blorp_get_compute_push_const(batch, params, dispatch.threads,
&push_const_offset, &push_const_size);
cw.IndirectDataStartAddress = push_const_offset;
cw.IndirectDataLength = push_const_size;
.IndirectDataStartAddress = push_const_offset,
.IndirectDataLength = push_const_size,
#if GFX_VERx10 >= 125
cw.GenerateLocalID = cs_prog_data->generate_local_id != 0;
cw.EmitLocal = cs_prog_data->generate_local_id;
cw.WalkOrder = cs_prog_data->walk_order;
cw.TileLayout = cs_prog_data->walk_order == INTEL_WALK_ORDER_YXZ ?
TileY32bpe : Linear;
.GenerateLocalID = cs_prog_data->generate_local_id != 0,
.EmitLocal = cs_prog_data->generate_local_id,
.WalkOrder = cs_prog_data->walk_order,
.TileLayout = cs_prog_data->walk_order == INTEL_WALK_ORDER_YXZ ?
TileY32bpe : Linear,
#endif
cw.InterfaceDescriptor = (struct GENX(INTERFACE_DESCRIPTOR_DATA)) {
.InterfaceDescriptor = (struct GENX(INTERFACE_DESCRIPTOR_DATA)) {
.KernelStartPointer = params->cs_prog_kernel,
.SamplerStatePointer = samplers_offset,
.SamplerCount = params->src.enabled ? 1 : 0,
@@ -1704,7 +1703,12 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
dispatch.group_size,
dispatch.simd_size),
.NumberOfBarriers = cs_prog_data->uses_barrier,
};
},
};
assert(cs_prog_data->push.per_thread.regs == 0);
blorp_emit(batch, GENX(COMPUTE_WALKER), cw) {
cw.body = body;
}
#else

View File

@@ -143,17 +143,23 @@ genX(emit_execute)(executor_context *ec, const executor_params *params)
emit_pipe_control(ec);
#if GFX_VERx10 >= 125
executor_batch_emit(GENX(COMPUTE_WALKER), cw) {
struct GENX(COMPUTE_WALKER_BODY) body = {
#if GFX_VERx10 >= 200
cw.SIMDSize = 1;
cw.MessageSIMD = 1;
.SIMDSize = 1,
.MessageSIMD = 1,
#endif
cw.ThreadGroupIDXDimension = 1;
cw.ThreadGroupIDYDimension = 1;
cw.ThreadGroupIDZDimension = 1;
cw.ExecutionMask = 0xFFFFFFFF;
cw.PostSync.MOCS = mocs;
cw.InterfaceDescriptor = desc;
.ThreadGroupIDXDimension = 1,
.ThreadGroupIDYDimension = 1,
.ThreadGroupIDZDimension = 1,
.ExecutionMask = 0xFFFFFFFF,
.PostSync.MOCS = mocs,
.InterfaceDescriptor = desc,
};
#endif
#if GFX_VERx10 >= 125
executor_batch_emit(GENX(COMPUTE_WALKER), cw) {
cw.body = body;
};
#else
uint32_t *idd = executor_alloc_bytes_aligned(&ec->bo.extra, 8 * 4, 256);

View File

@@ -1590,66 +1590,7 @@
<field name="Compute Command Opcode" start="24" end="26" type="uint" default="2" />
<field name="Pipeline" start="27" end="28" type="uint" default="2" />
<field name="Command Type" start="29" end="31" type="uint" default="3" />
<field name="Indirect Data Length" start="64" end="80" type="uint" />
<field name="L3 prefetch disable" start="81" end="81" type="bool" />
<field name="Partition Type" start="94" end="95" type="uint">
<value name="WALKER_PARTITION_X" value="1" />
<value name="WALKER_PARTITION_Y" value="2" />
<value name="WALKER_PARTITION_Z" value="3" />
</field>
<field name="Indirect Data Start Address" start="102" end="127" type="offset" />
<field name="Message SIMD" start="145" end="146" type="uint">
<value name="SIMD8" value="0" />
<value name="SIMD16" value="1" />
<value name="SIMD32" value="2" />
</field>
<field name="Tile Layout" start="147" end="149" type="uint">
<value name="Linear" value="0" />
<value name="TileY 32bpe" value="1" />
<value name="TileY 64bpe" value="2" />
<value name="TileY 128bpe" value="3" />
</field>
<field name="Walk Order" start="150" end="152" type="uint">
<value name="Walk 012" value="0" />
<value name="Walk 021" value="1" />
<value name="Walk 102" value="2" />
<value name="Walk 120" value="3" />
<value name="Walk 201" value="4" />
<value name="Walk 210" value="5" />
</field>
<field name="Emit Inline Parameter" start="153" end="153" type="bool" />
<field name="Emit Local" start="154" end="156" type="uint">
<value name="Emit None" value="0" />
<value name="Emit X" value="1" />
<value name="Emit XY" value="3" />
<value name="Emit XYZ" value="7" />
</field>
<field name="Generate Local ID" start="157" end="157" type="bool" />
<field name="SIMD Size" start="158" end="159" type="uint">
<value name="SIMD8" value="0" />
<value name="SIMD16" value="1" />
<value name="SIMD32" value="2" />
</field>
<field name="Execution Mask" start="160" end="191" type="uint" />
<field name="Local X Maximum" start="192" end="201" type="uint" />
<field name="Local Y Maximum" start="202" end="211" type="uint" />
<field name="Local Z Maximum" start="212" end="221" type="uint" />
<field name="Thread Group ID X Dimension" start="224" end="255" type="uint" />
<field name="Thread Group ID Y Dimension" start="256" end="287" type="uint" />
<field name="Thread Group ID Z Dimension" start="288" end="319" type="uint" />
<field name="Thread Group ID Starting X" start="320" end="351" type="uint" />
<field name="Thread Group ID Starting Y" start="352" end="383" type="uint" />
<field name="Thread Group ID Starting Z" start="384" end="415" type="uint" />
<field name="Partition ID" start="416" end="447" type="uint" />
<field name="Partition Size" start="448" end="479" type="uint" />
<field name="Preempt X" start="480" end="511" type="uint" />
<field name="Preempt Y" start="512" end="543" type="uint" />
<field name="Preempt Z" start="544" end="575" type="uint" />
<field name="Interface Descriptor" start="576" end="831" type="INTERFACE_DESCRIPTOR_DATA" />
<field name="Post Sync" start="832" end="991" type="POSTSYNC_DATA" />
<group count="8" start="992" size="32">
<field name="Inline Data" start="0" end="31" type="uint" />
</group>
<field name="body" start="32" end="1248" type="COMPUTE_WALKER_BODY" />
</instruction>
<instruction name="EXECUTE_INDIRECT_DISPATCH" bias="2" length="44">
<field name="DWord Length" start="0" end="7" type="uint" default="42" />

View File

@@ -936,64 +936,7 @@
<field name="Compute Command Opcode" start="24" end="26" type="uint" default="2" />
<field name="Pipeline" start="27" end="28" type="uint" default="2" />
<field name="Command Type" start="29" end="31" type="uint" default="3" />
<field name="Indirect Data Length" start="64" end="80" type="uint" />
<field name="L3 prefetch disable" start="81" end="81" type="bool" />
<field name="Partition Type" start="94" end="95" type="uint">
<value name="WALKER_PARTITION_X" value="1" />
<value name="WALKER_PARTITION_Y" value="2" />
<value name="WALKER_PARTITION_Z" value="3" />
</field>
<field name="Indirect Data Start Address" start="102" end="127" type="offset" />
<field name="Message SIMD" start="145" end="146" type="uint">
<value name="SIMT16" value="1" />
<value name="SIMT32" value="2" />
</field>
<field name="Tile Layout" start="147" end="149" type="uint">
<value name="Linear" value="0" />
<value name="TileY 32bpe" value="1" />
<value name="TileY 64bpe" value="2" />
<value name="TileY 128bpe" value="3" />
</field>
<field name="Walk Order" start="150" end="152" type="uint">
<value name="Walk 012" value="0" />
<value name="Walk 021" value="1" />
<value name="Walk 102" value="2" />
<value name="Walk 120" value="3" />
<value name="Walk 201" value="4" />
<value name="Walk 210" value="5" />
</field>
<field name="Emit Inline Parameter" start="153" end="153" type="bool" />
<field name="Emit Local" start="154" end="156" type="uint">
<value name="Emit None" value="0" />
<value name="Emit X" value="1" />
<value name="Emit XY" value="3" />
<value name="Emit XYZ" value="7" />
</field>
<field name="Generate Local ID" start="157" end="157" type="bool" />
<field name="SIMD Size" start="158" end="159" type="uint">
<value name="SIMT16" value="1" />
<value name="SIMT32" value="2" />
</field>
<field name="Execution Mask" start="160" end="191" type="uint" />
<field name="Local X Maximum" start="192" end="201" type="uint" />
<field name="Local Y Maximum" start="202" end="211" type="uint" />
<field name="Local Z Maximum" start="212" end="221" type="uint" />
<field name="Thread Group ID X Dimension" start="224" end="255" type="uint" />
<field name="Thread Group ID Y Dimension" start="256" end="287" type="uint" />
<field name="Thread Group ID Z Dimension" start="288" end="319" type="uint" />
<field name="Thread Group ID Starting X" start="320" end="351" type="uint" />
<field name="Thread Group ID Starting Y" start="352" end="383" type="uint" />
<field name="Thread Group ID Starting Z" start="384" end="415" type="uint" />
<field name="Partition ID" start="416" end="447" type="uint" />
<field name="Partition Size" start="448" end="479" type="uint" />
<field name="Preempt X" start="480" end="511" type="uint" />
<field name="Preempt Y" start="512" end="543" type="uint" />
<field name="Preempt Z" start="544" end="575" type="uint" />
<field name="Interface Descriptor" start="608" end="863" type="INTERFACE_DESCRIPTOR_DATA" />
<field name="Post Sync" start="864" end="1023" type="POSTSYNC_DATA" />
<group count="8" start="1024" size="32">
<field name="Inline Data" start="0" end="31" type="uint" />
</group>
<field name="body" start="32" end="1280" type="COMPUTE_WALKER_BODY" />
</instruction>
<instruction name="EXECUTE_INDIRECT_DISPATCH" bias="2" length="45">
<field name="DWord Length" start="0" end="7" type="uint" default="43" />

View File

@@ -6197,11 +6197,13 @@ void genX(cmd_emit_timestamp)(struct anv_batch *batch,
uint32_t dwords[GENX(COMPUTE_WALKER_length)];
GENX(COMPUTE_WALKER_pack)(batch, dwords, &(struct GENX(COMPUTE_WALKER)) {
.PostSync = (struct GENX(POSTSYNC_DATA)) {
.Operation = WriteTimestamp,
.DestinationAddress = addr,
.MOCS = anv_mocs(device, NULL, 0),
},
.body = {
.PostSync = (struct GENX(POSTSYNC_DATA)) {
.Operation = WriteTimestamp,
.DestinationAddress = addr,
.MOCS = anv_mocs(device, NULL, 0),
},
}
});
for (uint32_t i = 0; i < ARRAY_SIZE(dwords); i++) {

View File

@@ -437,6 +437,37 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer,
num_workgroup_data[2] = groupCountZ;
}
struct GENX(COMPUTE_WALKER_BODY) body = {
.SIMDSize = dispatch.simd_size / 16,
.MessageSIMD = dispatch.simd_size / 16,
.IndirectDataStartAddress = comp_state->base.push_constants_state.offset,
.IndirectDataLength = comp_state->base.push_constants_state.alloc_size,
.GenerateLocalID = prog_data->generate_local_id != 0,
.EmitLocal = prog_data->generate_local_id,
.WalkOrder = prog_data->walk_order,
.TileLayout = prog_data->walk_order == INTEL_WALK_ORDER_YXZ ?
TileY32bpe : Linear,
.LocalXMaximum = prog_data->local_size[0] - 1,
.LocalYMaximum = prog_data->local_size[1] - 1,
.LocalZMaximum = prog_data->local_size[2] - 1,
.ThreadGroupIDXDimension = groupCountX,
.ThreadGroupIDYDimension = groupCountY,
.ThreadGroupIDZDimension = groupCountZ,
.ExecutionMask = dispatch.right_mask,
.PostSync = {
.MOCS = anv_mocs(pipeline->base.device, NULL, 0),
},
.InterfaceDescriptor =
get_interface_descriptor_data(cmd_buffer, pipeline->cs,
prog_data, &dispatch),
.EmitInlineParameter = prog_data->uses_inline_data,
.InlineData = {
[ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 0] = num_workgroup_data[0],
[ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 1] = num_workgroup_data[1],
[ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 2] = num_workgroup_data[2],
}
};
cmd_buffer->state.last_compute_walker =
anv_batch_emitn(
&cmd_buffer->batch,
@@ -444,38 +475,11 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer,
GENX(COMPUTE_WALKER),
.IndirectParameterEnable = !anv_address_is_null(indirect_addr),
.PredicateEnable = predicate,
.SIMDSize = dispatch.simd_size / 16,
.MessageSIMD = dispatch.simd_size / 16,
.IndirectDataStartAddress = comp_state->base.push_constants_state.offset,
.IndirectDataLength = comp_state->base.push_constants_state.alloc_size,
.body = body,
#if GFX_VERx10 == 125
.SystolicModeEnable = prog_data->uses_systolic,
#endif
.GenerateLocalID = prog_data->generate_local_id != 0,
.EmitLocal = prog_data->generate_local_id,
.WalkOrder = prog_data->walk_order,
.TileLayout = prog_data->walk_order == INTEL_WALK_ORDER_YXZ ?
TileY32bpe : Linear,
.LocalXMaximum = prog_data->local_size[0] - 1,
.LocalYMaximum = prog_data->local_size[1] - 1,
.LocalZMaximum = prog_data->local_size[2] - 1,
.ThreadGroupIDXDimension = groupCountX,
.ThreadGroupIDYDimension = groupCountY,
.ThreadGroupIDZDimension = groupCountZ,
.ExecutionMask = dispatch.right_mask,
.PostSync = {
.MOCS = anv_mocs(pipeline->base.device, NULL, 0),
},
.InterfaceDescriptor =
get_interface_descriptor_data(cmd_buffer, pipeline->cs,
prog_data, &dispatch),
.EmitInlineParameter = prog_data->uses_inline_data,
.InlineData = {
[ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 0] = num_workgroup_data[0],
[ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 1] = num_workgroup_data[1],
[ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET / 4 + 2] = num_workgroup_data[2],
});
);
}
#else /* #if GFX_VERx10 >= 125 */
@@ -724,33 +728,39 @@ genX(cmd_buffer_dispatch_kernel)(struct anv_cmd_buffer *cmd_buffer,
struct intel_cs_dispatch_info dispatch =
brw_cs_get_dispatch_info(devinfo, cs_prog_data, NULL);
anv_batch_emit(&cmd_buffer->batch, GENX(COMPUTE_WALKER), cw) {
cw.PredicateEnable = false;
cw.SIMDSize = dispatch.simd_size / 16;
cw.MessageSIMD = dispatch.simd_size / 16;
cw.IndirectDataStartAddress = indirect_data.offset;
cw.IndirectDataLength = indirect_data.alloc_size;
cw.LocalXMaximum = cs_prog_data->local_size[0] - 1;
cw.LocalYMaximum = cs_prog_data->local_size[1] - 1;
cw.LocalZMaximum = cs_prog_data->local_size[2] - 1;
cw.ExecutionMask = dispatch.right_mask;
cw.PostSync.MOCS = cmd_buffer->device->isl_dev.mocs.internal;
if (global_size != NULL) {
cw.ThreadGroupIDXDimension = global_size[0];
cw.ThreadGroupIDYDimension = global_size[1];
cw.ThreadGroupIDZDimension = global_size[2];
} else {
cw.IndirectParameterEnable = true;
}
cw.InterfaceDescriptor =
struct GENX(COMPUTE_WALKER_BODY) body = {
.SIMDSize = dispatch.simd_size / 16,
.MessageSIMD = dispatch.simd_size / 16,
.IndirectDataStartAddress = indirect_data.offset,
.IndirectDataLength = indirect_data.alloc_size,
.LocalXMaximum = cs_prog_data->local_size[0] - 1,
.LocalYMaximum = cs_prog_data->local_size[1] - 1,
.LocalZMaximum = cs_prog_data->local_size[2] - 1,
.ExecutionMask = dispatch.right_mask,
.PostSync.MOCS = cmd_buffer->device->isl_dev.mocs.internal,
.InterfaceDescriptor =
get_interface_descriptor_data(cmd_buffer,
kernel->bin,
cs_prog_data,
&dispatch);
&dispatch),
};
if (global_size != NULL) {
body.ThreadGroupIDXDimension = global_size[0];
body.ThreadGroupIDYDimension = global_size[1];
body.ThreadGroupIDZDimension = global_size[2];
}
cmd_buffer->state.last_compute_walker =
anv_batch_emitn(
&cmd_buffer->batch,
GENX(COMPUTE_WALKER_length),
GENX(COMPUTE_WALKER),
.IndirectParameterEnable = global_size == NULL,
.PredicateEnable = false,
.body = body,
);
/* We just blew away the compute pipeline state */
cmd_buffer->state.compute.pipeline_dirty = true;
}
@@ -1132,26 +1142,39 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer,
struct intel_cs_dispatch_info dispatch =
brw_cs_get_dispatch_info(device->info, cs_prog_data, NULL);
anv_batch_emit(&cmd_buffer->batch, GENX(COMPUTE_WALKER), cw) {
cw.IndirectParameterEnable = params->is_launch_size_indirect;
cw.PredicateEnable = cmd_buffer->state.conditional_render_enabled;
cw.SIMDSize = dispatch.simd_size / 16;
cw.MessageSIMD = dispatch.simd_size / 16;
cw.LocalXMaximum = (1 << local_size_log2[0]) - 1;
cw.LocalYMaximum = (1 << local_size_log2[1]) - 1;
cw.LocalZMaximum = (1 << local_size_log2[2]) - 1;
cw.ThreadGroupIDXDimension = global_size[0];
cw.ThreadGroupIDYDimension = global_size[1];
cw.ThreadGroupIDZDimension = global_size[2];
cw.ExecutionMask = 0xff;
cw.EmitInlineParameter = true;
cw.PostSync.MOCS = anv_mocs(pipeline->base.device, NULL, 0);
const gl_shader_stage s = MESA_SHADER_RAYGEN;
struct anv_state *surfaces = &cmd_buffer->state.binding_tables[s];
struct anv_state *samplers = &cmd_buffer->state.samplers[s];
struct brw_rt_raygen_trampoline_params trampoline_params = {
.rt_disp_globals_addr = anv_address_physical(rtdg_addr),
.raygen_bsr_addr =
params->is_sbt_indirect ?
(params->indirect_sbts_addr +
offsetof(VkTraceRaysIndirectCommand2KHR,
raygenShaderRecordAddress)) :
params->raygen_sbt->deviceAddress,
.is_indirect = params->is_sbt_indirect,
.local_group_size_log2 = {
local_size_log2[0],
local_size_log2[1],
local_size_log2[2],
},
};
const gl_shader_stage s = MESA_SHADER_RAYGEN;
struct anv_device *device = cmd_buffer->device;
struct anv_state *surfaces = &cmd_buffer->state.binding_tables[s];
struct anv_state *samplers = &cmd_buffer->state.samplers[s];
cw.InterfaceDescriptor = (struct GENX(INTERFACE_DESCRIPTOR_DATA)) {
struct GENX(COMPUTE_WALKER_BODY) body = {
.SIMDSize = dispatch.simd_size / 16,
.MessageSIMD = dispatch.simd_size / 16,
.LocalXMaximum = (1 << local_size_log2[0]) - 1,
.LocalYMaximum = (1 << local_size_log2[1]) - 1,
.LocalZMaximum = (1 << local_size_log2[2]) - 1,
.ThreadGroupIDXDimension = global_size[0],
.ThreadGroupIDYDimension = global_size[1],
.ThreadGroupIDZDimension = global_size[2],
.ExecutionMask = 0xff,
.EmitInlineParameter = true,
.PostSync.MOCS = anv_mocs(pipeline->base.device, NULL, 0),
.InterfaceDescriptor = (struct GENX(INTERFACE_DESCRIPTOR_DATA)) {
.KernelStartPointer = device->rt_trampoline->kernel.offset,
.SamplerStatePointer = samplers->offset,
/* i965: DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4), */
@@ -1162,26 +1185,21 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer,
#if INTEL_NEEDS_WA_14017794102
.ThreadPreemption = false,
#endif
};
},
};
struct brw_rt_raygen_trampoline_params trampoline_params = {
.rt_disp_globals_addr = anv_address_physical(rtdg_addr),
.raygen_bsr_addr =
params->is_sbt_indirect ?
(params->indirect_sbts_addr +
offsetof(VkTraceRaysIndirectCommand2KHR,
raygenShaderRecordAddress)) :
params->raygen_sbt->deviceAddress,
.is_indirect = params->is_sbt_indirect,
.local_group_size_log2 = {
local_size_log2[0],
local_size_log2[1],
local_size_log2[2],
},
};
STATIC_ASSERT(sizeof(trampoline_params) == 32);
memcpy(cw.InlineData, &trampoline_params, sizeof(trampoline_params));
}
STATIC_ASSERT(sizeof(trampoline_params) == 32);
memcpy(body.InlineData, &trampoline_params, sizeof(trampoline_params));
cmd_buffer->state.last_compute_walker =
anv_batch_emitn(
&cmd_buffer->batch,
GENX(COMPUTE_WALKER_length),
GENX(COMPUTE_WALKER),
.IndirectParameterEnable = params->is_launch_size_indirect,
.PredicateEnable = cmd_buffer->state.conditional_render_enabled,
.body = body,
);
trace_intel_end_rays(&cmd_buffer->trace,
params->launch_size[0],

View File

@@ -565,30 +565,30 @@ genX(emit_simple_shader_dispatch)(struct anv_simple_shader *state,
brw_cs_get_dispatch_info(devinfo, prog_data, NULL);
#if GFX_VERx10 >= 125
anv_batch_emit(batch, GENX(COMPUTE_WALKER), cw) {
cw.SIMDSize = dispatch.simd_size / 16;
cw.MessageSIMD = dispatch.simd_size / 16,
cw.IndirectDataStartAddress = push_state.offset;
cw.IndirectDataLength = push_state.alloc_size;
cw.LocalXMaximum = prog_data->local_size[0] - 1;
cw.LocalYMaximum = prog_data->local_size[1] - 1;
cw.LocalZMaximum = prog_data->local_size[2] - 1;
cw.ThreadGroupIDXDimension = DIV_ROUND_UP(num_threads,
dispatch.simd_size);
cw.ThreadGroupIDYDimension = 1;
cw.ThreadGroupIDZDimension = 1;
cw.ExecutionMask = dispatch.right_mask;
cw.PostSync.MOCS = anv_mocs(device, NULL, 0);
struct GENX(COMPUTE_WALKER_BODY) body = {
.SIMDSize = dispatch.simd_size / 16,
.MessageSIMD = dispatch.simd_size / 16,
.IndirectDataStartAddress = push_state.offset,
.IndirectDataLength = push_state.alloc_size,
.LocalXMaximum = prog_data->local_size[0] - 1,
.LocalYMaximum = prog_data->local_size[1] - 1,
.LocalZMaximum = prog_data->local_size[2] - 1,
.ThreadGroupIDXDimension = DIV_ROUND_UP(num_threads,
dispatch.simd_size),
.ThreadGroupIDYDimension = 1,
.ThreadGroupIDZDimension = 1,
.ExecutionMask = dispatch.right_mask,
.PostSync.MOCS = anv_mocs(device, NULL, 0),
#if GFX_VERx10 >= 125
cw.GenerateLocalID = prog_data->generate_local_id != 0;
cw.EmitLocal = prog_data->generate_local_id;
cw.WalkOrder = prog_data->walk_order;
cw.TileLayout = prog_data->walk_order == INTEL_WALK_ORDER_YXZ ?
TileY32bpe : Linear;
.GenerateLocalID = prog_data->generate_local_id != 0,
.EmitLocal = prog_data->generate_local_id,
.WalkOrder = prog_data->walk_order,
.TileLayout = prog_data->walk_order == INTEL_WALK_ORDER_YXZ ?
TileY32bpe : Linear,
#endif
cw.InterfaceDescriptor = (struct GENX(INTERFACE_DESCRIPTOR_DATA)) {
.InterfaceDescriptor = (struct GENX(INTERFACE_DESCRIPTOR_DATA)) {
.KernelStartPointer = state->kernel->kernel.offset +
brw_cs_prog_data_prog_offset(prog_data,
dispatch.simd_size),
@@ -599,7 +599,11 @@ genX(emit_simple_shader_dispatch)(struct anv_simple_shader *state,
.SharedLocalMemorySize = intel_compute_slm_encode_size(GFX_VER,
prog_data->base.total_shared),
.NumberOfBarriers = prog_data->uses_barrier,
};
},
};
anv_batch_emit(batch, GENX(COMPUTE_WALKER), cw) {
cw.body = body;
}
#else
const uint32_t vfe_curbe_allocation =