anv: Use brw_cs_get_dispatch_info()
And since right_mask is already provided as part of dispatch_info, just use that instead of storing it.

Reviewed-by: Marcin Ślusarz <marcin.slusarz@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10504>
This commit is contained in:
@@ -1058,9 +1058,10 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
|
|||||||
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
|
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
|
||||||
const struct anv_push_range *range = &pipeline->cs->bind_map.push_ranges[0];
|
const struct anv_push_range *range = &pipeline->cs->bind_map.push_ranges[0];
|
||||||
|
|
||||||
const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
|
const struct brw_cs_dispatch_info dispatch =
|
||||||
|
brw_cs_get_dispatch_info(devinfo, cs_prog_data, NULL);
|
||||||
const unsigned total_push_constants_size =
|
const unsigned total_push_constants_size =
|
||||||
brw_cs_push_const_total_size(cs_prog_data, cs_params.threads);
|
brw_cs_push_const_total_size(cs_prog_data, dispatch.threads);
|
||||||
if (total_push_constants_size == 0)
|
if (total_push_constants_size == 0)
|
||||||
return (struct anv_state) { .offset = 0 };
|
return (struct anv_state) { .offset = 0 };
|
||||||
|
|
||||||
@@ -1089,7 +1090,7 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (cs_prog_data->push.per_thread.size > 0) {
|
if (cs_prog_data->push.per_thread.size > 0) {
|
||||||
for (unsigned t = 0; t < cs_params.threads; t++) {
|
for (unsigned t = 0; t < dispatch.threads; t++) {
|
||||||
memcpy(dst, src, cs_prog_data->push.per_thread.size);
|
memcpy(dst, src, cs_prog_data->push.per_thread.size);
|
||||||
|
|
||||||
uint32_t *subgroup_id = dst +
|
uint32_t *subgroup_id = dst +
|
||||||
|
@@ -1876,24 +1876,6 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
|
|||||||
return VK_SUCCESS;
|
return VK_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct anv_cs_parameters
|
|
||||||
anv_cs_parameters(const struct anv_compute_pipeline *pipeline)
|
|
||||||
{
|
|
||||||
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
|
|
||||||
|
|
||||||
struct anv_cs_parameters cs_params = {};
|
|
||||||
|
|
||||||
cs_params.group_size = cs_prog_data->local_size[0] *
|
|
||||||
cs_prog_data->local_size[1] *
|
|
||||||
cs_prog_data->local_size[2];
|
|
||||||
cs_params.simd_size =
|
|
||||||
brw_cs_simd_size_for_group_size(&pipeline->base.device->info,
|
|
||||||
cs_prog_data, cs_params.group_size);
|
|
||||||
cs_params.threads = DIV_ROUND_UP(cs_params.group_size, cs_params.simd_size);
|
|
||||||
|
|
||||||
return cs_params;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Copy pipeline state not marked as dynamic.
|
* Copy pipeline state not marked as dynamic.
|
||||||
* Dynamic state is pipeline state which hasn't been provided at pipeline
|
* Dynamic state is pipeline state which hasn't been provided at pipeline
|
||||||
|
@@ -3430,7 +3430,6 @@ struct anv_compute_pipeline {
|
|||||||
struct anv_pipeline base;
|
struct anv_pipeline base;
|
||||||
|
|
||||||
struct anv_shader_bin * cs;
|
struct anv_shader_bin * cs;
|
||||||
uint32_t cs_right_mask;
|
|
||||||
uint32_t batch_data[9];
|
uint32_t batch_data[9];
|
||||||
uint32_t interface_descriptor_data[8];
|
uint32_t interface_descriptor_data[8];
|
||||||
};
|
};
|
||||||
@@ -3515,15 +3514,6 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
|
|||||||
const char *entrypoint,
|
const char *entrypoint,
|
||||||
const VkSpecializationInfo *spec_info);
|
const VkSpecializationInfo *spec_info);
|
||||||
|
|
||||||
struct anv_cs_parameters {
|
|
||||||
uint32_t group_size;
|
|
||||||
uint32_t simd_size;
|
|
||||||
uint32_t threads;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct anv_cs_parameters
|
|
||||||
anv_cs_parameters(const struct anv_compute_pipeline *pipeline);
|
|
||||||
|
|
||||||
struct anv_format_plane {
|
struct anv_format_plane {
|
||||||
enum isl_format isl_format:16;
|
enum isl_format isl_format:16;
|
||||||
struct isl_swizzle swizzle;
|
struct isl_swizzle swizzle;
|
||||||
|
@@ -4559,12 +4559,15 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer,
|
|||||||
struct anv_cmd_compute_state *comp_state = &cmd_buffer->state.compute;
|
struct anv_cmd_compute_state *comp_state = &cmd_buffer->state.compute;
|
||||||
const struct anv_shader_bin *cs_bin = pipeline->cs;
|
const struct anv_shader_bin *cs_bin = pipeline->cs;
|
||||||
bool predicate = cmd_buffer->state.conditional_render_enabled;
|
bool predicate = cmd_buffer->state.conditional_render_enabled;
|
||||||
const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
|
|
||||||
|
const struct intel_device_info *devinfo = &pipeline->base.device->info;
|
||||||
|
const struct brw_cs_dispatch_info dispatch =
|
||||||
|
brw_cs_get_dispatch_info(devinfo, prog_data, NULL);
|
||||||
|
|
||||||
anv_batch_emit(&cmd_buffer->batch, GENX(COMPUTE_WALKER), cw) {
|
anv_batch_emit(&cmd_buffer->batch, GENX(COMPUTE_WALKER), cw) {
|
||||||
cw.IndirectParameterEnable = indirect;
|
cw.IndirectParameterEnable = indirect;
|
||||||
cw.PredicateEnable = predicate;
|
cw.PredicateEnable = predicate;
|
||||||
cw.SIMDSize = cs_params.simd_size / 16;
|
cw.SIMDSize = dispatch.simd_size / 16;
|
||||||
cw.IndirectDataStartAddress = comp_state->push_data.offset;
|
cw.IndirectDataStartAddress = comp_state->push_data.offset;
|
||||||
cw.IndirectDataLength = comp_state->push_data.alloc_size;
|
cw.IndirectDataLength = comp_state->push_data.alloc_size;
|
||||||
cw.LocalXMaximum = prog_data->local_size[0] - 1;
|
cw.LocalXMaximum = prog_data->local_size[0] - 1;
|
||||||
@@ -4573,7 +4576,7 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer,
|
|||||||
cw.ThreadGroupIDXDimension = groupCountX;
|
cw.ThreadGroupIDXDimension = groupCountX;
|
||||||
cw.ThreadGroupIDYDimension = groupCountY;
|
cw.ThreadGroupIDYDimension = groupCountY;
|
||||||
cw.ThreadGroupIDZDimension = groupCountZ;
|
cw.ThreadGroupIDZDimension = groupCountZ;
|
||||||
cw.ExecutionMask = pipeline->cs_right_mask;
|
cw.ExecutionMask = dispatch.right_mask;
|
||||||
|
|
||||||
cw.InterfaceDescriptor = (struct GENX(INTERFACE_DESCRIPTOR_DATA)) {
|
cw.InterfaceDescriptor = (struct GENX(INTERFACE_DESCRIPTOR_DATA)) {
|
||||||
.KernelStartPointer = cs_bin->kernel.offset,
|
.KernelStartPointer = cs_bin->kernel.offset,
|
||||||
@@ -4583,7 +4586,7 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer,
|
|||||||
cmd_buffer->state.binding_tables[MESA_SHADER_COMPUTE].offset,
|
cmd_buffer->state.binding_tables[MESA_SHADER_COMPUTE].offset,
|
||||||
.BindingTableEntryCount =
|
.BindingTableEntryCount =
|
||||||
1 + MIN2(pipeline->cs->bind_map.surface_count, 30),
|
1 + MIN2(pipeline->cs->bind_map.surface_count, 30),
|
||||||
.NumberofThreadsinGPGPUThreadGroup = cs_params.threads,
|
.NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
|
||||||
.SharedLocalMemorySize = encode_slm_size(GFX_VER,
|
.SharedLocalMemorySize = encode_slm_size(GFX_VER,
|
||||||
prog_data->base.total_shared),
|
prog_data->base.total_shared),
|
||||||
.BarrierEnable = prog_data->uses_barrier,
|
.BarrierEnable = prog_data->uses_barrier,
|
||||||
@@ -4602,19 +4605,22 @@ emit_gpgpu_walker(struct anv_cmd_buffer *cmd_buffer,
|
|||||||
{
|
{
|
||||||
bool predicate = (GFX_VER <= 7 && indirect) ||
|
bool predicate = (GFX_VER <= 7 && indirect) ||
|
||||||
cmd_buffer->state.conditional_render_enabled;
|
cmd_buffer->state.conditional_render_enabled;
|
||||||
const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
|
|
||||||
|
const struct intel_device_info *devinfo = &pipeline->base.device->info;
|
||||||
|
const struct brw_cs_dispatch_info dispatch =
|
||||||
|
brw_cs_get_dispatch_info(devinfo, prog_data, NULL);
|
||||||
|
|
||||||
anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), ggw) {
|
anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), ggw) {
|
||||||
ggw.IndirectParameterEnable = indirect;
|
ggw.IndirectParameterEnable = indirect;
|
||||||
ggw.PredicateEnable = predicate;
|
ggw.PredicateEnable = predicate;
|
||||||
ggw.SIMDSize = cs_params.simd_size / 16;
|
ggw.SIMDSize = dispatch.simd_size / 16;
|
||||||
ggw.ThreadDepthCounterMaximum = 0;
|
ggw.ThreadDepthCounterMaximum = 0;
|
||||||
ggw.ThreadHeightCounterMaximum = 0;
|
ggw.ThreadHeightCounterMaximum = 0;
|
||||||
ggw.ThreadWidthCounterMaximum = cs_params.threads - 1;
|
ggw.ThreadWidthCounterMaximum = dispatch.threads - 1;
|
||||||
ggw.ThreadGroupIDXDimension = groupCountX;
|
ggw.ThreadGroupIDXDimension = groupCountX;
|
||||||
ggw.ThreadGroupIDYDimension = groupCountY;
|
ggw.ThreadGroupIDYDimension = groupCountY;
|
||||||
ggw.ThreadGroupIDZDimension = groupCountZ;
|
ggw.ThreadGroupIDZDimension = groupCountZ;
|
||||||
ggw.RightExecutionMask = pipeline->cs_right_mask;
|
ggw.RightExecutionMask = dispatch.right_mask;
|
||||||
ggw.BottomExecutionMask = 0xffffffff;
|
ggw.BottomExecutionMask = 0xffffffff;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -2513,9 +2513,6 @@ emit_compute_state(struct anv_compute_pipeline *pipeline,
|
|||||||
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
|
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
|
||||||
anv_pipeline_setup_l3_config(&pipeline->base, cs_prog_data->base.total_shared > 0);
|
anv_pipeline_setup_l3_config(&pipeline->base, cs_prog_data->base.total_shared > 0);
|
||||||
|
|
||||||
const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
|
|
||||||
pipeline->cs_right_mask = brw_cs_right_mask(cs_params.group_size, cs_params.simd_size);
|
|
||||||
|
|
||||||
const uint32_t subslices = MAX2(device->physical->subslice_total, 1);
|
const uint32_t subslices = MAX2(device->physical->subslice_total, 1);
|
||||||
|
|
||||||
const UNUSED struct anv_shader_bin *cs_bin = pipeline->cs;
|
const UNUSED struct anv_shader_bin *cs_bin = pipeline->cs;
|
||||||
@@ -2535,22 +2532,20 @@ static void
|
|||||||
emit_compute_state(struct anv_compute_pipeline *pipeline,
|
emit_compute_state(struct anv_compute_pipeline *pipeline,
|
||||||
const struct anv_device *device)
|
const struct anv_device *device)
|
||||||
{
|
{
|
||||||
|
const struct intel_device_info *devinfo = &device->info;
|
||||||
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
|
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
|
||||||
|
|
||||||
anv_pipeline_setup_l3_config(&pipeline->base, cs_prog_data->base.total_shared > 0);
|
anv_pipeline_setup_l3_config(&pipeline->base, cs_prog_data->base.total_shared > 0);
|
||||||
|
|
||||||
const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
|
const struct brw_cs_dispatch_info dispatch =
|
||||||
|
brw_cs_get_dispatch_info(devinfo, cs_prog_data, NULL);
|
||||||
pipeline->cs_right_mask = brw_cs_right_mask(cs_params.group_size, cs_params.simd_size);
|
|
||||||
|
|
||||||
const uint32_t vfe_curbe_allocation =
|
const uint32_t vfe_curbe_allocation =
|
||||||
ALIGN(cs_prog_data->push.per_thread.regs * cs_params.threads +
|
ALIGN(cs_prog_data->push.per_thread.regs * dispatch.threads +
|
||||||
cs_prog_data->push.cross_thread.regs, 2);
|
cs_prog_data->push.cross_thread.regs, 2);
|
||||||
|
|
||||||
const uint32_t subslices = MAX2(device->physical->subslice_total, 1);
|
const uint32_t subslices = MAX2(device->physical->subslice_total, 1);
|
||||||
|
|
||||||
const struct anv_shader_bin *cs_bin = pipeline->cs;
|
const struct anv_shader_bin *cs_bin = pipeline->cs;
|
||||||
const struct intel_device_info *devinfo = &device->info;
|
|
||||||
|
|
||||||
anv_batch_emit(&pipeline->base.batch, GENX(MEDIA_VFE_STATE), vfe) {
|
anv_batch_emit(&pipeline->base.batch, GENX(MEDIA_VFE_STATE), vfe) {
|
||||||
#if GFX_VER > 7
|
#if GFX_VER > 7
|
||||||
@@ -2598,7 +2593,7 @@ emit_compute_state(struct anv_compute_pipeline *pipeline,
|
|||||||
struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = {
|
struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = {
|
||||||
.KernelStartPointer =
|
.KernelStartPointer =
|
||||||
cs_bin->kernel.offset +
|
cs_bin->kernel.offset +
|
||||||
brw_cs_prog_data_prog_offset(cs_prog_data, cs_params.simd_size),
|
brw_cs_prog_data_prog_offset(cs_prog_data, dispatch.simd_size),
|
||||||
|
|
||||||
/* Wa_1606682166 */
|
/* Wa_1606682166 */
|
||||||
.SamplerCount = GFX_VER == 11 ? 0 : get_sampler_count(cs_bin),
|
.SamplerCount = GFX_VER == 11 ? 0 : get_sampler_count(cs_bin),
|
||||||
@@ -2631,7 +2626,7 @@ emit_compute_state(struct anv_compute_pipeline *pipeline,
|
|||||||
.ThreadPreemptionDisable = true,
|
.ThreadPreemptionDisable = true,
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
.NumberofThreadsinGPGPUThreadGroup = cs_params.threads,
|
.NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
|
||||||
};
|
};
|
||||||
GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL,
|
GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL,
|
||||||
pipeline->interface_descriptor_data,
|
pipeline->interface_descriptor_data,
|
||||||
|
Reference in New Issue
Block a user