anv: Use brw_cs_get_dispatch_info()

And since right_mask is already provided as part of dispatch_info,
just use that instead of storing it.

Reviewed-by: Marcin Ślusarz <marcin.slusarz@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10504>
Author: Caio Marcelo de Oliveira Filho
Date:   2021-04-28 10:56:58 -07:00
Parent: 59cbd50bfa
Commit: 279acf1031

5 changed files with 24 additions and 50 deletions
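Note: brw_cs_get_dispatch_info() lives on the compiler side and bundles the values the deleted anv_cs_parameters() helper computed by hand (group size, SIMD width, thread count) together with the right mask the driver previously derived via brw_cs_right_mask(). A minimal standalone sketch of the equivalent computation follows; the sketch_ names and the SKETCH_DIV_ROUND_UP macro are illustrative only, the SIMD width is taken as an input rather than chosen via brw_cs_simd_size_for_group_size() as in the real code path, and the right-mask formula is an approximation of what the compiler helper does, not a copy of it.

#include <stdint.h>

/* Illustrative only -- not the Mesa API.  Assumes simd_size is a power of
 * two (8, 16 or 32). */
#define SKETCH_DIV_ROUND_UP(a, b) (((a) + (b) - 1) / (b))

struct sketch_cs_dispatch_info {
   uint32_t group_size;   /* product of the workgroup dimensions */
   uint32_t simd_size;    /* SIMD width used to dispatch the group */
   uint32_t threads;      /* hardware threads needed to cover the group */
   uint32_t right_mask;   /* execution mask for the last, possibly partial, thread */
};

static struct sketch_cs_dispatch_info
sketch_cs_dispatch_info(const uint32_t local_size[3], uint32_t simd_size)
{
   struct sketch_cs_dispatch_info info = {0};

   /* Same arithmetic as the removed anv_cs_parameters() below. */
   info.group_size = local_size[0] * local_size[1] * local_size[2];
   info.simd_size = simd_size;
   info.threads = SKETCH_DIV_ROUND_UP(info.group_size, info.simd_size);

   /* Approximation of brw_cs_right_mask(): the last thread only enables
    * the channels left over after the full SIMD-wide threads. */
   const uint32_t remainder = info.group_size & (info.simd_size - 1);
   info.right_mask = remainder ? ~0u >> (32 - remainder)
                               : ~0u >> (32 - info.simd_size);
   return info;
}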

@@ -1058,9 +1058,10 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
    const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
    const struct anv_push_range *range = &pipeline->cs->bind_map.push_ranges[0];
-   const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
+   const struct brw_cs_dispatch_info dispatch =
+      brw_cs_get_dispatch_info(devinfo, cs_prog_data, NULL);
    const unsigned total_push_constants_size =
-      brw_cs_push_const_total_size(cs_prog_data, cs_params.threads);
+      brw_cs_push_const_total_size(cs_prog_data, dispatch.threads);
    if (total_push_constants_size == 0)
       return (struct anv_state) { .offset = 0 };
@@ -1089,7 +1090,7 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
    }
    if (cs_prog_data->push.per_thread.size > 0) {
-      for (unsigned t = 0; t < cs_params.threads; t++) {
+      for (unsigned t = 0; t < dispatch.threads; t++) {
          memcpy(dst, src, cs_prog_data->push.per_thread.size);
          uint32_t *subgroup_id = dst +

@@ -1876,24 +1876,6 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
    return VK_SUCCESS;
 }
-struct anv_cs_parameters
-anv_cs_parameters(const struct anv_compute_pipeline *pipeline)
-{
-   const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
-   struct anv_cs_parameters cs_params = {};
-   cs_params.group_size = cs_prog_data->local_size[0] *
-                          cs_prog_data->local_size[1] *
-                          cs_prog_data->local_size[2];
-   cs_params.simd_size =
-      brw_cs_simd_size_for_group_size(&pipeline->base.device->info,
-                                      cs_prog_data, cs_params.group_size);
-   cs_params.threads = DIV_ROUND_UP(cs_params.group_size, cs_params.simd_size);
-   return cs_params;
-}
 /**
  * Copy pipeline state not marked as dynamic.
  * Dynamic state is pipeline state which hasn't been provided at pipeline

@@ -3430,7 +3430,6 @@ struct anv_compute_pipeline {
    struct anv_pipeline base;
    struct anv_shader_bin * cs;
-   uint32_t cs_right_mask;
    uint32_t batch_data[9];
    uint32_t interface_descriptor_data[8];
 };
@@ -3515,15 +3514,6 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
                         const char *entrypoint,
                         const VkSpecializationInfo *spec_info);
-struct anv_cs_parameters {
-   uint32_t group_size;
-   uint32_t simd_size;
-   uint32_t threads;
-};
-struct anv_cs_parameters
-anv_cs_parameters(const struct anv_compute_pipeline *pipeline);
 struct anv_format_plane {
    enum isl_format isl_format:16;
    struct isl_swizzle swizzle;

@@ -4559,12 +4559,15 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer,
    struct anv_cmd_compute_state *comp_state = &cmd_buffer->state.compute;
    const struct anv_shader_bin *cs_bin = pipeline->cs;
    bool predicate = cmd_buffer->state.conditional_render_enabled;
-   const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
+   const struct intel_device_info *devinfo = &pipeline->base.device->info;
+   const struct brw_cs_dispatch_info dispatch =
+      brw_cs_get_dispatch_info(devinfo, prog_data, NULL);
    anv_batch_emit(&cmd_buffer->batch, GENX(COMPUTE_WALKER), cw) {
       cw.IndirectParameterEnable = indirect;
       cw.PredicateEnable = predicate;
-      cw.SIMDSize = cs_params.simd_size / 16;
+      cw.SIMDSize = dispatch.simd_size / 16;
       cw.IndirectDataStartAddress = comp_state->push_data.offset;
       cw.IndirectDataLength = comp_state->push_data.alloc_size;
       cw.LocalXMaximum = prog_data->local_size[0] - 1;
@@ -4573,7 +4576,7 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer,
       cw.ThreadGroupIDXDimension = groupCountX;
       cw.ThreadGroupIDYDimension = groupCountY;
       cw.ThreadGroupIDZDimension = groupCountZ;
-      cw.ExecutionMask = pipeline->cs_right_mask;
+      cw.ExecutionMask = dispatch.right_mask;
       cw.InterfaceDescriptor = (struct GENX(INTERFACE_DESCRIPTOR_DATA)) {
          .KernelStartPointer = cs_bin->kernel.offset,
@@ -4583,7 +4586,7 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer,
             cmd_buffer->state.binding_tables[MESA_SHADER_COMPUTE].offset,
          .BindingTableEntryCount =
             1 + MIN2(pipeline->cs->bind_map.surface_count, 30),
-         .NumberofThreadsinGPGPUThreadGroup = cs_params.threads,
+         .NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
          .SharedLocalMemorySize = encode_slm_size(GFX_VER,
                                                   prog_data->base.total_shared),
          .BarrierEnable = prog_data->uses_barrier,
@@ -4602,19 +4605,22 @@ emit_gpgpu_walker(struct anv_cmd_buffer *cmd_buffer,
 {
    bool predicate = (GFX_VER <= 7 && indirect) ||
                     cmd_buffer->state.conditional_render_enabled;
-   const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
+   const struct intel_device_info *devinfo = &pipeline->base.device->info;
+   const struct brw_cs_dispatch_info dispatch =
+      brw_cs_get_dispatch_info(devinfo, prog_data, NULL);
    anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), ggw) {
       ggw.IndirectParameterEnable = indirect;
       ggw.PredicateEnable = predicate;
-      ggw.SIMDSize = cs_params.simd_size / 16;
+      ggw.SIMDSize = dispatch.simd_size / 16;
       ggw.ThreadDepthCounterMaximum = 0;
       ggw.ThreadHeightCounterMaximum = 0;
-      ggw.ThreadWidthCounterMaximum = cs_params.threads - 1;
+      ggw.ThreadWidthCounterMaximum = dispatch.threads - 1;
       ggw.ThreadGroupIDXDimension = groupCountX;
       ggw.ThreadGroupIDYDimension = groupCountY;
       ggw.ThreadGroupIDZDimension = groupCountZ;
-      ggw.RightExecutionMask = pipeline->cs_right_mask;
+      ggw.RightExecutionMask = dispatch.right_mask;
       ggw.BottomExecutionMask = 0xffffffff;
    }
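To put numbers on the walker programming above (worked from the sketch near the top of this page, not from the hardware documentation): a 40x1x1 workgroup dispatched at SIMD16 gives threads = DIV_ROUND_UP(40, 16) = 3, so ThreadWidthCounterMaximum is 2 and SIMDSize is 16 / 16 = 1; the third thread covers only 40 - 2 * 16 = 8 invocations, so right_mask comes out as 0xff, enabling just those 8 channels, while BottomExecutionMask stays 0xffffffff.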

@@ -2513,9 +2513,6 @@ emit_compute_state(struct anv_compute_pipeline *pipeline,
    const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
    anv_pipeline_setup_l3_config(&pipeline->base, cs_prog_data->base.total_shared > 0);
-   const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
-   pipeline->cs_right_mask = brw_cs_right_mask(cs_params.group_size, cs_params.simd_size);
    const uint32_t subslices = MAX2(device->physical->subslice_total, 1);
    const UNUSED struct anv_shader_bin *cs_bin = pipeline->cs;
@@ -2535,22 +2532,20 @@ static void
 emit_compute_state(struct anv_compute_pipeline *pipeline,
                    const struct anv_device *device)
 {
+   const struct intel_device_info *devinfo = &device->info;
    const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
    anv_pipeline_setup_l3_config(&pipeline->base, cs_prog_data->base.total_shared > 0);
-   const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
-   pipeline->cs_right_mask = brw_cs_right_mask(cs_params.group_size, cs_params.simd_size);
+   const struct brw_cs_dispatch_info dispatch =
+      brw_cs_get_dispatch_info(devinfo, cs_prog_data, NULL);
    const uint32_t vfe_curbe_allocation =
-      ALIGN(cs_prog_data->push.per_thread.regs * cs_params.threads +
+      ALIGN(cs_prog_data->push.per_thread.regs * dispatch.threads +
            cs_prog_data->push.cross_thread.regs, 2);
    const uint32_t subslices = MAX2(device->physical->subslice_total, 1);
    const struct anv_shader_bin *cs_bin = pipeline->cs;
-   const struct intel_device_info *devinfo = &device->info;
    anv_batch_emit(&pipeline->base.batch, GENX(MEDIA_VFE_STATE), vfe) {
 #if GFX_VER > 7
@@ -2598,7 +2593,7 @@ emit_compute_state(struct anv_compute_pipeline *pipeline,
    struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = {
       .KernelStartPointer =
          cs_bin->kernel.offset +
-         brw_cs_prog_data_prog_offset(cs_prog_data, cs_params.simd_size),
+         brw_cs_prog_data_prog_offset(cs_prog_data, dispatch.simd_size),
       /* Wa_1606682166 */
       .SamplerCount = GFX_VER == 11 ? 0 : get_sampler_count(cs_bin),
@@ -2631,7 +2626,7 @@ emit_compute_state(struct anv_compute_pipeline *pipeline,
       .ThreadPreemptionDisable = true,
 #endif
-      .NumberofThreadsinGPGPUThreadGroup = cs_params.threads,
+      .NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
    };
    GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL,
                                         pipeline->interface_descriptor_data,