anv: Use brw_cs_get_dispatch_info()

Since right_mask is already provided as part of struct brw_cs_dispatch_info,
just use that instead of storing it in the compute pipeline.

Reviewed-by: Marcin Ślusarz <marcin.slusarz@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10504>
This commit is contained in:
Caio Marcelo de Oliveira Filho
2021-04-28 10:56:58 -07:00
parent 59cbd50bfa
commit 279acf1031
5 changed files with 24 additions and 50 deletions

View File

@@ -1058,9 +1058,10 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
const struct anv_push_range *range = &pipeline->cs->bind_map.push_ranges[0]; const struct anv_push_range *range = &pipeline->cs->bind_map.push_ranges[0];
const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline); const struct brw_cs_dispatch_info dispatch =
brw_cs_get_dispatch_info(devinfo, cs_prog_data, NULL);
const unsigned total_push_constants_size = const unsigned total_push_constants_size =
brw_cs_push_const_total_size(cs_prog_data, cs_params.threads); brw_cs_push_const_total_size(cs_prog_data, dispatch.threads);
if (total_push_constants_size == 0) if (total_push_constants_size == 0)
return (struct anv_state) { .offset = 0 }; return (struct anv_state) { .offset = 0 };
@@ -1089,7 +1090,7 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
} }
if (cs_prog_data->push.per_thread.size > 0) { if (cs_prog_data->push.per_thread.size > 0) {
for (unsigned t = 0; t < cs_params.threads; t++) { for (unsigned t = 0; t < dispatch.threads; t++) {
memcpy(dst, src, cs_prog_data->push.per_thread.size); memcpy(dst, src, cs_prog_data->push.per_thread.size);
uint32_t *subgroup_id = dst + uint32_t *subgroup_id = dst +

View File

@@ -1876,24 +1876,6 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
return VK_SUCCESS; return VK_SUCCESS;
} }
/**
 * Compute the compute-shader dispatch parameters for a pipeline.
 *
 * Reads the pipeline's brw_cs_prog_data and returns, by value, a
 * struct anv_cs_parameters holding the workgroup size, the SIMD size
 * selected for that workgroup size, and the resulting thread count.
 */
struct anv_cs_parameters
anv_cs_parameters(const struct anv_compute_pipeline *pipeline)
{
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
struct anv_cs_parameters cs_params = {};
/* Workgroup size is the product of the three local_size dimensions. */
cs_params.group_size = cs_prog_data->local_size[0] *
cs_prog_data->local_size[1] *
cs_prog_data->local_size[2];
/* SIMD size is chosen by the compiler helper for this device and
 * workgroup size.
 */
cs_params.simd_size =
brw_cs_simd_size_for_group_size(&pipeline->base.device->info,
cs_prog_data, cs_params.group_size);
/* Thread count is group_size / simd_size, rounded up. */
cs_params.threads = DIV_ROUND_UP(cs_params.group_size, cs_params.simd_size);
return cs_params;
}
/** /**
* Copy pipeline state not marked as dynamic. * Copy pipeline state not marked as dynamic.
* Dynamic state is pipeline state which hasn't been provided at pipeline * Dynamic state is pipeline state which hasn't been provided at pipeline

View File

@@ -3430,7 +3430,6 @@ struct anv_compute_pipeline {
struct anv_pipeline base; struct anv_pipeline base;
struct anv_shader_bin * cs; struct anv_shader_bin * cs;
uint32_t cs_right_mask;
uint32_t batch_data[9]; uint32_t batch_data[9];
uint32_t interface_descriptor_data[8]; uint32_t interface_descriptor_data[8];
}; };
@@ -3515,15 +3514,6 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
const char *entrypoint, const char *entrypoint,
const VkSpecializationInfo *spec_info); const VkSpecializationInfo *spec_info);
/** Compute-shader dispatch parameters, as returned by anv_cs_parameters(). */
struct anv_cs_parameters {
/* Product of the three local_size dimensions of the compute shader. */
uint32_t group_size;
/* SIMD size selected by brw_cs_simd_size_for_group_size() for group_size. */
uint32_t simd_size;
/* group_size divided by simd_size, rounded up. */
uint32_t threads;
};
/**
 * Return the group size, SIMD size and thread count for the given compute
 * pipeline.
 */
struct anv_cs_parameters
anv_cs_parameters(const struct anv_compute_pipeline *pipeline);
struct anv_format_plane { struct anv_format_plane {
enum isl_format isl_format:16; enum isl_format isl_format:16;
struct isl_swizzle swizzle; struct isl_swizzle swizzle;

View File

@@ -4559,12 +4559,15 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer,
struct anv_cmd_compute_state *comp_state = &cmd_buffer->state.compute; struct anv_cmd_compute_state *comp_state = &cmd_buffer->state.compute;
const struct anv_shader_bin *cs_bin = pipeline->cs; const struct anv_shader_bin *cs_bin = pipeline->cs;
bool predicate = cmd_buffer->state.conditional_render_enabled; bool predicate = cmd_buffer->state.conditional_render_enabled;
const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
const struct intel_device_info *devinfo = &pipeline->base.device->info;
const struct brw_cs_dispatch_info dispatch =
brw_cs_get_dispatch_info(devinfo, prog_data, NULL);
anv_batch_emit(&cmd_buffer->batch, GENX(COMPUTE_WALKER), cw) { anv_batch_emit(&cmd_buffer->batch, GENX(COMPUTE_WALKER), cw) {
cw.IndirectParameterEnable = indirect; cw.IndirectParameterEnable = indirect;
cw.PredicateEnable = predicate; cw.PredicateEnable = predicate;
cw.SIMDSize = cs_params.simd_size / 16; cw.SIMDSize = dispatch.simd_size / 16;
cw.IndirectDataStartAddress = comp_state->push_data.offset; cw.IndirectDataStartAddress = comp_state->push_data.offset;
cw.IndirectDataLength = comp_state->push_data.alloc_size; cw.IndirectDataLength = comp_state->push_data.alloc_size;
cw.LocalXMaximum = prog_data->local_size[0] - 1; cw.LocalXMaximum = prog_data->local_size[0] - 1;
@@ -4573,7 +4576,7 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer,
cw.ThreadGroupIDXDimension = groupCountX; cw.ThreadGroupIDXDimension = groupCountX;
cw.ThreadGroupIDYDimension = groupCountY; cw.ThreadGroupIDYDimension = groupCountY;
cw.ThreadGroupIDZDimension = groupCountZ; cw.ThreadGroupIDZDimension = groupCountZ;
cw.ExecutionMask = pipeline->cs_right_mask; cw.ExecutionMask = dispatch.right_mask;
cw.InterfaceDescriptor = (struct GENX(INTERFACE_DESCRIPTOR_DATA)) { cw.InterfaceDescriptor = (struct GENX(INTERFACE_DESCRIPTOR_DATA)) {
.KernelStartPointer = cs_bin->kernel.offset, .KernelStartPointer = cs_bin->kernel.offset,
@@ -4583,7 +4586,7 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer,
cmd_buffer->state.binding_tables[MESA_SHADER_COMPUTE].offset, cmd_buffer->state.binding_tables[MESA_SHADER_COMPUTE].offset,
.BindingTableEntryCount = .BindingTableEntryCount =
1 + MIN2(pipeline->cs->bind_map.surface_count, 30), 1 + MIN2(pipeline->cs->bind_map.surface_count, 30),
.NumberofThreadsinGPGPUThreadGroup = cs_params.threads, .NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
.SharedLocalMemorySize = encode_slm_size(GFX_VER, .SharedLocalMemorySize = encode_slm_size(GFX_VER,
prog_data->base.total_shared), prog_data->base.total_shared),
.BarrierEnable = prog_data->uses_barrier, .BarrierEnable = prog_data->uses_barrier,
@@ -4602,19 +4605,22 @@ emit_gpgpu_walker(struct anv_cmd_buffer *cmd_buffer,
{ {
bool predicate = (GFX_VER <= 7 && indirect) || bool predicate = (GFX_VER <= 7 && indirect) ||
cmd_buffer->state.conditional_render_enabled; cmd_buffer->state.conditional_render_enabled;
const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
const struct intel_device_info *devinfo = &pipeline->base.device->info;
const struct brw_cs_dispatch_info dispatch =
brw_cs_get_dispatch_info(devinfo, prog_data, NULL);
anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), ggw) { anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), ggw) {
ggw.IndirectParameterEnable = indirect; ggw.IndirectParameterEnable = indirect;
ggw.PredicateEnable = predicate; ggw.PredicateEnable = predicate;
ggw.SIMDSize = cs_params.simd_size / 16; ggw.SIMDSize = dispatch.simd_size / 16;
ggw.ThreadDepthCounterMaximum = 0; ggw.ThreadDepthCounterMaximum = 0;
ggw.ThreadHeightCounterMaximum = 0; ggw.ThreadHeightCounterMaximum = 0;
ggw.ThreadWidthCounterMaximum = cs_params.threads - 1; ggw.ThreadWidthCounterMaximum = dispatch.threads - 1;
ggw.ThreadGroupIDXDimension = groupCountX; ggw.ThreadGroupIDXDimension = groupCountX;
ggw.ThreadGroupIDYDimension = groupCountY; ggw.ThreadGroupIDYDimension = groupCountY;
ggw.ThreadGroupIDZDimension = groupCountZ; ggw.ThreadGroupIDZDimension = groupCountZ;
ggw.RightExecutionMask = pipeline->cs_right_mask; ggw.RightExecutionMask = dispatch.right_mask;
ggw.BottomExecutionMask = 0xffffffff; ggw.BottomExecutionMask = 0xffffffff;
} }

View File

@@ -2513,9 +2513,6 @@ emit_compute_state(struct anv_compute_pipeline *pipeline,
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
anv_pipeline_setup_l3_config(&pipeline->base, cs_prog_data->base.total_shared > 0); anv_pipeline_setup_l3_config(&pipeline->base, cs_prog_data->base.total_shared > 0);
const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
pipeline->cs_right_mask = brw_cs_right_mask(cs_params.group_size, cs_params.simd_size);
const uint32_t subslices = MAX2(device->physical->subslice_total, 1); const uint32_t subslices = MAX2(device->physical->subslice_total, 1);
const UNUSED struct anv_shader_bin *cs_bin = pipeline->cs; const UNUSED struct anv_shader_bin *cs_bin = pipeline->cs;
@@ -2535,22 +2532,20 @@ static void
emit_compute_state(struct anv_compute_pipeline *pipeline, emit_compute_state(struct anv_compute_pipeline *pipeline,
const struct anv_device *device) const struct anv_device *device)
{ {
const struct intel_device_info *devinfo = &device->info;
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline); const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
anv_pipeline_setup_l3_config(&pipeline->base, cs_prog_data->base.total_shared > 0); anv_pipeline_setup_l3_config(&pipeline->base, cs_prog_data->base.total_shared > 0);
const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline); const struct brw_cs_dispatch_info dispatch =
brw_cs_get_dispatch_info(devinfo, cs_prog_data, NULL);
pipeline->cs_right_mask = brw_cs_right_mask(cs_params.group_size, cs_params.simd_size);
const uint32_t vfe_curbe_allocation = const uint32_t vfe_curbe_allocation =
ALIGN(cs_prog_data->push.per_thread.regs * cs_params.threads + ALIGN(cs_prog_data->push.per_thread.regs * dispatch.threads +
cs_prog_data->push.cross_thread.regs, 2); cs_prog_data->push.cross_thread.regs, 2);
const uint32_t subslices = MAX2(device->physical->subslice_total, 1); const uint32_t subslices = MAX2(device->physical->subslice_total, 1);
const struct anv_shader_bin *cs_bin = pipeline->cs; const struct anv_shader_bin *cs_bin = pipeline->cs;
const struct intel_device_info *devinfo = &device->info;
anv_batch_emit(&pipeline->base.batch, GENX(MEDIA_VFE_STATE), vfe) { anv_batch_emit(&pipeline->base.batch, GENX(MEDIA_VFE_STATE), vfe) {
#if GFX_VER > 7 #if GFX_VER > 7
@@ -2598,7 +2593,7 @@ emit_compute_state(struct anv_compute_pipeline *pipeline,
struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = { struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = {
.KernelStartPointer = .KernelStartPointer =
cs_bin->kernel.offset + cs_bin->kernel.offset +
brw_cs_prog_data_prog_offset(cs_prog_data, cs_params.simd_size), brw_cs_prog_data_prog_offset(cs_prog_data, dispatch.simd_size),
/* Wa_1606682166 */ /* Wa_1606682166 */
.SamplerCount = GFX_VER == 11 ? 0 : get_sampler_count(cs_bin), .SamplerCount = GFX_VER == 11 ? 0 : get_sampler_count(cs_bin),
@@ -2631,7 +2626,7 @@ emit_compute_state(struct anv_compute_pipeline *pipeline,
.ThreadPreemptionDisable = true, .ThreadPreemptionDisable = true,
#endif #endif
.NumberofThreadsinGPGPUThreadGroup = cs_params.threads, .NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
}; };
GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL, GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL,
pipeline->interface_descriptor_data, pipeline->interface_descriptor_data,