anv: Use new helper functions to pick SIMD variant for CS

Also combine the existing individual anv helpers into a single one for
all CS-related parameters.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5142>
Author:  Caio Marcelo de Oliveira Filho
Date:    2020-05-21 00:17:27 -07:00
Parent:  594374dd8d
Commit:  9b8347c988

5 changed files with 40 additions and 32 deletions
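
At a glance: the two single-value helpers (anv_cs_workgroup_size() and
anv_cs_threads()) are folded into one anv_cs_parameters() helper, and the
SIMD width now comes from the new brw_cs_simd_size_for_group_size() helper
rather than a value baked into cs_prog_data. A minimal sketch of the
consolidated call pattern, using only names that appear in the diffs below:

    /* One call now yields everything the CS setup paths need. */
    const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
    /* cs_params.group_size: local_size[0] * local_size[1] * local_size[2]
     * cs_params.simd_size:  SIMD variant picked for that group size
     * cs_params.threads:    DIV_ROUND_UP(group_size, simd_size)         */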

src/intel/vulkan/anv_cmd_buffer.c

@@ -838,9 +838,9 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
    const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
    const struct anv_push_range *range = &pipeline->cs->bind_map.push_ranges[0];
 
-   const uint32_t threads = anv_cs_threads(pipeline);
+   const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
 
    const unsigned total_push_constants_size =
-      brw_cs_push_const_total_size(cs_prog_data, threads);
+      brw_cs_push_const_total_size(cs_prog_data, cs_params.threads);
    if (total_push_constants_size == 0)
       return (struct anv_state) { .offset = 0 };
@@ -863,7 +863,7 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
    }
 
    if (cs_prog_data->push.per_thread.size > 0) {
-      for (unsigned t = 0; t < threads; t++) {
+      for (unsigned t = 0; t < cs_params.threads; t++) {
          memcpy(dst, src, cs_prog_data->push.per_thread.size);
          uint32_t *subgroup_id = dst +
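
Why the thread count feeds the size computation: per-thread push data is
replicated once for every HW thread in the group (the loop above), on top of
a single copy of the cross-thread data. A rough sketch of what
brw_cs_push_const_total_size() computes (not the verbatim brw
implementation):

    static unsigned
    push_const_total_size_sketch(const struct brw_cs_prog_data *cs_prog_data,
                                 unsigned threads)
    {
       /* One cross-thread block plus one per-thread block per HW thread. */
       return cs_prog_data->push.cross_thread.size +
              threads * cs_prog_data->push.per_thread.size;
    }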

src/intel/vulkan/anv_pipeline.c

@@ -1728,21 +1728,22 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
    return VK_SUCCESS;
 }
 
-uint32_t
-anv_cs_workgroup_size(const struct anv_compute_pipeline *pipeline)
+struct anv_cs_parameters
+anv_cs_parameters(const struct anv_compute_pipeline *pipeline)
 {
    const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
-   return cs_prog_data->local_size[0] *
-          cs_prog_data->local_size[1] *
-          cs_prog_data->local_size[2];
-}
 
-uint32_t
-anv_cs_threads(const struct anv_compute_pipeline *pipeline)
-{
-   const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
-   return DIV_ROUND_UP(anv_cs_workgroup_size(pipeline),
-                       cs_prog_data->simd_size);
+   struct anv_cs_parameters cs_params = {};
+
+   cs_params.group_size = cs_prog_data->local_size[0] *
+                          cs_prog_data->local_size[1] *
+                          cs_prog_data->local_size[2];
+   cs_params.simd_size =
+      brw_cs_simd_size_for_group_size(&pipeline->base.device->info,
+                                      cs_prog_data, cs_params.group_size);
+   cs_params.threads = DIV_ROUND_UP(cs_params.group_size, cs_params.simd_size);
+
+   return cs_params;
 }
 
 /**
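
The selection itself lives in the new brw helper. An illustrative sketch of
the idea only: the real brw_cs_simd_size_for_group_size() also checks which
SIMD variants were actually compiled and honors debug overrides, and the HW
thread limit is per-gen (the 64 below is an assumed figure for illustration):

    static unsigned
    pick_simd_size_sketch(unsigned group_size)
    {
       /* Prefer the narrowest width whose thread count fits the HW limit. */
       for (unsigned simd = 8; simd <= 32; simd *= 2) {
          if (DIV_ROUND_UP(group_size, simd) <= 64)
             return simd;
       }
       return 32;
    }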

src/intel/vulkan/anv_private.h

@@ -3413,11 +3413,14 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
                         const char *entrypoint,
                         const VkSpecializationInfo *spec_info);
 
-uint32_t
-anv_cs_workgroup_size(const struct anv_compute_pipeline *pipeline);
+struct anv_cs_parameters {
+   uint32_t group_size;
+   uint32_t simd_size;
+   uint32_t threads;
+};
 
-uint32_t
-anv_cs_threads(const struct anv_compute_pipeline *pipeline);
+struct anv_cs_parameters
+anv_cs_parameters(const struct anv_compute_pipeline *pipeline);
 
 struct anv_format_plane {
    enum isl_format isl_format:16;

src/intel/vulkan/genX_cmd_buffer.c

@@ -4360,12 +4360,14 @@ void genX(CmdDispatchBase)(
    if (cmd_buffer->state.conditional_render_enabled)
       genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
 
+   const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
+
    anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), ggw) {
       ggw.PredicateEnable = cmd_buffer->state.conditional_render_enabled;
-      ggw.SIMDSize = prog_data->simd_size / 16;
+      ggw.SIMDSize = cs_params.simd_size / 16;
       ggw.ThreadDepthCounterMaximum = 0;
       ggw.ThreadHeightCounterMaximum = 0;
-      ggw.ThreadWidthCounterMaximum = anv_cs_threads(pipeline) - 1;
+      ggw.ThreadWidthCounterMaximum = cs_params.threads - 1;
       ggw.ThreadGroupIDXDimension = groupCountX;
       ggw.ThreadGroupIDYDimension = groupCountY;
       ggw.ThreadGroupIDZDimension = groupCountZ;
@@ -4474,14 +4476,16 @@ void genX(CmdDispatchIndirect)(
       genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
 #endif
 
+   const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
+
    anv_batch_emit(batch, GENX(GPGPU_WALKER), ggw) {
       ggw.IndirectParameterEnable = true;
       ggw.PredicateEnable = GEN_GEN <= 7 ||
                             cmd_buffer->state.conditional_render_enabled;
-      ggw.SIMDSize = prog_data->simd_size / 16;
+      ggw.SIMDSize = cs_params.simd_size / 16;
       ggw.ThreadDepthCounterMaximum = 0;
       ggw.ThreadHeightCounterMaximum = 0;
-      ggw.ThreadWidthCounterMaximum = anv_cs_threads(pipeline) - 1;
+      ggw.ThreadWidthCounterMaximum = cs_params.threads - 1;
       ggw.RightExecutionMask = pipeline->cs_right_mask;
       ggw.BottomExecutionMask = 0xffffffff;
    }
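
A note on the two derived fields used above: GPGPU_WALKER's SIMDSize is an
enum in genxml (SIMD8 = 0, SIMD16 = 1, SIMD32 = 2), so dividing a dispatch
width of 8/16/32 by 16 yields exactly the right encoding, and
ThreadWidthCounterMaximum is the zero-based count of HW threads per group:

    /* 8 / 16 == 0 (SIMD8), 16 / 16 == 1 (SIMD16), 32 / 16 == 2 (SIMD32) */
    assert(cs_params.simd_size == 8 || cs_params.simd_size == 16 ||
           cs_params.simd_size == 32);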

src/intel/vulkan/genX_pipeline.c

@@ -2325,19 +2325,16 @@ compute_pipeline_create(
    anv_pipeline_setup_l3_config(&pipeline->base, cs_prog_data->base.total_shared > 0);
 
-   uint32_t group_size = cs_prog_data->local_size[0] *
-      cs_prog_data->local_size[1] * cs_prog_data->local_size[2];
-   uint32_t remainder = group_size & (cs_prog_data->simd_size - 1);
+   const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
+
+   uint32_t remainder = cs_params.group_size & (cs_params.simd_size - 1);
 
    if (remainder > 0)
       pipeline->cs_right_mask = ~0u >> (32 - remainder);
    else
-      pipeline->cs_right_mask = ~0u >> (32 - cs_prog_data->simd_size);
+      pipeline->cs_right_mask = ~0u >> (32 - cs_params.simd_size);
 
-   const uint32_t threads = anv_cs_threads(pipeline);
    const uint32_t vfe_curbe_allocation =
-      ALIGN(cs_prog_data->push.per_thread.regs * threads +
+      ALIGN(cs_prog_data->push.per_thread.regs * cs_params.threads +
            cs_prog_data->push.cross_thread.regs, 2);
 
    const uint32_t subslices = MAX2(device->physical->subslice_total, 1);
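
The right-mask logic above is worth unpacking: since simd_size is a power of
two, group_size & (simd_size - 1) is the number of live channels in the last,
possibly partial, SIMD thread, and the mask enables exactly those channels
(or a full thread's worth when the group divides evenly). A self-contained
restatement with hypothetical sizes:

    static uint32_t
    cs_right_mask_sketch(uint32_t group_size, uint32_t simd_size)
    {
       const uint32_t remainder = group_size & (simd_size - 1);
       /* remainder low bits set, or all simd_size bits for a full thread. */
       return remainder > 0 ? ~0u >> (32 - remainder)
                            : ~0u >> (32 - simd_size);
    }

    /* cs_right_mask_sketch(40, 16) == 0x000000ff (8 live channels)
     * cs_right_mask_sketch(64, 16) == 0x0000ffff (full SIMD16 thread) */
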
@@ -2388,7 +2385,10 @@ compute_pipeline_create(
    }
 
    struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = {
-      .KernelStartPointer = cs_bin->kernel.offset,
+      .KernelStartPointer =
+         cs_bin->kernel.offset +
+         brw_cs_prog_data_prog_offset(cs_prog_data, cs_params.simd_size),
+
       /* WA_1606682166 */
       .SamplerCount = GEN_GEN == 11 ? 0 : get_sampler_count(cs_bin),
       /* We add 1 because the CS indirect parameters buffer isn't accounted
@@ -2420,7 +2420,7 @@ compute_pipeline_create(
       .ThreadPreemptionDisable = true,
 #endif
-      .NumberofThreadsinGPGPUThreadGroup = threads,
+      .NumberofThreadsinGPGPUThreadGroup = cs_params.threads,
    };
 
    GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL,
                                         pipeline->interface_descriptor_data,