anv: Use new helper functions to pick SIMD variant for CS
Also combine the existing individual anv helpers into a single one for all CS related parameters. Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5142>
This commit is contained in:
@@ -838,9 +838,9 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
|
||||
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
|
||||
const struct anv_push_range *range = &pipeline->cs->bind_map.push_ranges[0];
|
||||
|
||||
const uint32_t threads = anv_cs_threads(pipeline);
|
||||
const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
|
||||
const unsigned total_push_constants_size =
|
||||
brw_cs_push_const_total_size(cs_prog_data, threads);
|
||||
brw_cs_push_const_total_size(cs_prog_data, cs_params.threads);
|
||||
if (total_push_constants_size == 0)
|
||||
return (struct anv_state) { .offset = 0 };
|
||||
|
||||
@@ -863,7 +863,7 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
|
||||
}
|
||||
|
||||
if (cs_prog_data->push.per_thread.size > 0) {
|
||||
for (unsigned t = 0; t < threads; t++) {
|
||||
for (unsigned t = 0; t < cs_params.threads; t++) {
|
||||
memcpy(dst, src, cs_prog_data->push.per_thread.size);
|
||||
|
||||
uint32_t *subgroup_id = dst +
|
||||
|
@@ -1728,21 +1728,22 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
anv_cs_workgroup_size(const struct anv_compute_pipeline *pipeline)
|
||||
struct anv_cs_parameters
|
||||
anv_cs_parameters(const struct anv_compute_pipeline *pipeline)
|
||||
{
|
||||
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
|
||||
return cs_prog_data->local_size[0] *
|
||||
|
||||
struct anv_cs_parameters cs_params = {};
|
||||
|
||||
cs_params.group_size = cs_prog_data->local_size[0] *
|
||||
cs_prog_data->local_size[1] *
|
||||
cs_prog_data->local_size[2];
|
||||
}
|
||||
cs_params.simd_size =
|
||||
brw_cs_simd_size_for_group_size(&pipeline->base.device->info,
|
||||
cs_prog_data, cs_params.group_size);
|
||||
cs_params.threads = DIV_ROUND_UP(cs_params.group_size, cs_params.simd_size);
|
||||
|
||||
uint32_t
|
||||
anv_cs_threads(const struct anv_compute_pipeline *pipeline)
|
||||
{
|
||||
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
|
||||
return DIV_ROUND_UP(anv_cs_workgroup_size(pipeline),
|
||||
cs_prog_data->simd_size);
|
||||
return cs_params;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@@ -3413,11 +3413,14 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
|
||||
const char *entrypoint,
|
||||
const VkSpecializationInfo *spec_info);
|
||||
|
||||
uint32_t
|
||||
anv_cs_workgroup_size(const struct anv_compute_pipeline *pipeline);
|
||||
struct anv_cs_parameters {
|
||||
uint32_t group_size;
|
||||
uint32_t simd_size;
|
||||
uint32_t threads;
|
||||
};
|
||||
|
||||
uint32_t
|
||||
anv_cs_threads(const struct anv_compute_pipeline *pipeline);
|
||||
struct anv_cs_parameters
|
||||
anv_cs_parameters(const struct anv_compute_pipeline *pipeline);
|
||||
|
||||
struct anv_format_plane {
|
||||
enum isl_format isl_format:16;
|
||||
|
@@ -4360,12 +4360,14 @@ void genX(CmdDispatchBase)(
|
||||
if (cmd_buffer->state.conditional_render_enabled)
|
||||
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
|
||||
|
||||
const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
|
||||
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), ggw) {
|
||||
ggw.PredicateEnable = cmd_buffer->state.conditional_render_enabled;
|
||||
ggw.SIMDSize = prog_data->simd_size / 16;
|
||||
ggw.SIMDSize = cs_params.simd_size / 16;
|
||||
ggw.ThreadDepthCounterMaximum = 0;
|
||||
ggw.ThreadHeightCounterMaximum = 0;
|
||||
ggw.ThreadWidthCounterMaximum = anv_cs_threads(pipeline) - 1;
|
||||
ggw.ThreadWidthCounterMaximum = cs_params.threads - 1;
|
||||
ggw.ThreadGroupIDXDimension = groupCountX;
|
||||
ggw.ThreadGroupIDYDimension = groupCountY;
|
||||
ggw.ThreadGroupIDZDimension = groupCountZ;
|
||||
@@ -4474,14 +4476,16 @@ void genX(CmdDispatchIndirect)(
|
||||
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
|
||||
#endif
|
||||
|
||||
const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
|
||||
|
||||
anv_batch_emit(batch, GENX(GPGPU_WALKER), ggw) {
|
||||
ggw.IndirectParameterEnable = true;
|
||||
ggw.PredicateEnable = GEN_GEN <= 7 ||
|
||||
cmd_buffer->state.conditional_render_enabled;
|
||||
ggw.SIMDSize = prog_data->simd_size / 16;
|
||||
ggw.SIMDSize = cs_params.simd_size / 16;
|
||||
ggw.ThreadDepthCounterMaximum = 0;
|
||||
ggw.ThreadHeightCounterMaximum = 0;
|
||||
ggw.ThreadWidthCounterMaximum = anv_cs_threads(pipeline) - 1;
|
||||
ggw.ThreadWidthCounterMaximum = cs_params.threads - 1;
|
||||
ggw.RightExecutionMask = pipeline->cs_right_mask;
|
||||
ggw.BottomExecutionMask = 0xffffffff;
|
||||
}
|
||||
|
@@ -2325,19 +2325,16 @@ compute_pipeline_create(
|
||||
|
||||
anv_pipeline_setup_l3_config(&pipeline->base, cs_prog_data->base.total_shared > 0);
|
||||
|
||||
uint32_t group_size = cs_prog_data->local_size[0] *
|
||||
cs_prog_data->local_size[1] * cs_prog_data->local_size[2];
|
||||
uint32_t remainder = group_size & (cs_prog_data->simd_size - 1);
|
||||
const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
|
||||
uint32_t remainder = cs_params.group_size & (cs_params.simd_size - 1);
|
||||
|
||||
if (remainder > 0)
|
||||
pipeline->cs_right_mask = ~0u >> (32 - remainder);
|
||||
else
|
||||
pipeline->cs_right_mask = ~0u >> (32 - cs_prog_data->simd_size);
|
||||
|
||||
const uint32_t threads = anv_cs_threads(pipeline);
|
||||
pipeline->cs_right_mask = ~0u >> (32 - cs_params.simd_size);
|
||||
|
||||
const uint32_t vfe_curbe_allocation =
|
||||
ALIGN(cs_prog_data->push.per_thread.regs * threads +
|
||||
ALIGN(cs_prog_data->push.per_thread.regs * cs_params.threads +
|
||||
cs_prog_data->push.cross_thread.regs, 2);
|
||||
|
||||
const uint32_t subslices = MAX2(device->physical->subslice_total, 1);
|
||||
@@ -2388,7 +2385,10 @@ compute_pipeline_create(
|
||||
}
|
||||
|
||||
struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = {
|
||||
.KernelStartPointer = cs_bin->kernel.offset,
|
||||
.KernelStartPointer =
|
||||
cs_bin->kernel.offset +
|
||||
brw_cs_prog_data_prog_offset(cs_prog_data, cs_params.simd_size),
|
||||
|
||||
/* WA_1606682166 */
|
||||
.SamplerCount = GEN_GEN == 11 ? 0 : get_sampler_count(cs_bin),
|
||||
/* We add 1 because the CS indirect parameters buffer isn't accounted
|
||||
@@ -2420,7 +2420,7 @@ compute_pipeline_create(
|
||||
.ThreadPreemptionDisable = true,
|
||||
#endif
|
||||
|
||||
.NumberofThreadsinGPGPUThreadGroup = threads,
|
||||
.NumberofThreadsinGPGPUThreadGroup = cs_params.threads,
|
||||
};
|
||||
GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL,
|
||||
pipeline->interface_descriptor_data,
|
||||
|
Reference in New Issue
Block a user