anv: Enable push constants on gen12-hp

Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8342>
This commit is contained in:
Author: Jason Ekstrand
Date: 2020-05-04 18:08:35 -05:00
Committed by: Jordan Justen
parent 369eab9420
commit 8f85e68dea
4 changed files with 27 additions and 11 deletions

View File

@@ -1046,6 +1046,7 @@ anv_cmd_buffer_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer)
struct anv_state struct anv_state
anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer) anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
{ {
const struct gen_device_info *devinfo = &cmd_buffer->device->info;
struct anv_push_constants *data = struct anv_push_constants *data =
&cmd_buffer->state.compute.base.push_constants; &cmd_buffer->state.compute.base.push_constants;
struct anv_compute_pipeline *pipeline = cmd_buffer->state.compute.pipeline; struct anv_compute_pipeline *pipeline = cmd_buffer->state.compute.pipeline;
@@ -1062,10 +1063,16 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
cmd_buffer->device->info.gen < 8 ? 32 : 64; cmd_buffer->device->info.gen < 8 ? 32 : 64;
const unsigned aligned_total_push_constants_size = const unsigned aligned_total_push_constants_size =
ALIGN(total_push_constants_size, push_constant_alignment); ALIGN(total_push_constants_size, push_constant_alignment);
struct anv_state state = struct anv_state state;
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, if (devinfo->gen > 12 || gen_device_info_is_12hp(devinfo)) {
state = anv_state_stream_alloc(&cmd_buffer->general_state_stream,
aligned_total_push_constants_size, aligned_total_push_constants_size,
push_constant_alignment); push_constant_alignment);
} else {
state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
aligned_total_push_constants_size,
push_constant_alignment);
}
void *dst = state.map; void *dst = state.map;
const void *src = (char *)data + (range->start * 32); const void *src = (char *)data + (range->start * 32);

View File

@@ -35,6 +35,7 @@ anv_nir_compute_push_layout(const struct anv_physical_device *pdevice,
void *mem_ctx) void *mem_ctx)
{ {
const struct brw_compiler *compiler = pdevice->compiler; const struct brw_compiler *compiler = pdevice->compiler;
const struct gen_device_info *devinfo = compiler->devinfo;
memset(map->push_ranges, 0, sizeof(map->push_ranges)); memset(map->push_ranges, 0, sizeof(map->push_ranges));
bool has_const_ubo = false; bool has_const_ubo = false;
@@ -91,7 +92,8 @@ anv_nir_compute_push_layout(const struct anv_physical_device *pdevice,
push_end = MAX2(push_end, push_reg_mask_end); push_end = MAX2(push_end, push_reg_mask_end);
} }
if (nir->info.stage == MESA_SHADER_COMPUTE) { if (nir->info.stage == MESA_SHADER_COMPUTE &&
(devinfo->gen <= 12 && !gen_device_info_is_12hp(devinfo))) {
/* For compute shaders, we always have to have the subgroup ID. The /* For compute shaders, we always have to have the subgroup ID. The
* back-end compiler will "helpfully" add it for us in the last push * back-end compiler will "helpfully" add it for us in the last push
* constant slot. Yes, there is an off-by-one error here but that's * constant slot. Yes, there is an off-by-one error here but that's

View File

@@ -2922,6 +2922,8 @@ struct anv_cmd_compute_state {
bool pipeline_dirty; bool pipeline_dirty;
struct anv_state push_data;
struct anv_address num_workgroups; struct anv_address num_workgroups;
}; };

View File

@@ -4383,7 +4383,8 @@ void genX(CmdEndTransformFeedbackEXT)(
void void
genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer) genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
{ {
struct anv_compute_pipeline *pipeline = cmd_buffer->state.compute.pipeline; struct anv_cmd_compute_state *comp_state = &cmd_buffer->state.compute;
struct anv_compute_pipeline *pipeline = comp_state->pipeline;
assert(pipeline->cs); assert(pipeline->cs);
@@ -4449,15 +4450,17 @@ genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
} }
if (cmd_buffer->state.push_constants_dirty & VK_SHADER_STAGE_COMPUTE_BIT) { if (cmd_buffer->state.push_constants_dirty & VK_SHADER_STAGE_COMPUTE_BIT) {
struct anv_state push_state = comp_state->push_data =
anv_cmd_buffer_cs_push_constants(cmd_buffer); anv_cmd_buffer_cs_push_constants(cmd_buffer);
if (push_state.alloc_size) { #if GEN_GEN <= 12 && !GEN_IS_GEN12HP
if (comp_state->push_data.alloc_size) {
anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), curbe) { anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), curbe) {
curbe.CURBETotalDataLength = push_state.alloc_size; curbe.CURBETotalDataLength = comp_state->push_data.alloc_size;
curbe.CURBEDataStartAddress = push_state.offset; curbe.CURBEDataStartAddress = comp_state->push_data.offset;
} }
} }
#endif
cmd_buffer->state.push_constants_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT; cmd_buffer->state.push_constants_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
} }
@@ -4526,6 +4529,7 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer,
uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountX, uint32_t groupCountY,
uint32_t groupCountZ) uint32_t groupCountZ)
{ {
struct anv_cmd_compute_state *comp_state = &cmd_buffer->state.compute;
const struct anv_shader_bin *cs_bin = pipeline->cs; const struct anv_shader_bin *cs_bin = pipeline->cs;
bool predicate = cmd_buffer->state.conditional_render_enabled; bool predicate = cmd_buffer->state.conditional_render_enabled;
const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline); const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
@@ -4534,6 +4538,8 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer,
cw.IndirectParameterEnable = indirect; cw.IndirectParameterEnable = indirect;
cw.PredicateEnable = predicate; cw.PredicateEnable = predicate;
cw.SIMDSize = cs_params.simd_size / 16; cw.SIMDSize = cs_params.simd_size / 16;
cw.IndirectDataStartAddress = comp_state->push_data.offset;
cw.IndirectDataLength = comp_state->push_data.alloc_size;
cw.LocalXMaximum = prog_data->local_size[0] - 1; cw.LocalXMaximum = prog_data->local_size[0] - 1;
cw.LocalYMaximum = prog_data->local_size[1] - 1; cw.LocalYMaximum = prog_data->local_size[1] - 1;
cw.LocalZMaximum = prog_data->local_size[2] - 1; cw.LocalZMaximum = prog_data->local_size[2] - 1;
@@ -4542,7 +4548,6 @@ emit_compute_walker(struct anv_cmd_buffer *cmd_buffer,
cw.ThreadGroupIDZDimension = groupCountZ; cw.ThreadGroupIDZDimension = groupCountZ;
cw.ExecutionMask = pipeline->cs_right_mask; cw.ExecutionMask = pipeline->cs_right_mask;
assert(brw_cs_push_const_total_size(prog_data, cs_params.threads) == 0);
cw.InterfaceDescriptor = (struct GENX(INTERFACE_DESCRIPTOR_DATA_HP)) { cw.InterfaceDescriptor = (struct GENX(INTERFACE_DESCRIPTOR_DATA_HP)) {
.KernelStartPointer = cs_bin->kernel.offset, .KernelStartPointer = cs_bin->kernel.offset,
.SamplerStatePointer = .SamplerStatePointer =