From 5e860879409891786527c52e78f3708911490837 Mon Sep 17 00:00:00 2001 From: Nanley Chery Date: Tue, 30 Jul 2024 21:31:43 -0400 Subject: [PATCH] intel: Move depth clear value writes to drivers This improves drivers in the following ways: * iris_hiz_exec() and crocus_hiz_exec() get rid of the narrowly-used update_clear_depth parameters. * iris avoids fast-clearing if the aux state is CLEAR. crocus avoids this too, but didn't actually need it in the first place. * iris updates the value once per fast_clear_depth() call instead of doing an update for each layer being cleared. * anv now updates the clear value when transitioning from an undefined layout instead of doing so on every fast-clear. This should be safer because we don't perform state cache invalidates when changing the clear value. So, existing surface states won't have any stale values. Reviewed-by: Lionel Landwerlin Part-of: --- src/gallium/drivers/crocus/crocus_clear.c | 14 ++------- src/gallium/drivers/crocus/crocus_resolve.c | 9 ++---- src/gallium/drivers/crocus/crocus_resource.h | 3 +- src/gallium/drivers/iris/iris_clear.c | 28 ++++++++++------- src/gallium/drivers/iris/iris_resolve.c | 9 ++---- src/gallium/drivers/iris/iris_resource.h | 3 +- src/intel/blorp/blorp_genX_exec_brw.h | 32 +++----------------- src/intel/blorp/blorp_genX_exec_elk.h | 14 +++------ src/intel/vulkan/genX_cmd_buffer.c | 27 +++++++++++++++++ 9 files changed, 63 insertions(+), 76 deletions(-) diff --git a/src/gallium/drivers/crocus/crocus_clear.c b/src/gallium/drivers/crocus/crocus_clear.c index 739fc4766d1..abdf3409989 100644 --- a/src/gallium/drivers/crocus/crocus_clear.c +++ b/src/gallium/drivers/crocus/crocus_clear.c @@ -456,8 +456,6 @@ fast_clear_depth(struct crocus_context *ice, { struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER]; - bool update_clear_depth = false; - /* If we're clearing to a new clear value, then we need to resolve any clear * flags out of the HiZ buffer into the real depth buffer. 
*/ @@ -492,14 +490,13 @@ fast_clear_depth(struct crocus_context *ice, * value so this shouldn't happen often. */ crocus_hiz_exec(ice, batch, res, res_level, layer, 1, - ISL_AUX_OP_FULL_RESOLVE, false); + ISL_AUX_OP_FULL_RESOLVE); crocus_resource_set_aux_state(ice, res, res_level, layer, 1, ISL_AUX_STATE_RESOLVED); } } const union isl_color_value clear_value = { .f32 = {depth, } }; crocus_resource_set_clear_color(ice, res, clear_value); - update_clear_depth = true; } for (unsigned l = 0; l < box->depth; l++) { @@ -507,14 +504,9 @@ fast_clear_depth(struct crocus_context *ice, crocus_resource_level_has_hiz(res, level) ? crocus_resource_get_aux_state(res, level, box->z + l) : ISL_AUX_STATE_AUX_INVALID; - if (update_clear_depth || aux_state != ISL_AUX_STATE_CLEAR) { - if (aux_state == ISL_AUX_STATE_CLEAR) { - perf_debug(&ice->dbg, "Performing HiZ clear just to update the " - "depth clear value\n"); - } + if (aux_state != ISL_AUX_STATE_CLEAR) { crocus_hiz_exec(ice, batch, res, level, - box->z + l, 1, ISL_AUX_OP_FAST_CLEAR, - update_clear_depth); + box->z + l, 1, ISL_AUX_OP_FAST_CLEAR); } } diff --git a/src/gallium/drivers/crocus/crocus_resolve.c b/src/gallium/drivers/crocus/crocus_resolve.c index 30eae441bb7..9a2947978e5 100644 --- a/src/gallium/drivers/crocus/crocus_resolve.c +++ b/src/gallium/drivers/crocus/crocus_resolve.c @@ -604,8 +604,7 @@ crocus_hiz_exec(struct crocus_context *ice, struct crocus_batch *batch, struct crocus_resource *res, unsigned int level, unsigned int start_layer, - unsigned int num_layers, enum isl_aux_op op, - bool update_clear_depth) + unsigned int num_layers, enum isl_aux_op op) { struct crocus_screen *screen = batch->screen; const struct intel_device_info *devinfo = &batch->screen->devinfo; @@ -686,9 +685,7 @@ crocus_hiz_exec(struct crocus_context *ice, &res->base.b, res->aux.usage, level, true); struct blorp_batch blorp_batch; - enum blorp_batch_flags flags = 0; - flags |= update_clear_depth ? 
0 : BLORP_BATCH_NO_UPDATE_CLEAR_COLOR; - blorp_batch_init(&ice->blorp, &blorp_batch, batch, flags); + blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0); blorp_hiz_op(&blorp_batch, &surf, level, start_layer, num_layers, op); blorp_batch_finish(&blorp_batch); @@ -860,7 +857,7 @@ crocus_resource_prepare_access(struct crocus_context *ice, assert(aux_op == ISL_AUX_OP_PARTIAL_RESOLVE); crocus_mcs_partial_resolve(ice, batch, res, layer, 1); } else if (isl_aux_usage_has_hiz(res->aux.usage)) { - crocus_hiz_exec(ice, batch, res, level, layer, 1, aux_op, false); + crocus_hiz_exec(ice, batch, res, level, layer, 1, aux_op); } else if (res->aux.usage == ISL_AUX_USAGE_STC_CCS) { unreachable("crocus doesn't resolve STC_CCS resources"); } else { diff --git a/src/gallium/drivers/crocus/crocus_resource.h b/src/gallium/drivers/crocus/crocus_resource.h index 34f496cd284..40c04f5d7a5 100644 --- a/src/gallium/drivers/crocus/crocus_resource.h +++ b/src/gallium/drivers/crocus/crocus_resource.h @@ -381,8 +381,7 @@ crocus_hiz_exec(struct crocus_context *ice, struct crocus_batch *batch, struct crocus_resource *res, unsigned int level, unsigned int start_layer, - unsigned int num_layers, enum isl_aux_op op, - bool update_clear_depth); + unsigned int num_layers, enum isl_aux_op op); /** * Prepare a miptree for access diff --git a/src/gallium/drivers/iris/iris_clear.c b/src/gallium/drivers/iris/iris_clear.c index 72a5f793fc6..3d44983dda6 100644 --- a/src/gallium/drivers/iris/iris_clear.c +++ b/src/gallium/drivers/iris/iris_clear.c @@ -526,8 +526,7 @@ fast_clear_depth(struct iris_context *ice, float depth) { struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER]; - - bool update_clear_depth = false; + const struct intel_device_info *devinfo = batch->screen->devinfo; if (res->aux.usage == ISL_AUX_USAGE_HIZ_CCS_WT) { /* From Bspec 47010 (Depth Buffer Clear): @@ -581,16 +580,27 @@ fast_clear_depth(struct iris_context *ice, * value so this shouldn't happen often. 
*/ iris_hiz_exec(ice, batch, res, res_level, layer, 1, - ISL_AUX_OP_FULL_RESOLVE, false); + ISL_AUX_OP_FULL_RESOLVE); iris_resource_set_aux_state(ice, res, res_level, layer, 1, ISL_AUX_STATE_RESOLVED); } } const union isl_color_value clear_value = { .f32 = {depth, } }; iris_resource_set_clear_color(ice, res, clear_value); - update_clear_depth = true; + /* Also set the indirect clear color if it exists. */ if (res->aux.clear_color_bo) { + uint32_t packed_depth; + isl_color_value_pack(&clear_value, res->surf.format, &packed_depth); + + const uint64_t clear_pixel_offset = res->aux.clear_color_offset + + isl_get_sampler_clear_field_offset(devinfo, res->surf.format); + + iris_emit_pipe_control_write(batch, "update fast clear value (Z)", + PIPE_CONTROL_WRITE_IMMEDIATE, + res->aux.clear_color_bo, + clear_pixel_offset, packed_depth); + /* From the TGL PRMs, Volume 9: Render Engine, State Caching : * * "Any values referenced by pointers within the @@ -603,6 +613,7 @@ fast_clear_depth(struct iris_context *ice, * Invalidate the state cache as suggested. 
*/ iris_emit_pipe_control_flush(batch, "flush fast clear values (z)", + PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_STATE_CACHE_INVALIDATE); } } @@ -610,14 +621,9 @@ fast_clear_depth(struct iris_context *ice, for (unsigned l = 0; l < box->depth; l++) { enum isl_aux_state aux_state = iris_resource_get_aux_state(res, level, box->z + l); - if (update_clear_depth || aux_state != ISL_AUX_STATE_CLEAR) { - if (aux_state == ISL_AUX_STATE_CLEAR) { - perf_debug(&ice->dbg, "Performing HiZ clear just to update the " - "depth clear value\n"); - } + if (aux_state != ISL_AUX_STATE_CLEAR) { iris_hiz_exec(ice, batch, res, level, - box->z + l, 1, ISL_AUX_OP_FAST_CLEAR, - update_clear_depth); + box->z + l, 1, ISL_AUX_OP_FAST_CLEAR); } } diff --git a/src/gallium/drivers/iris/iris_resolve.c b/src/gallium/drivers/iris/iris_resolve.c index 21772f88243..f07d4fba81f 100644 --- a/src/gallium/drivers/iris/iris_resolve.c +++ b/src/gallium/drivers/iris/iris_resolve.c @@ -675,8 +675,7 @@ iris_hiz_exec(struct iris_context *ice, struct iris_batch *batch, struct iris_resource *res, unsigned int level, unsigned int start_layer, - unsigned int num_layers, enum isl_aux_op op, - bool update_clear_depth) + unsigned int num_layers, enum isl_aux_op op) { ASSERTED const struct intel_device_info *devinfo = batch->screen->devinfo; @@ -738,9 +737,7 @@ iris_hiz_exec(struct iris_context *ice, res->aux.usage, level, true); struct blorp_batch blorp_batch; - enum blorp_batch_flags flags = 0; - flags |= update_clear_depth ? 
0 : BLORP_BATCH_NO_UPDATE_CLEAR_COLOR; - blorp_batch_init(&ice->blorp, &blorp_batch, batch, flags); + blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0); blorp_hiz_op(&blorp_batch, &surf, level, start_layer, num_layers, op); blorp_batch_finish(&blorp_batch); @@ -914,7 +911,7 @@ iris_resource_prepare_access(struct iris_context *ice, } else if (isl_aux_usage_has_mcs(res->aux.usage)) { iris_mcs_exec(ice, batch, res, layer, 1, aux_op); } else if (isl_aux_usage_has_hiz(res->aux.usage)) { - iris_hiz_exec(ice, batch, res, level, layer, 1, aux_op, false); + iris_hiz_exec(ice, batch, res, level, layer, 1, aux_op); } else if (res->aux.usage == ISL_AUX_USAGE_STC_CCS) { unreachable("iris doesn't resolve STC_CCS resources"); } else { diff --git a/src/gallium/drivers/iris/iris_resource.h b/src/gallium/drivers/iris/iris_resource.h index 8d8b22a228f..166d1d6c2d8 100644 --- a/src/gallium/drivers/iris/iris_resource.h +++ b/src/gallium/drivers/iris/iris_resource.h @@ -350,8 +350,7 @@ iris_hiz_exec(struct iris_context *ice, struct iris_batch *batch, struct iris_resource *res, unsigned int level, unsigned int start_layer, - unsigned int num_layers, enum isl_aux_op op, - bool update_clear_depth); + unsigned int num_layers, enum isl_aux_op op); /** * Prepare a miptree for access diff --git a/src/intel/blorp/blorp_genX_exec_brw.h b/src/intel/blorp/blorp_genX_exec_brw.h index 27570ffec45..d961ecd1bfe 100644 --- a/src/intel/blorp/blorp_genX_exec_brw.h +++ b/src/intel/blorp/blorp_genX_exec_brw.h @@ -1546,24 +1546,6 @@ blorp_update_clear_color(UNUSED struct blorp_batch *batch, #else -#if GFX_VER == 12 - if (isl_surf_usage_is_depth(info->surf.usage)) { - const struct intel_device_info *devinfo = - batch->blorp->compiler->brw->devinfo; - blorp_emit(batch, GENX(MI_STORE_DATA_IMM), sdi) { - sdi.Address = info->clear_color_addr; - sdi.Address.offset += - isl_get_sampler_clear_field_offset(devinfo, info->surf.format); - - isl_color_value_pack(&info->clear_color, info->surf.format, - (uint32_t 
*)&sdi.ImmediateData); - - sdi.ForceWriteCompletionCheck = true; - } - return; - } -#endif - for (int i = 0; i < 4; i++) { blorp_emit(batch, GENX(MI_STORE_DATA_IMM), sdi) { sdi.Address = info->clear_color_addr; @@ -1591,16 +1573,10 @@ blorp_uses_bti_rt_writes(const struct blorp_batch *batch, const struct blorp_par static void blorp_exec_3d(struct blorp_batch *batch, const struct blorp_params *params) { - if (!(batch->flags & BLORP_BATCH_NO_UPDATE_CLEAR_COLOR)) { - if (params->fast_clear_op == ISL_AUX_OP_FAST_CLEAR && - params->dst.clear_color_addr.buffer != NULL) { - blorp_update_clear_color(batch, &params->dst); - } - - if (params->hiz_op == ISL_AUX_OP_FAST_CLEAR && - params->depth.clear_color_addr.buffer != NULL) { - blorp_update_clear_color(batch, &params->depth); - } + if (!(batch->flags & BLORP_BATCH_NO_UPDATE_CLEAR_COLOR) && + params->fast_clear_op == ISL_AUX_OP_FAST_CLEAR && + params->dst.clear_color_addr.buffer != NULL) { + blorp_update_clear_color(batch, &params->dst); } if (params->hiz_op != ISL_AUX_OP_NONE) { diff --git a/src/intel/blorp/blorp_genX_exec_elk.h b/src/intel/blorp/blorp_genX_exec_elk.h index 6c60986bcd6..fe357adbdf5 100644 --- a/src/intel/blorp/blorp_genX_exec_elk.h +++ b/src/intel/blorp/blorp_genX_exec_elk.h @@ -1879,16 +1879,10 @@ blorp_uses_bti_rt_writes(const struct blorp_batch *batch, const struct blorp_par static void blorp_exec_3d(struct blorp_batch *batch, const struct blorp_params *params) { - if (!(batch->flags & BLORP_BATCH_NO_UPDATE_CLEAR_COLOR)) { - if (params->fast_clear_op == ISL_AUX_OP_FAST_CLEAR && - params->dst.clear_color_addr.buffer != NULL) { - blorp_update_clear_color(batch, &params->dst); - } - - if (params->hiz_op == ISL_AUX_OP_FAST_CLEAR && - params->depth.clear_color_addr.buffer != NULL) { - blorp_update_clear_color(batch, &params->depth); - } + if (!(batch->flags & BLORP_BATCH_NO_UPDATE_CLEAR_COLOR) && + params->fast_clear_op == ISL_AUX_OP_FAST_CLEAR && + params->dst.clear_color_addr.buffer != NULL) { + blorp_update_clear_color(batch, 
&params->dst); } #if GFX_VER >= 8 diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 8b8b1263025..a8a138e6464 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -464,6 +464,33 @@ transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer, if (image->planes[depth_plane].aux_usage == ISL_AUX_USAGE_NONE) return; + /* Initialize the indirect clear color prior to first use. */ + const struct anv_address clear_color_addr = + anv_image_get_clear_color_addr(cmd_buffer->device, image, + VK_IMAGE_ASPECT_DEPTH_BIT); + if (!anv_address_is_null(clear_color_addr) && + (initial_layout == VK_IMAGE_LAYOUT_UNDEFINED || + initial_layout == VK_IMAGE_LAYOUT_PREINITIALIZED)) { + const enum isl_format depth_format = + image->planes[depth_plane].primary_surface.isl.format; + assert(ANV_HZ_FC_VAL == 1.0f); + const uint32_t depth_value = depth_format == ISL_FORMAT_R32_FLOAT ? + 0x3f800000 : ~0; + + const uint32_t clear_pixel_offset = clear_color_addr.offset + + isl_get_sampler_clear_field_offset(cmd_buffer->device->info, + depth_format); + const struct anv_address clear_pixel_addr = { + .bo = clear_color_addr.bo, + .offset = clear_pixel_offset, + }; + + struct mi_builder b; + mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch); + mi_builder_set_write_check(&b, true); + mi_store(&b, mi_mem32(clear_pixel_addr), mi_imm(depth_value)); + } + /* If will_full_fast_clear is set, the caller promises to fast-clear the * largest portion of the specified range as it can. */