anv: Predicate fast-clear resolves
Image layouts only let us know that an image *may* be fast-cleared. For this reason we can end up with redundant resolves. Testing has shown that such resolves can measurably hurt performance and that predicating them can avoid the penalty.

v2:
- Introduce additional resolve state management function (Jason Ekstrand).
- Enable easy retrieval of fast clear state fields.

v3: Use more descriptive field enums (Jason)

Signed-off-by: Nanley Chery <nanley.g.chery@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
This commit is contained in:

committed by
Jason Ekstrand

parent
8e2729fbb8
commit
67027ddf3f
@@ -1630,7 +1630,8 @@ anv_ccs_resolve(struct anv_cmd_buffer * const cmd_buffer,
|
|||||||
return;
|
return;
|
||||||
|
|
||||||
struct blorp_batch batch;
|
struct blorp_batch batch;
|
||||||
blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);
|
blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
|
||||||
|
BLORP_BATCH_PREDICATE_ENABLE);
|
||||||
|
|
||||||
struct blorp_surf surf;
|
struct blorp_surf surf;
|
||||||
get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
|
get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
|
||||||
|
@@ -2090,11 +2090,16 @@ anv_fast_clear_state_entry_size(const struct anv_device *device)
|
|||||||
{
|
{
|
||||||
assert(device);
|
assert(device);
|
||||||
/* Entry contents:
|
/* Entry contents:
|
||||||
* +----------------------+
|
* +--------------------------------------------+
|
||||||
* | clear value dword(s) |
|
* | clear value dword(s) | needs resolve dword |
|
||||||
* +----------------------+
|
* +--------------------------------------------+
|
||||||
*/
|
*/
|
||||||
return device->isl_dev.ss.clear_value_size;
|
|
||||||
|
/* Ensure that the needs resolve dword is in fact dword-aligned to enable
|
||||||
|
* GPU memcpy operations.
|
||||||
|
*/
|
||||||
|
assert(device->isl_dev.ss.clear_value_size % 4 == 0);
|
||||||
|
return device->isl_dev.ss.clear_value_size + 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Returns true if a HiZ-enabled depth buffer can be sampled from. */
|
/* Returns true if a HiZ-enabled depth buffer can be sampled from. */
|
||||||
|
@@ -407,21 +407,92 @@ transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer,
|
|||||||
anv_gen8_hiz_op_resolve(cmd_buffer, image, hiz_op);
|
anv_gen8_hiz_op_resolve(cmd_buffer, image, hiz_op);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
enum fast_clear_state_field {
|
||||||
|
FAST_CLEAR_STATE_FIELD_CLEAR_COLOR,
|
||||||
|
FAST_CLEAR_STATE_FIELD_NEEDS_RESOLVE,
|
||||||
|
};
|
||||||
|
|
||||||
static inline uint32_t
|
static inline uint32_t
|
||||||
get_fast_clear_state_entry_offset(const struct anv_device *device,
|
get_fast_clear_state_offset(const struct anv_device *device,
|
||||||
const struct anv_image *image,
|
const struct anv_image *image,
|
||||||
unsigned level)
|
unsigned level, enum fast_clear_state_field field)
|
||||||
{
|
{
|
||||||
assert(device && image);
|
assert(device && image);
|
||||||
assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
|
assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
|
||||||
assert(level < anv_image_aux_levels(image));
|
assert(level < anv_image_aux_levels(image));
|
||||||
const uint32_t offset = image->offset + image->aux_surface.offset +
|
uint32_t offset = image->offset + image->aux_surface.offset +
|
||||||
image->aux_surface.isl.size +
|
image->aux_surface.isl.size +
|
||||||
anv_fast_clear_state_entry_size(device) * level;
|
anv_fast_clear_state_entry_size(device) * level;
|
||||||
|
|
||||||
|
switch (field) {
|
||||||
|
case FAST_CLEAR_STATE_FIELD_NEEDS_RESOLVE:
|
||||||
|
offset += device->isl_dev.ss.clear_value_size;
|
||||||
|
/* Fall-through */
|
||||||
|
case FAST_CLEAR_STATE_FIELD_CLEAR_COLOR:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
assert(offset < image->offset + image->size);
|
assert(offset < image->offset + image->size);
|
||||||
return offset;
|
return offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define MI_PREDICATE_SRC0 0x2400
|
||||||
|
#define MI_PREDICATE_SRC1 0x2408
|
||||||
|
|
||||||
|
/* Manages the state of a color image subresource to ensure resolves are
|
||||||
|
* performed properly.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
genX(set_image_needs_resolve)(struct anv_cmd_buffer *cmd_buffer,
|
||||||
|
const struct anv_image *image,
|
||||||
|
unsigned level, bool needs_resolve)
|
||||||
|
{
|
||||||
|
assert(cmd_buffer && image);
|
||||||
|
assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
|
||||||
|
assert(level < anv_image_aux_levels(image));
|
||||||
|
|
||||||
|
const uint32_t resolve_flag_offset =
|
||||||
|
get_fast_clear_state_offset(cmd_buffer->device, image, level,
|
||||||
|
FAST_CLEAR_STATE_FIELD_NEEDS_RESOLVE);
|
||||||
|
|
||||||
|
/* The HW docs say that there is no way to guarantee the completion of
|
||||||
|
* the following command. We use it nevertheless because it shows no
|
||||||
|
* issues in testing and is currently being used in the GL driver.
|
||||||
|
*/
|
||||||
|
anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
|
||||||
|
sdi.Address = (struct anv_address) { image->bo, resolve_flag_offset };
|
||||||
|
sdi.ImmediateData = needs_resolve;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
genX(load_needs_resolve_predicate)(struct anv_cmd_buffer *cmd_buffer,
|
||||||
|
const struct anv_image *image,
|
||||||
|
unsigned level)
|
||||||
|
{
|
||||||
|
assert(cmd_buffer && image);
|
||||||
|
assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
|
||||||
|
assert(level < anv_image_aux_levels(image));
|
||||||
|
|
||||||
|
const uint32_t resolve_flag_offset =
|
||||||
|
get_fast_clear_state_offset(cmd_buffer->device, image, level,
|
||||||
|
FAST_CLEAR_STATE_FIELD_NEEDS_RESOLVE);
|
||||||
|
|
||||||
|
/* Make the pending predicated resolve a no-op if one is not needed.
|
||||||
|
* predicate = do_resolve = resolve_flag != 0;
|
||||||
|
*/
|
||||||
|
emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 , 0);
|
||||||
|
emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0);
|
||||||
|
emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 , 0);
|
||||||
|
emit_lrm(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4,
|
||||||
|
image->bo, resolve_flag_offset);
|
||||||
|
anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
|
||||||
|
mip.LoadOperation = LOAD_LOADINV;
|
||||||
|
mip.CombineOperation = COMBINE_SET;
|
||||||
|
mip.CompareOperation = COMPARE_SRCS_EQUAL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
init_fast_clear_state_entry(struct anv_cmd_buffer *cmd_buffer,
|
init_fast_clear_state_entry(struct anv_cmd_buffer *cmd_buffer,
|
||||||
const struct anv_image *image,
|
const struct anv_image *image,
|
||||||
@@ -431,6 +502,15 @@ init_fast_clear_state_entry(struct anv_cmd_buffer *cmd_buffer,
|
|||||||
assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
|
assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
|
||||||
assert(level < anv_image_aux_levels(image));
|
assert(level < anv_image_aux_levels(image));
|
||||||
|
|
||||||
|
/* The resolve flag should be updated to signify that fast-clear/compression
|
||||||
|
* data needs to be removed when leaving the undefined layout. Such data
|
||||||
|
* may need to be removed if it would cause accesses to the color buffer
|
||||||
|
* to return incorrect data. The fast clear data in CCS_D buffers should
|
||||||
|
* be removed because CCS_D isn't enabled all the time.
|
||||||
|
*/
|
||||||
|
genX(set_image_needs_resolve)(cmd_buffer, image, level,
|
||||||
|
image->aux_usage == ISL_AUX_USAGE_NONE);
|
||||||
|
|
||||||
/* The fast clear value dword(s) will be copied into a surface state object.
|
/* The fast clear value dword(s) will be copied into a surface state object.
|
||||||
* Ensure that the restrictions of the fields in the dword(s) are followed.
|
* Ensure that the restrictions of the fields in the dword(s) are followed.
|
||||||
*
|
*
|
||||||
@@ -446,7 +526,8 @@ init_fast_clear_state_entry(struct anv_cmd_buffer *cmd_buffer,
|
|||||||
for (; i < cmd_buffer->device->isl_dev.ss.clear_value_size; i += 4) {
|
for (; i < cmd_buffer->device->isl_dev.ss.clear_value_size; i += 4) {
|
||||||
anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
|
anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
|
||||||
const uint32_t entry_offset =
|
const uint32_t entry_offset =
|
||||||
get_fast_clear_state_entry_offset(cmd_buffer->device, image, level);
|
get_fast_clear_state_offset(cmd_buffer->device, image, level,
|
||||||
|
FAST_CLEAR_STATE_FIELD_CLEAR_COLOR);
|
||||||
sdi.Address = (struct anv_address) { image->bo, entry_offset + i };
|
sdi.Address = (struct anv_address) { image->bo, entry_offset + i };
|
||||||
|
|
||||||
if (GEN_GEN >= 9) {
|
if (GEN_GEN >= 9) {
|
||||||
@@ -493,7 +574,8 @@ genX(copy_fast_clear_dwords)(struct anv_cmd_buffer *cmd_buffer,
|
|||||||
uint32_t ss_clear_offset = surface_state.offset +
|
uint32_t ss_clear_offset = surface_state.offset +
|
||||||
cmd_buffer->device->isl_dev.ss.clear_value_offset;
|
cmd_buffer->device->isl_dev.ss.clear_value_offset;
|
||||||
uint32_t entry_offset =
|
uint32_t entry_offset =
|
||||||
get_fast_clear_state_entry_offset(cmd_buffer->device, image, level);
|
get_fast_clear_state_offset(cmd_buffer->device, image, level,
|
||||||
|
FAST_CLEAR_STATE_FIELD_CLEAR_COLOR);
|
||||||
unsigned copy_size = cmd_buffer->device->isl_dev.ss.clear_value_size;
|
unsigned copy_size = cmd_buffer->device->isl_dev.ss.clear_value_size;
|
||||||
|
|
||||||
if (copy_from_surface_state) {
|
if (copy_from_surface_state) {
|
||||||
@@ -680,6 +762,8 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
|
|||||||
layer_count = MIN2(layer_count, anv_image_aux_layers(image, level));
|
layer_count = MIN2(layer_count, anv_image_aux_layers(image, level));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
genX(load_needs_resolve_predicate)(cmd_buffer, image, level);
|
||||||
|
|
||||||
/* Create a surface state with the right clear color and perform the
|
/* Create a surface state with the right clear color and perform the
|
||||||
* resolve.
|
* resolve.
|
||||||
*/
|
*/
|
||||||
@@ -711,6 +795,8 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
|
|||||||
image->aux_usage == ISL_AUX_USAGE_CCS_E ?
|
image->aux_usage == ISL_AUX_USAGE_CCS_E ?
|
||||||
BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL :
|
BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL :
|
||||||
BLORP_FAST_CLEAR_OP_RESOLVE_FULL);
|
BLORP_FAST_CLEAR_OP_RESOLVE_FULL);
|
||||||
|
|
||||||
|
genX(set_image_needs_resolve)(cmd_buffer, image, level, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
cmd_buffer->state.pending_pipe_bits |=
|
cmd_buffer->state.pending_pipe_bits |=
|
||||||
@@ -2460,9 +2546,6 @@ void genX(CmdDispatch)(
|
|||||||
#define GPGPU_DISPATCHDIMY 0x2504
|
#define GPGPU_DISPATCHDIMY 0x2504
|
||||||
#define GPGPU_DISPATCHDIMZ 0x2508
|
#define GPGPU_DISPATCHDIMZ 0x2508
|
||||||
|
|
||||||
#define MI_PREDICATE_SRC0 0x2400
|
|
||||||
#define MI_PREDICATE_SRC1 0x2408
|
|
||||||
|
|
||||||
void genX(CmdDispatchIndirect)(
|
void genX(CmdDispatchIndirect)(
|
||||||
VkCommandBuffer commandBuffer,
|
VkCommandBuffer commandBuffer,
|
||||||
VkBuffer _buffer,
|
VkBuffer _buffer,
|
||||||
@@ -2869,6 +2952,21 @@ cmd_buffer_subpass_sync_fast_clear_values(struct anv_cmd_buffer *cmd_buffer)
|
|||||||
genX(copy_fast_clear_dwords)(cmd_buffer, att_state->color_rt_state,
|
genX(copy_fast_clear_dwords)(cmd_buffer, att_state->color_rt_state,
|
||||||
iview->image, iview->isl.base_level,
|
iview->image, iview->isl.base_level,
|
||||||
true /* copy from ss */);
|
true /* copy from ss */);
|
||||||
|
|
||||||
|
/* Fast-clears impact whether or not a resolve will be necessary. */
|
||||||
|
if (iview->image->aux_usage == ISL_AUX_USAGE_CCS_E &&
|
||||||
|
att_state->clear_color_is_zero) {
|
||||||
|
/* This image always has the auxiliary buffer enabled. We can mark
|
||||||
|
* the subresource as not needing a resolve because the clear color
|
||||||
|
* will match what's in every RENDER_SURFACE_STATE object when it's
|
||||||
|
* being used for sampling.
|
||||||
|
*/
|
||||||
|
genX(set_image_needs_resolve)(cmd_buffer, iview->image,
|
||||||
|
iview->isl.base_level, false);
|
||||||
|
} else {
|
||||||
|
genX(set_image_needs_resolve)(cmd_buffer, iview->image,
|
||||||
|
iview->isl.base_level, true);
|
||||||
|
}
|
||||||
} else if (rp_att->load_op == VK_ATTACHMENT_LOAD_OP_LOAD) {
|
} else if (rp_att->load_op == VK_ATTACHMENT_LOAD_OP_LOAD) {
|
||||||
/* The attachment may have been fast-cleared in a previous render
|
/* The attachment may have been fast-cleared in a previous render
|
||||||
* pass and the value is needed now. Update the surface state(s).
|
* pass and the value is needed now. Update the surface state(s).
|
||||||
|
Reference in New Issue
Block a user