anv: Add support for fast clears on gen9
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
This commit is contained in:
@@ -1193,16 +1193,35 @@ anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer)
|
|||||||
struct blorp_surf surf;
|
struct blorp_surf surf;
|
||||||
get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
|
get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
|
||||||
att_state->aux_usage, &surf);
|
att_state->aux_usage, &surf);
|
||||||
|
surf.clear_color = vk_to_isl_color(att_state->clear_value.color);
|
||||||
|
|
||||||
const VkRect2D render_area = cmd_buffer->state.render_area;
|
const VkRect2D render_area = cmd_buffer->state.render_area;
|
||||||
|
|
||||||
|
if (att_state->fast_clear) {
|
||||||
|
blorp_fast_clear(&batch, &surf, iview->isl.format,
|
||||||
|
iview->isl.base_level,
|
||||||
|
iview->isl.base_array_layer, fb->layers,
|
||||||
|
render_area.offset.x, render_area.offset.y,
|
||||||
|
render_area.offset.x + render_area.extent.width,
|
||||||
|
render_area.offset.y + render_area.extent.height);
|
||||||
|
|
||||||
|
/* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
|
||||||
|
*
|
||||||
|
* "After Render target fast clear, pipe-control with color cache
|
||||||
|
* write-flush must be issued before sending any DRAW commands on
|
||||||
|
* that render target."
|
||||||
|
*/
|
||||||
|
cmd_buffer->state.pending_pipe_bits |=
|
||||||
|
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
|
||||||
|
} else {
|
||||||
blorp_clear(&batch, &surf, iview->isl.format, iview->isl.swizzle,
|
blorp_clear(&batch, &surf, iview->isl.format, iview->isl.swizzle,
|
||||||
iview->isl.base_level,
|
iview->isl.base_level,
|
||||||
iview->isl.base_array_layer, fb->layers,
|
iview->isl.base_array_layer, fb->layers,
|
||||||
render_area.offset.x, render_area.offset.y,
|
render_area.offset.x, render_area.offset.y,
|
||||||
render_area.offset.x + render_area.extent.width,
|
render_area.offset.x + render_area.extent.width,
|
||||||
render_area.offset.y + render_area.extent.height,
|
render_area.offset.y + render_area.extent.height,
|
||||||
vk_to_isl_color(att_state->clear_value.color), NULL);
|
surf.clear_color, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
att_state->pending_clear_aspects = 0;
|
att_state->pending_clear_aspects = 0;
|
||||||
}
|
}
|
||||||
@@ -1313,10 +1332,12 @@ ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer,
|
|||||||
struct anv_attachment_state *att_state =
|
struct anv_attachment_state *att_state =
|
||||||
&cmd_buffer->state.attachments[att];
|
&cmd_buffer->state.attachments[att];
|
||||||
|
|
||||||
assert(att_state->aux_usage != ISL_AUX_USAGE_CCS_D);
|
if (att_state->aux_usage == ISL_AUX_USAGE_NONE)
|
||||||
if (att_state->aux_usage != ISL_AUX_USAGE_CCS_E)
|
|
||||||
return; /* Nothing to resolve */
|
return; /* Nothing to resolve */
|
||||||
|
|
||||||
|
assert(att_state->aux_usage == ISL_AUX_USAGE_CCS_E ||
|
||||||
|
att_state->aux_usage == ISL_AUX_USAGE_CCS_D);
|
||||||
|
|
||||||
struct anv_render_pass *pass = cmd_buffer->state.pass;
|
struct anv_render_pass *pass = cmd_buffer->state.pass;
|
||||||
struct anv_subpass *subpass = cmd_buffer->state.subpass;
|
struct anv_subpass *subpass = cmd_buffer->state.subpass;
|
||||||
unsigned subpass_idx = subpass - pass->subpasses;
|
unsigned subpass_idx = subpass - pass->subpasses;
|
||||||
@@ -1327,14 +1348,17 @@ ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer,
|
|||||||
* of a particular attachment. That way we only resolve once but it's
|
* of a particular attachment. That way we only resolve once but it's
|
||||||
* still hot in the cache.
|
* still hot in the cache.
|
||||||
*/
|
*/
|
||||||
|
bool found_draw = false;
|
||||||
|
enum anv_subpass_usage usage = 0;
|
||||||
for (uint32_t s = subpass_idx + 1; s < pass->subpass_count; s++) {
|
for (uint32_t s = subpass_idx + 1; s < pass->subpass_count; s++) {
|
||||||
enum anv_subpass_usage usage = pass->attachments[att].subpass_usage[s];
|
usage |= pass->attachments[att].subpass_usage[s];
|
||||||
|
|
||||||
if (usage & (ANV_SUBPASS_USAGE_DRAW | ANV_SUBPASS_USAGE_RESOLVE_DST)) {
|
if (usage & (ANV_SUBPASS_USAGE_DRAW | ANV_SUBPASS_USAGE_RESOLVE_DST)) {
|
||||||
/* We found another subpass that draws to this attachment. We'll
|
/* We found another subpass that draws to this attachment. We'll
|
||||||
* wait to resolve until then.
|
* wait to resolve until then.
|
||||||
*/
|
*/
|
||||||
return;
|
found_draw = true;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1342,12 +1366,60 @@ ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer,
|
|||||||
const struct anv_image *image = iview->image;
|
const struct anv_image *image = iview->image;
|
||||||
assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
|
assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
|
||||||
|
|
||||||
if (image->aux_usage == ISL_AUX_USAGE_CCS_E)
|
enum blorp_fast_clear_op resolve_op = BLORP_FAST_CLEAR_OP_NONE;
|
||||||
|
if (!found_draw) {
|
||||||
|
/* This is the last subpass that writes to this attachment so we need to
|
||||||
|
* resolve here. Ideally, we would like to only resolve if the storeOp
|
||||||
|
* is set to VK_ATTACHMENT_STORE_OP_STORE. However, we need to ensure
|
||||||
|
* that the CCS bits are set to "resolved" because there may be copy or
|
||||||
|
* blit operations (which may ignore CCS) between now and the next time
|
||||||
|
* we render and we need to ensure that anything they write will be
|
||||||
|
* respected in the next render. Unfortunately, the hardware does not
|
||||||
|
* provide us with any sort of "invalidate" pass that sets the CCS to
|
||||||
|
* "resolved" without writing to the render target.
|
||||||
|
*/
|
||||||
|
if (iview->image->aux_usage != ISL_AUX_USAGE_CCS_E) {
|
||||||
|
/* The image destination surface doesn't support compression outside
|
||||||
|
* the render pass. We need a full resolve.
|
||||||
|
*/
|
||||||
|
resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
|
||||||
|
} else if (att_state->fast_clear) {
|
||||||
|
/* We don't know what to do with clear colors outside the render
|
||||||
|
* pass. We need a partial resolve.
|
||||||
|
*/
|
||||||
|
resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL;
|
||||||
|
} else {
|
||||||
|
/* The image "natively" supports all the compression we care about
|
||||||
|
* and we don't need to resolve at all. If this is the case, we also
|
||||||
|
* don't need to resolve for any of the input attachment cases below.
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
} else if (usage & ANV_SUBPASS_USAGE_INPUT) {
|
||||||
|
/* Input attachments are clear-color aware so, at least on Sky Lake, we
|
||||||
|
* can frequently sample from them with no resolves at all.
|
||||||
|
*/
|
||||||
|
if (att_state->aux_usage != att_state->input_aux_usage) {
|
||||||
|
assert(att_state->input_aux_usage == ISL_AUX_USAGE_NONE);
|
||||||
|
resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
|
||||||
|
} else if (!att_state->clear_color_is_zero_one) {
|
||||||
|
/* Sky Lake PRM, Vol. 2d, RENDER_SURFACE_STATE::Red Clear Color:
|
||||||
|
*
|
||||||
|
* "If Number of Multisamples is MULTISAMPLECOUNT_1 AND if this RT
|
||||||
|
* is fast cleared with non-0/1 clear value, this RT must be
|
||||||
|
* partially resolved (refer to Partial Resolve operation) before
|
||||||
|
* binding this surface to Sampler."
|
||||||
|
*/
|
||||||
|
resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (resolve_op == BLORP_FAST_CLEAR_OP_NONE)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
struct blorp_surf surf;
|
struct blorp_surf surf;
|
||||||
get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
|
get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
|
||||||
att_state->aux_usage, &surf);
|
att_state->aux_usage, &surf);
|
||||||
|
surf.clear_color = vk_to_isl_color(att_state->clear_value.color);
|
||||||
|
|
||||||
/* From the Sky Lake PRM Vol. 7, "Render Target Resolve":
|
/* From the Sky Lake PRM Vol. 7, "Render Target Resolve":
|
||||||
*
|
*
|
||||||
@@ -1368,12 +1440,14 @@ ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer,
|
|||||||
blorp_ccs_resolve(batch, &surf,
|
blorp_ccs_resolve(batch, &surf,
|
||||||
iview->isl.base_level,
|
iview->isl.base_level,
|
||||||
iview->isl.base_array_layer + layer,
|
iview->isl.base_array_layer + layer,
|
||||||
iview->isl.format,
|
iview->isl.format, resolve_op);
|
||||||
BLORP_FAST_CLEAR_OP_RESOLVE_FULL);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
cmd_buffer->state.pending_pipe_bits |=
|
cmd_buffer->state.pending_pipe_bits |=
|
||||||
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
|
ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
|
||||||
|
|
||||||
|
/* Once we've done any sort of resolve, we're no longer fast-cleared */
|
||||||
|
att_state->fast_clear = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@@ -1100,11 +1100,14 @@ void anv_dynamic_state_copy(struct anv_dynamic_state *dest,
|
|||||||
*/
|
*/
|
||||||
struct anv_attachment_state {
|
struct anv_attachment_state {
|
||||||
enum isl_aux_usage aux_usage;
|
enum isl_aux_usage aux_usage;
|
||||||
|
enum isl_aux_usage input_aux_usage;
|
||||||
struct anv_state color_rt_state;
|
struct anv_state color_rt_state;
|
||||||
struct anv_state input_att_state;
|
struct anv_state input_att_state;
|
||||||
|
|
||||||
VkImageAspectFlags pending_clear_aspects;
|
VkImageAspectFlags pending_clear_aspects;
|
||||||
|
bool fast_clear;
|
||||||
VkClearValue clear_value;
|
VkClearValue clear_value;
|
||||||
|
bool clear_color_is_zero_one;
|
||||||
};
|
};
|
||||||
|
|
||||||
/** State required while building cmd buffer */
|
/** State required while building cmd buffer */
|
||||||
|
@@ -191,23 +191,87 @@ add_image_view_relocs(struct anv_cmd_buffer *cmd_buffer,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static enum isl_aux_usage
|
static bool
|
||||||
fb_attachment_get_aux_usage(struct anv_device *device,
|
color_is_zero_one(VkClearColorValue value, enum isl_format format)
|
||||||
struct anv_framebuffer *fb,
|
|
||||||
uint32_t attachment)
|
|
||||||
{
|
{
|
||||||
struct anv_image_view *iview = fb->attachments[attachment];
|
if (isl_format_has_int_channel(format)) {
|
||||||
|
for (unsigned i = 0; i < 4; i++) {
|
||||||
|
if (value.int32[i] != 0 && value.int32[i] != 1)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (unsigned i = 0; i < 4; i++) {
|
||||||
|
if (value.float32[i] != 0.0f && value.float32[i] != 1.0f)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (iview->image->aux_surface.isl.size == 0)
|
return true;
|
||||||
return ISL_AUX_USAGE_NONE; /* No aux surface */
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
color_attachment_compute_aux_usage(struct anv_device *device,
|
||||||
|
struct anv_attachment_state *att_state,
|
||||||
|
struct anv_image_view *iview,
|
||||||
|
VkRect2D render_area,
|
||||||
|
union isl_color_value *fast_clear_color)
|
||||||
|
{
|
||||||
|
if (iview->image->aux_surface.isl.size == 0) {
|
||||||
|
att_state->aux_usage = ISL_AUX_USAGE_NONE;
|
||||||
|
att_state->input_aux_usage = ISL_AUX_USAGE_NONE;
|
||||||
|
att_state->fast_clear = false;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
assert(iview->image->aux_surface.isl.usage & ISL_SURF_USAGE_CCS_BIT);
|
assert(iview->image->aux_surface.isl.usage & ISL_SURF_USAGE_CCS_BIT);
|
||||||
|
|
||||||
if (isl_format_supports_lossless_compression(&device->info,
|
att_state->clear_color_is_zero_one =
|
||||||
iview->isl.format))
|
color_is_zero_one(att_state->clear_value.color, iview->isl.format);
|
||||||
return ISL_AUX_USAGE_CCS_E;
|
|
||||||
|
|
||||||
return ISL_AUX_USAGE_NONE;
|
if (att_state->pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
|
||||||
|
/* Start off assuming fast clears are possible */
|
||||||
|
att_state->fast_clear = true;
|
||||||
|
|
||||||
|
/* Potentially, we could do partial fast-clears but doing so has crazy
|
||||||
|
* alignment restrictions. It's easier to just restrict to full size
|
||||||
|
* fast clears for now.
|
||||||
|
*/
|
||||||
|
if (render_area.offset.x != 0 ||
|
||||||
|
render_area.offset.y != 0 ||
|
||||||
|
render_area.extent.width != iview->extent.width ||
|
||||||
|
render_area.extent.height != iview->extent.height)
|
||||||
|
att_state->fast_clear = false;
|
||||||
|
|
||||||
|
if (att_state->fast_clear) {
|
||||||
|
memcpy(fast_clear_color->u32, att_state->clear_value.color.uint32,
|
||||||
|
sizeof(fast_clear_color->u32));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
att_state->fast_clear = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isl_format_supports_lossless_compression(&device->info,
|
||||||
|
iview->isl.format)) {
|
||||||
|
att_state->aux_usage = ISL_AUX_USAGE_CCS_E;
|
||||||
|
att_state->input_aux_usage = ISL_AUX_USAGE_CCS_E;
|
||||||
|
} else if (att_state->fast_clear) {
|
||||||
|
att_state->aux_usage = ISL_AUX_USAGE_CCS_D;
|
||||||
|
/* From the Sky Lake PRM, RENDER_SURFACE_STATE::AuxiliarySurfaceMode:
|
||||||
|
*
|
||||||
|
* "If Number of Multisamples is MULTISAMPLECOUNT_1, AUX_CCS_D
|
||||||
|
* setting is only allowed if Surface Format supported for Fast
|
||||||
|
* Clear. In addition, if the surface is bound to the sampling
|
||||||
|
* engine, Surface Format must be supported for Render Target
|
||||||
|
* Compression for surfaces bound to the sampling engine."
|
||||||
|
*
|
||||||
|
* In other words, we can't sample from a fast-cleared image if it
|
||||||
|
* doesn't also support color compression.
|
||||||
|
*/
|
||||||
|
att_state->input_aux_usage = ISL_AUX_USAGE_NONE;
|
||||||
|
} else {
|
||||||
|
att_state->aux_usage = ISL_AUX_USAGE_NONE;
|
||||||
|
att_state->input_aux_usage = ISL_AUX_USAGE_NONE;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
@@ -350,9 +414,12 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer *cmd_buffer,
|
|||||||
struct anv_image_view *iview = framebuffer->attachments[i];
|
struct anv_image_view *iview = framebuffer->attachments[i];
|
||||||
assert(iview->vk_format == att->format);
|
assert(iview->vk_format == att->format);
|
||||||
|
|
||||||
|
union isl_color_value clear_color = { .u32 = { 0, } };
|
||||||
if (att_aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
|
if (att_aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
|
||||||
state->attachments[i].aux_usage =
|
color_attachment_compute_aux_usage(cmd_buffer->device,
|
||||||
fb_attachment_get_aux_usage(cmd_buffer->device, framebuffer, i);
|
&state->attachments[i],
|
||||||
|
iview, begin->renderArea,
|
||||||
|
&clear_color);
|
||||||
|
|
||||||
struct isl_view view = iview->isl;
|
struct isl_view view = iview->isl;
|
||||||
view.usage |= ISL_SURF_USAGE_RENDER_TARGET_BIT;
|
view.usage |= ISL_SURF_USAGE_RENDER_TARGET_BIT;
|
||||||
@@ -362,6 +429,7 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer *cmd_buffer,
|
|||||||
.view = &view,
|
.view = &view,
|
||||||
.aux_surf = &iview->image->aux_surface.isl,
|
.aux_surf = &iview->image->aux_surface.isl,
|
||||||
.aux_usage = state->attachments[i].aux_usage,
|
.aux_usage = state->attachments[i].aux_usage,
|
||||||
|
.clear_color = clear_color,
|
||||||
.mocs = cmd_buffer->device->default_mocs);
|
.mocs = cmd_buffer->device->default_mocs);
|
||||||
|
|
||||||
add_image_view_relocs(cmd_buffer, iview,
|
add_image_view_relocs(cmd_buffer, iview,
|
||||||
@@ -369,6 +437,7 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer *cmd_buffer,
|
|||||||
state->attachments[i].color_rt_state);
|
state->attachments[i].color_rt_state);
|
||||||
} else {
|
} else {
|
||||||
state->attachments[i].aux_usage = ISL_AUX_USAGE_NONE;
|
state->attachments[i].aux_usage = ISL_AUX_USAGE_NONE;
|
||||||
|
state->attachments[i].input_aux_usage = ISL_AUX_USAGE_NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (need_input_attachment_state(&pass->attachments[i])) {
|
if (need_input_attachment_state(&pass->attachments[i])) {
|
||||||
@@ -386,11 +455,12 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer *cmd_buffer,
|
|||||||
.surf = surf,
|
.surf = surf,
|
||||||
.view = &view,
|
.view = &view,
|
||||||
.aux_surf = &iview->image->aux_surface.isl,
|
.aux_surf = &iview->image->aux_surface.isl,
|
||||||
.aux_usage = state->attachments[i].aux_usage,
|
.aux_usage = state->attachments[i].input_aux_usage,
|
||||||
|
.clear_color = clear_color,
|
||||||
.mocs = cmd_buffer->device->default_mocs);
|
.mocs = cmd_buffer->device->default_mocs);
|
||||||
|
|
||||||
add_image_view_relocs(cmd_buffer, iview,
|
add_image_view_relocs(cmd_buffer, iview,
|
||||||
state->attachments[i].aux_usage,
|
state->attachments[i].input_aux_usage,
|
||||||
state->attachments[i].input_att_state);
|
state->attachments[i].input_att_state);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user