diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 9c20bb003c4..058770bcbfd 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -1117,6 +1117,35 @@ radv_load_depth_clear_regs(struct radv_cmd_buffer *cmd_buffer, radeon_emit(cmd_buffer->cs, 0); } +/* + * With DCC some colors don't require CMASK elimination before being + * used as a texture. This sets a predicate value to determine if the + * CMASK eliminate is required. + */ +void +radv_set_dcc_need_cmask_elim_pred(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, + bool value) +{ + uint64_t pred_val = value; + uint64_t va = cmd_buffer->device->ws->buffer_get_va(image->bo); + va += image->offset + image->dcc_pred_offset; + + if (!image->surface.dcc_size) + return; + + cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, image->bo, 8); + + radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 4, 0)); + radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM_ASYNC) | + S_370_WR_CONFIRM(1) | + S_370_ENGINE_SEL(V_370_PFP)); + radeon_emit(cmd_buffer->cs, va); + radeon_emit(cmd_buffer->cs, va >> 32); + radeon_emit(cmd_buffer->cs, pred_val); + radeon_emit(cmd_buffer->cs, pred_val >> 32); +} + void radv_set_color_clear_regs(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c index a8af4fd6d68..9e54b95ac3f 100644 --- a/src/amd/vulkan/radv_image.c +++ b/src/amd/vulkan/radv_image.c @@ -712,12 +712,16 @@ static void radv_image_alloc_cmask(struct radv_device *device, struct radv_image *image) { + uint32_t clear_value_size = 0; radv_image_get_cmask_info(device, image, &image->cmask); image->cmask.offset = align64(image->size, image->cmask.alignment); /* + 8 for storing the clear values */ - image->clear_value_offset = image->cmask.offset + image->cmask.size; - image->size = image->cmask.offset + image->cmask.size + 8; + if (!image->clear_value_offset) { + 
image->clear_value_offset = image->cmask.offset + image->cmask.size; + clear_value_size = 8; + } + image->size = image->cmask.offset + image->cmask.size + clear_value_size; image->alignment = MAX2(image->alignment, image->cmask.alignment); } @@ -726,9 +730,10 @@ radv_image_alloc_dcc(struct radv_device *device, struct radv_image *image) { image->dcc_offset = align64(image->size, image->surface.dcc_alignment); - /* + 8 for storing the clear values */ + /* + 16: 8 for storing the clear values + 8 for the dcc predicate */ image->clear_value_offset = image->dcc_offset + image->surface.dcc_size; - image->size = image->dcc_offset + image->surface.dcc_size + 8; + image->dcc_pred_offset = image->clear_value_offset + 8; + image->size = image->dcc_offset + image->surface.dcc_size + 16; image->alignment = MAX2(image->alignment, image->surface.dcc_alignment); } diff --git a/src/amd/vulkan/radv_meta_clear.c b/src/amd/vulkan/radv_meta_clear.c index bf583452453..353e8382ca1 100644 --- a/src/amd/vulkan/radv_meta_clear.c +++ b/src/amd/vulkan/radv_meta_clear.c @@ -856,6 +856,83 @@ fail: return res; } +static void vi_get_fast_clear_parameters(VkFormat format, + const VkClearColorValue *clear_value, + uint32_t* reset_value, + bool *can_avoid_fast_clear_elim) +{ + bool values[4] = {}; + int extra_channel; + bool main_value = false; + bool extra_value = false; + int i; + *can_avoid_fast_clear_elim = false; + + *reset_value = 0x20202020U; + + const struct vk_format_description *desc = vk_format_description(format); + if (format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 || + format == VK_FORMAT_R5G6B5_UNORM_PACK16 || + format == VK_FORMAT_B5G6R5_UNORM_PACK16) + extra_channel = -1; + else if (desc->layout == VK_FORMAT_LAYOUT_PLAIN) { + if (radv_translate_colorswap(format, false) <= 1) + extra_channel = desc->nr_channels - 1; + else + extra_channel = 0; + } else + return; + + for (i = 0; i < 4; i++) { + int index = desc->swizzle[i] - VK_SWIZZLE_X; + if (desc->swizzle[i] < VK_SWIZZLE_X || + desc->swizzle[i] > 
VK_SWIZZLE_W) + continue; + + if (desc->channel[i].pure_integer && + desc->channel[i].type == VK_FORMAT_TYPE_SIGNED) { + /* Use the maximum value for clamping the clear color. */ + int max = u_bit_consecutive(0, desc->channel[i].size - 1); + + values[i] = clear_value->int32[i] != 0; + if (clear_value->int32[i] != 0 && MIN2(clear_value->int32[i], max) != max) + return; + } else if (desc->channel[i].pure_integer && + desc->channel[i].type == VK_FORMAT_TYPE_UNSIGNED) { + /* Use the maximum value for clamping the clear color. */ + unsigned max = u_bit_consecutive(0, desc->channel[i].size); + + values[i] = clear_value->uint32[i] != 0U; + if (clear_value->uint32[i] != 0U && MIN2(clear_value->uint32[i], max) != max) + return; + } else { + values[i] = clear_value->float32[i] != 0.0F; + if (clear_value->float32[i] != 0.0F && clear_value->float32[i] != 1.0F) + return; + } + + if (index == extra_channel) + extra_value = values[i]; + else + main_value = values[i]; + } + + for (int i = 0; i < 4; ++i) + if (values[i] != main_value && + desc->swizzle[i] - VK_SWIZZLE_X != extra_channel && + desc->swizzle[i] >= VK_SWIZZLE_X && + desc->swizzle[i] <= VK_SWIZZLE_W) + return; + + *can_avoid_fast_clear_elim = true; + if (main_value) + *reset_value |= 0x80808080U; + + if (extra_value) + *reset_value |= 0x40404040U; + return; +} + static bool emit_fast_color_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att, @@ -930,9 +1007,17 @@ emit_fast_color_clear(struct radv_cmd_buffer *cmd_buffer, RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; /* clear cmask buffer */ if (iview->image->surface.dcc_size) { + uint32_t reset_value; + bool can_avoid_fast_clear_elim; + vi_get_fast_clear_parameters(iview->image->vk_format, + &clear_value, &reset_value, + &can_avoid_fast_clear_elim); + radv_fill_buffer(cmd_buffer, iview->image->bo, iview->image->offset + iview->image->dcc_offset, - iview->image->surface.dcc_size, 0x20202020); + iview->image->surface.dcc_size, reset_value); + 
radv_set_dcc_need_cmask_elim_pred(cmd_buffer, iview->image, + !can_avoid_fast_clear_elim); } else { if (iview->image->surface.bpe > 8) { diff --git a/src/amd/vulkan/radv_meta_fast_clear.c b/src/amd/vulkan/radv_meta_fast_clear.c index 94610c46987..27f8c160c06 100644 --- a/src/amd/vulkan/radv_meta_fast_clear.c +++ b/src/amd/vulkan/radv_meta_fast_clear.c @@ -334,6 +334,20 @@ emit_fast_clear_flush(struct radv_cmd_buffer *cmd_buffer, RADV_CMD_FLAG_FLUSH_AND_INV_CB_META); } +static void +radv_emit_set_predication_state_from_image(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, bool value) +{ + uint64_t va = 0; + + if (value) { + va = cmd_buffer->device->ws->buffer_get_va(image->bo) + image->offset; + va += image->dcc_pred_offset; + } + + si_emit_set_predication_state(cmd_buffer, va); +} + /** */ void @@ -351,6 +365,10 @@ radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer, radv_meta_save_pass(&saved_pass_state, cmd_buffer); radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer); + if (image->surface.dcc_size) { + radv_emit_set_predication_state_from_image(cmd_buffer, image, true); + cmd_buffer->state.predicating = true; + } for (uint32_t layer = 0; layer < layer_count; ++layer) { struct radv_image_view iview; @@ -413,6 +431,10 @@ radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer, &cmd_buffer->pool->alloc); } + if (image->surface.dcc_size) { + cmd_buffer->state.predicating = false; + radv_emit_set_predication_state_from_image(cmd_buffer, image, false); + } radv_meta_restore(&saved_state, cmd_buffer); radv_meta_restore_pass(&saved_pass_state, cmd_buffer); } diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index e1fb5565494..891b34ef138 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -871,7 +871,7 @@ void si_cs_emit_cache_flush(struct radeon_winsys_cs *cs, bool is_mec, enum radv_cmd_flush_bits flush_bits); void 
si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer); -void si_emit_set_pred(struct radv_cmd_buffer *cmd_buffer, uint64_t va); +void si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, uint64_t va); void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dest_va, uint64_t size); @@ -914,6 +914,9 @@ void radv_set_color_clear_regs(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, int idx, uint32_t color_values[2]); +void radv_set_dcc_need_cmask_elim_pred(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, + bool value); void radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *bo, uint64_t offset, uint64_t size, uint32_t value); @@ -1219,6 +1222,7 @@ struct radv_image { struct radv_fmask_info fmask; struct radv_cmask_info cmask; uint32_t clear_value_offset; + uint32_t dcc_pred_offset; }; /* Whether the image has a htile that is known consistent with the contents of diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c index ace9e665354..88616edfa27 100644 --- a/src/amd/vulkan/si_cmd_buffer.c +++ b/src/amd/vulkan/si_cmd_buffer.c @@ -1129,8 +1129,9 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer) cmd_buffer->state.flush_bits = 0; } +/* sets the CP predication state using a boolean stored at va */ void -si_emit_set_pred(struct radv_cmd_buffer *cmd_buffer, uint64_t va) +si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, uint64_t va) { uint32_t val = 0;