radv: Re-enable retiling.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10037>
This commit is contained in:
Bas Nieuwenhuizen
2021-04-13 11:21:36 +02:00
committed by Marge Bot
parent 515ffe4af4
commit 61a1a385d3
4 changed files with 100 additions and 69 deletions

View File

@@ -5983,6 +5983,14 @@ radv_retile_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *im
(dst_queue_mask & (1u << RADV_QUEUE_FOREIGN))))
radv_retile_dcc(cmd_buffer, image);
}
/**
 * Tell whether the image's first plane needs a DCC retile pass.
 *
 * Returns true only when the surface has a non-zero display DCC offset and
 * that offset is different from the surface's regular metadata offset.
 */
static bool
radv_image_need_retile(const struct radv_image *image)
{
   /* A zero display DCC offset means there is nothing to retile into. */
   if (!image->planes[0].surface.display_dcc_offset)
      return false;

   /* Retiling is only needed when the two offsets differ. */
   return image->planes[0].surface.display_dcc_offset != image->planes[0].surface.meta_offset;
}
/**
* Handle color image transitions for DCC/FMASK/CMASK.
*/
@@ -6003,7 +6011,7 @@ radv_handle_color_image_transition(struct radv_cmd_buffer *cmd_buffer, struct ra
radv_init_color_image_metadata(cmd_buffer, image, src_layout, src_render_loop, dst_layout,
dst_render_loop, src_queue_mask, dst_queue_mask, range);
if (0)
if (radv_image_need_retile(image))
radv_retile_transition(cmd_buffer, image, src_layout, dst_layout, dst_queue_mask);
return;
}
@@ -6025,8 +6033,8 @@ radv_handle_color_image_transition(struct radv_cmd_buffer *cmd_buffer, struct ra
fast_clear_flushed = true;
}
/*if (image->retile_map)
radv_retile_transition(cmd_buffer, image, src_layout, dst_layout, dst_queue_mask);*/
if (radv_image_need_retile(image))
radv_retile_transition(cmd_buffer, image, src_layout, dst_layout, dst_queue_mask);
} else if (radv_image_has_cmask(image) || radv_image_has_fmask(image)) {
if (radv_layout_can_fast_clear(cmd_buffer->device, image, src_layout, src_render_loop,
src_queue_mask) &&

View File

@@ -460,16 +460,8 @@ radv_device_init_meta(struct radv_device *device)
if (result != VK_SUCCESS)
goto fail_fmask_expand;
if (!on_demand) {
result = radv_device_init_meta_dcc_retile_state(device);
if (result != VK_SUCCESS)
goto fail_dcc_retile;
}
return VK_SUCCESS;
fail_dcc_retile:
radv_device_finish_meta_fmask_expand_state(device);
fail_fmask_expand:
radv_device_finish_meta_resolve_fragment_state(device);
fail_resolve_fragment:

View File

@@ -125,7 +125,6 @@ void radv_device_finish_meta_resolve_fragment_state(struct radv_device *device);
VkResult radv_device_init_meta_fmask_expand_state(struct radv_device *device);
void radv_device_finish_meta_fmask_expand_state(struct radv_device *device);
VkResult radv_device_init_meta_dcc_retile_state(struct radv_device *device);
void radv_device_finish_meta_dcc_retile_state(struct radv_device *device);
void radv_meta_save(struct radv_meta_saved_state *saved_state, struct radv_cmd_buffer *cmd_buffer,

View File

@@ -21,51 +21,69 @@
* IN THE SOFTWARE.
*/
#define AC_SURFACE_INCLUDE_NIR
#include "ac_surface.h"
#include "radv_meta.h"
#include "radv_private.h"
/**
 * Emit NIR that computes the global invocation ID of the current invocation.
 *
 * The result is work_group_id * local_size + local_invocation_id, where every
 * operand is first narrowed with the channel mask built from num_components
 * (presumably the lowest num_components channels; see BITFIELD_MASK).
 *
 * The workgroup size is read from b->shader->info.cs.local_size at the time
 * this helper is called.
 */
static nir_ssa_def *
get_global_ids(nir_builder *b, unsigned num_components)
{
   const unsigned channel_mask = BITFIELD_MASK(num_components);

   /* Position of this invocation inside its workgroup. */
   nir_ssa_def *invocation_id = nir_load_local_invocation_id(b);
   nir_ssa_def *in_group_pos = nir_channels(b, invocation_id, channel_mask);

   /* Index of the workgroup itself. */
   nir_ssa_def *workgroup_id = nir_load_work_group_id(b, 32);
   nir_ssa_def *group_index = nir_channels(b, workgroup_id, channel_mask);

   /* Workgroup dimensions as an immediate vector, narrowed the same way. */
   nir_ssa_def *group_dims_vec4 =
      nir_imm_ivec4(b, b->shader->info.cs.local_size[0], b->shader->info.cs.local_size[1],
                    b->shader->info.cs.local_size[2], 0);
   nir_ssa_def *group_dims = nir_channels(b, group_dims_vec4, channel_mask);

   nir_ssa_def *group_origin = nir_imul(b, group_index, group_dims);
   return nir_iadd(b, group_origin, in_group_pos);
}
static nir_shader *
build_dcc_retile_compute_shader(struct radv_device *dev)
build_dcc_retile_compute_shader(struct radv_device *dev, struct radeon_surf *surf)
{
const struct glsl_type *buf_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_UINT);
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "dcc_retile_compute");
b.shader->info.cs.local_size[0] = 256;
b.shader->info.cs.local_size[1] = 1;
b.shader->info.cs.local_size[0] = 8;
b.shader->info.cs.local_size[1] = 8;
b.shader->info.cs.local_size[2] = 1;
nir_variable *indices = nir_variable_create(b.shader, nir_var_uniform, buf_type, "indices_in");
indices->data.descriptor_set = 0;
indices->data.binding = 0;
nir_ssa_def *src_dcc_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
nir_ssa_def *src_dcc_pitch = nir_channels(&b, src_dcc_size, 1);
nir_ssa_def *src_dcc_height = nir_channels(&b, src_dcc_size, 2);
nir_ssa_def *dst_dcc_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 8);
nir_ssa_def *dst_dcc_pitch = nir_channels(&b, dst_dcc_size, 1);
nir_ssa_def *dst_dcc_height = nir_channels(&b, dst_dcc_size, 2);
nir_variable *input_dcc = nir_variable_create(b.shader, nir_var_uniform, buf_type, "dcc_in");
input_dcc->data.descriptor_set = 0;
input_dcc->data.binding = 1;
input_dcc->data.binding = 0;
nir_variable *output_dcc = nir_variable_create(b.shader, nir_var_uniform, buf_type, "dcc_out");
output_dcc->data.descriptor_set = 0;
output_dcc->data.binding = 2;
output_dcc->data.binding = 1;
nir_ssa_def *indices_ref = &nir_build_deref_var(&b, indices)->dest.ssa;
nir_ssa_def *input_dcc_ref = &nir_build_deref_var(&b, input_dcc)->dest.ssa;
nir_ssa_def *output_dcc_ref = &nir_build_deref_var(&b, output_dcc)->dest.ssa;
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], 0, 0, 0);
nir_ssa_def *coord = get_global_ids(&b, 2);
nir_ssa_def *zero = nir_imm_int(&b, 0);
coord = nir_imul(
&b, coord,
nir_imm_ivec2(&b, surf->u.gfx9.color.dcc_block_width, surf->u.gfx9.color.dcc_block_height));
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
nir_intrinsic_instr *index_vals =
nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_load);
index_vals->num_components = 2;
index_vals->src[0] = nir_src_for_ssa(indices_ref);
index_vals->src[1] = nir_src_for_ssa(global_id);
index_vals->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
index_vals->src[3] = nir_src_for_ssa(nir_imm_int(&b, 0));
nir_ssa_dest_init(&index_vals->instr, &index_vals->dest, 2, 32, "indices");
nir_builder_instr_insert(&b, &index_vals->instr);
nir_ssa_def *src = nir_channels(&b, &index_vals->dest.ssa, 1);
nir_ssa_def *dst = nir_channels(&b, &index_vals->dest.ssa, 2);
nir_ssa_def *src = ac_nir_dcc_addr_from_coord(&b, &dev->physical_device->rad_info, surf->bpe,
&surf->u.gfx9.color.dcc_equation, src_dcc_pitch,
src_dcc_height, zero, nir_channel(&b, coord, 0),
nir_channel(&b, coord, 1), zero, zero, zero);
nir_ssa_def *dst = ac_nir_dcc_addr_from_coord(
&b, &dev->physical_device->rad_info, surf->bpe, &surf->u.gfx9.color.display_dcc_equation,
dst_dcc_pitch, dst_dcc_height, zero, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1),
zero, zero, zero);
nir_intrinsic_instr *dcc_val =
nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_load);
@@ -105,16 +123,26 @@ radv_device_finish_meta_dcc_retile_state(struct radv_device *device)
memset(&state->dcc_retile, 0, sizeof(state->dcc_retile));
}
VkResult
radv_device_init_meta_dcc_retile_state(struct radv_device *device)
/*
* This takes a surface, but the only things used are:
* - BPE
* - DCC equations
* - DCC block size
*
* BPE is always 4 at the moment and the rest is derived from the tilemode,
* and ac_surface limits displayable DCC to at most 1 tiling mode. So in effect
* this shader is independent of the surface.
*/
static VkResult
radv_device_init_meta_dcc_retile_state(struct radv_device *device, struct radeon_surf *surf)
{
VkResult result = VK_SUCCESS;
nir_shader *cs = build_dcc_retile_compute_shader(device);
nir_shader *cs = build_dcc_retile_compute_shader(device, surf);
VkDescriptorSetLayoutCreateInfo ds_create_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
.bindingCount = 3,
.bindingCount = 2,
.pBindings = (VkDescriptorSetLayoutBinding[]){
{.binding = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
@@ -126,11 +154,6 @@ radv_device_init_meta_dcc_retile_state(struct radv_device *device)
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = NULL},
{.binding = 2,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = NULL},
}};
result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
@@ -143,7 +166,8 @@ radv_device_init_meta_dcc_retile_state(struct radv_device *device)
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 1,
.pSetLayouts = &device->meta_state.dcc_retile.ds_layout,
.pushConstantRangeCount = 0,
.pushConstantRangeCount = 1,
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
};
result =
@@ -198,24 +222,26 @@ radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image)
/* Compile pipelines if not already done so. */
if (!cmd_buffer->device->meta_state.dcc_retile.pipeline) {
VkResult ret = radv_device_init_meta_dcc_retile_state(cmd_buffer->device);
VkResult ret =
radv_device_init_meta_dcc_retile_state(cmd_buffer->device, &image->planes[0].surface);
if (ret != VK_SUCCESS) {
cmd_buffer->record_result = ret;
return;
}
}
radv_meta_save(&saved_state, cmd_buffer,
RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE);
radv_meta_save(
&saved_state, cmd_buffer,
RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS);
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
device->meta_state.dcc_retile.pipeline);
struct radv_buffer buffer = {.size = image->size, .bo = image->bo, .offset = image->offset};
struct radv_buffer_view views[3];
VkBufferView view_handles[3];
radv_buffer_view_init(views + 1, cmd_buffer->device,
struct radv_buffer_view views[2];
VkBufferView view_handles[2];
radv_buffer_view_init(views, cmd_buffer->device,
&(VkBufferViewCreateInfo){
.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
.buffer = radv_buffer_to_handle(&buffer),
@@ -223,7 +249,7 @@ radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image)
.range = image->planes[0].surface.meta_size,
.format = VK_FORMAT_R8_UINT,
});
radv_buffer_view_init(views + 2, cmd_buffer->device,
radv_buffer_view_init(views + 1, cmd_buffer->device,
&(VkBufferViewCreateInfo){
.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
.buffer = radv_buffer_to_handle(&buffer),
@@ -231,12 +257,12 @@ radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image)
.range = image->planes[0].surface.u.gfx9.color.display_dcc_size,
.format = VK_FORMAT_R8_UINT,
});
for (unsigned i = 0; i < 3; ++i)
for (unsigned i = 0; i < 2; ++i)
view_handles[i] = radv_buffer_view_to_handle(&views[i]);
radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
device->meta_state.dcc_retile.p_layout, 0, /* set */
3, /* descriptorWriteCount */
2, /* descriptorWriteCount */
(VkWriteDescriptorSet[]){
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
@@ -254,20 +280,26 @@ radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image)
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
.pTexelBufferView = &view_handles[1],
},
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstBinding = 2,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
.pTexelBufferView = &view_handles[2],
},
});
/* src+dst pairs count double, so the number of DCC bytes we move is
* actually half of dcc_retile_num_elements. */
/*radv_unaligned_dispatch(cmd_buffer, image->planes[0].surface.u.gfx9.color.dcc_retile_num_elements / 2,
1, 1);*/
unsigned width = DIV_ROUND_UP(image->info.width, vk_format_get_blockwidth(image->vk_format));
unsigned height = DIV_ROUND_UP(image->info.height, vk_format_get_blockheight(image->vk_format));
unsigned dcc_width = DIV_ROUND_UP(width, image->planes[0].surface.u.gfx9.color.dcc_block_width);
unsigned dcc_height =
DIV_ROUND_UP(height, image->planes[0].surface.u.gfx9.color.dcc_block_height);
uint32_t constants[] = {
image->planes[0].surface.u.gfx9.color.dcc_pitch_max + 1,
image->planes[0].surface.u.gfx9.color.dcc_height,
image->planes[0].surface.u.gfx9.color.display_dcc_pitch_max + 1,
image->planes[0].surface.u.gfx9.color.display_dcc_height,
};
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
device->meta_state.dcc_retile.p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
constants);
radv_unaligned_dispatch(cmd_buffer, dcc_width, dcc_height, 1);
radv_meta_restore(&saved_state, cmd_buffer);