From 3f7a36a9fb877e2723921ae2defa523f2ab8c5e2 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Thu, 23 May 2024 08:43:11 +0200 Subject: [PATCH] amd/common: add a function to initialize cb surface Signed-off-by: Samuel Pitoiset Part-of: --- src/amd/common/ac_descriptors.c | 210 ++++++++++++++++++++++++++++++++ src/amd/common/ac_descriptors.h | 31 +++++ src/amd/common/ac_surface.c | 11 -- src/amd/common/ac_surface.h | 1 - 4 files changed, 241 insertions(+), 12 deletions(-) diff --git a/src/amd/common/ac_descriptors.c b/src/amd/common/ac_descriptors.c index 6a3952d8ef9..19f2b94876e 100644 --- a/src/amd/common/ac_descriptors.c +++ b/src/amd/common/ac_descriptors.c @@ -900,3 +900,213 @@ ac_set_mutable_ds_surface_fields(const struct radeon_info *info, const struct ac ds->db_z_info |= S_028040_ZRANGE_PRECISION(state->zrange_precision); } } + +static uint32_t +ac_get_dcc_min_compressed_block_size(const struct radeon_info *info) +{ + /* This should typically match the request size of the memory type. DIMMs have 64B minimum + * request size, which means compressing 64B to 32B has no benefit, while GDDR and HBM have + * 32B minimum request size. Sometimes a different size is used depending on the data fabric, + * etc. + */ + return info->has_dedicated_vram || info->family == CHIP_GFX1151 ? + V_028C78_MIN_BLOCK_SIZE_32B : V_028C78_MIN_BLOCK_SIZE_64B; +} + +static void +ac_init_gfx6_cb_surface(const struct radeon_info *info, const struct ac_cb_state *state, + uint32_t cb_format, bool force_dst_alpha_1, struct ac_cb_surface *cb) +{ + const struct radeon_surf *surf = state->surf; + const uint32_t endian = ac_colorformat_endian_swap(cb_format); + + cb->cb_color_info |= S_028C70_ENDIAN(endian) | + S_028C70_FORMAT_GFX6(cb_format) | + S_028C70_COMPRESSION(!!surf->fmask_offset); + cb->cb_color_view = S_028C6C_SLICE_START(state->first_layer) | + S_028C6C_SLICE_MAX_GFX6(state->last_layer); + cb->cb_color_attrib = S_028C74_NUM_SAMPLES(util_logbase2(state->num_samples)) | + S_028C74_NUM_FRAGMENTS_GFX6(util_logbase2(state->num_storage_samples)) | + S_028C74_FORCE_DST_ALPHA_1_GFX6(force_dst_alpha_1); + cb->cb_color_attrib2 = 0; + cb->cb_dcc_control = 0; + + if (info->gfx_level == GFX9) { + cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX9(state->base_level); + cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(state->num_layers) | + S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type); + cb->cb_color_attrib2 |= S_028C68_MIP0_WIDTH(state->width - 1) | + S_028C68_MIP0_HEIGHT(state->height - 1) | + S_028C68_MAX_MIP(state->num_levels - 1); + } + + if (info->gfx_level >= GFX8) { + uint32_t max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B; + + if (state->num_storage_samples > 1) { + if (surf->bpe == 1) + max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B; + else if (surf->bpe == 2) + max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B; + } + + cb->cb_dcc_control |= S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) | + S_028C78_MIN_COMPRESSED_BLOCK_SIZE(ac_get_dcc_min_compressed_block_size(info)) | + S_028C78_INDEPENDENT_64B_BLOCKS(1); + } + + if (info->gfx_level == GFX6) { + /* Due to a hw bug, FMASK_BANK_HEIGHT must still be set on GFX6. (inherited from GFX5) */ + /* This must also be set for fast clear to work without FMASK. */ + const uint32_t fmask_bankh = surf->fmask_offset ? surf->u.legacy.color.fmask.bankh + : surf->u.legacy.bankh; + cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(util_logbase2(fmask_bankh)); + } +} + +static void +ac_init_gfx10_cb_surface(const struct radeon_info *info, const struct ac_cb_state *state, + uint32_t cb_format, bool force_dst_alpha_1, uint32_t width, + struct ac_cb_surface *cb) +{ + const struct radeon_surf *surf = state->surf; + uint32_t first_layer = state->first_layer; + uint32_t base_level = state->base_level; + uint32_t num_levels = state->num_levels; + + if (state->gfx10.nbc_view) { + assert(state->gfx10.nbc_view->valid); + first_layer = 0; + base_level = state->gfx10.nbc_view->level; + num_levels = state->gfx10.nbc_view->num_levels; + } + + cb->cb_color_view = S_028C6C_SLICE_START(first_layer) | + S_028C6C_SLICE_MAX_GFX10(state->last_layer) | + S_028C6C_MIP_LEVEL_GFX10(base_level); + cb->cb_color_attrib = 0; + cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(width - 1) | + S_028C68_MIP0_HEIGHT(state->height - 1) | + S_028C68_MAX_MIP(num_levels - 1); + cb->cb_color_attrib3 = S_028EE0_MIP0_DEPTH(state->num_layers) | + S_028EE0_RESOURCE_TYPE(surf->u.gfx9.resource_type) | + S_028EE0_RESOURCE_LEVEL(info->gfx_level >= GFX11 ? 0 : 1); + cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) | + S_028C78_MAX_COMPRESSED_BLOCK_SIZE(surf->u.gfx9.color.dcc.max_compressed_block_size) | + S_028C78_MIN_COMPRESSED_BLOCK_SIZE(ac_get_dcc_min_compressed_block_size(info)) | + S_028C78_INDEPENDENT_64B_BLOCKS(surf->u.gfx9.color.dcc.independent_64B_blocks); + + if (info->gfx_level >= GFX11) { + assert(!UTIL_ARCH_BIG_ENDIAN); + cb->cb_color_info |= S_028C70_FORMAT_GFX11(cb_format); + cb->cb_color_attrib |= S_028C74_NUM_FRAGMENTS_GFX11(util_logbase2(state->num_storage_samples)) | + S_028C74_FORCE_DST_ALPHA_1_GFX11(force_dst_alpha_1); + cb->cb_dcc_control |= S_028C78_INDEPENDENT_128B_BLOCKS_GFX11(surf->u.gfx9.color.dcc.independent_128B_blocks); + } else { + const uint32_t endian = ac_colorformat_endian_swap(cb_format); + + cb->cb_color_info |= S_028C70_ENDIAN(endian) | + S_028C70_FORMAT_GFX6(cb_format) | + S_028C70_COMPRESSION(!!surf->fmask_offset); + cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(util_logbase2(state->num_samples)) | + S_028C74_NUM_FRAGMENTS_GFX6(util_logbase2(state->num_storage_samples)) | + S_028C74_FORCE_DST_ALPHA_1_GFX6(force_dst_alpha_1); + cb->cb_dcc_control |= S_028C78_INDEPENDENT_128B_BLOCKS_GFX10(surf->u.gfx9.color.dcc.independent_128B_blocks); + } +} + +static void +ac_init_gfx12_cb_surface(const struct radeon_info *info, const struct ac_cb_state *state, + uint32_t cb_format, bool force_dst_alpha_1, uint32_t width, + struct ac_cb_surface *cb) +{ + const struct radeon_surf *surf = state->surf; + + assert(!UTIL_ARCH_BIG_ENDIAN); + cb->cb_color_info |= S_028EC0_FORMAT(cb_format); + cb->cb_color_view = S_028C64_SLICE_START(state->first_layer) | + S_028C64_SLICE_MAX(state->last_layer); + cb->cb_color_view2 = S_028C68_MIP_LEVEL(state->base_level); + cb->cb_color_attrib = S_028C6C_NUM_FRAGMENTS(util_logbase2(state->num_storage_samples)) | + S_028C6C_FORCE_DST_ALPHA_1(force_dst_alpha_1); + cb->cb_color_attrib2 = S_028C78_MIP0_HEIGHT(state->height - 1) | + S_028C78_MIP0_WIDTH(width - 1); + cb->cb_color_attrib3 = S_028C7C_MIP0_DEPTH(state->num_layers) | + S_028C7C_MAX_MIP(state->num_levels - 1) | + S_028C7C_RESOURCE_TYPE(surf->u.gfx9.resource_type); + cb->cb_dcc_control = S_028C70_MAX_UNCOMPRESSED_BLOCK_SIZE(1) | /* 256B */ + S_028C70_MAX_COMPRESSED_BLOCK_SIZE(surf->u.gfx9.color.dcc.max_compressed_block_size) | + S_028C70_ENABLE_MAX_COMP_FRAG_OVERRIDE(1) | + S_028C70_MAX_COMP_FRAGS(state->num_samples >= 8 ? 3 : + state->num_samples >= 4 ? 2 : 0); +} + +void +ac_init_cb_surface(const struct radeon_info *info, const struct ac_cb_state *state, struct ac_cb_surface *cb) +{ + const struct util_format_description *desc = util_format_description(state->format); + const uint32_t cb_format = ac_get_cb_format(info->gfx_level, state->format); + const struct radeon_surf *surf = state->surf; + uint32_t width = state->width; + + assert(cb_format != V_028C70_COLOR_INVALID); + + /* Intensity is implemented as Red, so treat it that way. */ + const bool force_dst_alpha_1 = + desc->swizzle[3] == PIPE_SWIZZLE_1 || util_format_is_intensity(state->format); + + /* GFX10.3+ can set a custom pitch for 1D and 2D non-array, but it must be a multiple of + * 256B for GFX10.3-11 and 128B for GFX12. + * + * We set the pitch in MIP0_WIDTH. + */ + if (info->gfx_level >= GFX10_3 && surf->u.gfx9.uses_custom_pitch) { + ASSERTED unsigned min_alignment = info->gfx_level >= GFX12 ? 128 : 256; + assert((surf->u.gfx9.surf_pitch * surf->bpe) % min_alignment == 0); + assert(surf->is_linear); + + width = surf->u.gfx9.surf_pitch; + + /* Subsampled images have the pitch in the units of blocks. */ + if (surf->blk_w == 2) + width *= 2; + } + + const uint32_t swap = ac_translate_colorswap(info->gfx_level, state->format, false); + const uint32_t ntype = ac_get_cb_number_type(state->format); + uint32_t blend_clamp = 0, blend_bypass = 0; + + /* blend clamp should be set for all NORM/SRGB types */ + if (ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM || + ntype == V_028C70_NUMBER_SRGB) + blend_clamp = 1; + + /* set blend bypass according to docs if SINT/UINT or 8/24 COLOR variants */ + if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT || + cb_format == V_028C70_COLOR_8_24 || cb_format == V_028C70_COLOR_24_8 || + cb_format == V_028C70_COLOR_X24_8_32_FLOAT) { + blend_clamp = 0; + blend_bypass = 1; + } + + const bool round_mode = ntype != V_028C70_NUMBER_UNORM && + ntype != V_028C70_NUMBER_SNORM && + ntype != V_028C70_NUMBER_SRGB && + cb_format != V_028C70_COLOR_8_24 && + cb_format != V_028C70_COLOR_24_8; + + cb->cb_color_info = S_028C70_COMP_SWAP(swap) | + S_028C70_BLEND_CLAMP(blend_clamp) | + S_028C70_BLEND_BYPASS(blend_bypass) | + S_028C70_SIMPLE_FLOAT(1) | + S_028C70_ROUND_MODE(round_mode) | + S_028C70_NUMBER_TYPE(ntype); + + if (info->gfx_level >= GFX12) { + ac_init_gfx12_cb_surface(info, state, cb_format, force_dst_alpha_1, width, cb); + } else if (info->gfx_level >= GFX10) { + ac_init_gfx10_cb_surface(info, state, cb_format, force_dst_alpha_1, width, cb); + } else { + ac_init_gfx6_cb_surface(info, state, cb_format, force_dst_alpha_1, cb); + } +} diff --git a/src/amd/common/ac_descriptors.h b/src/amd/common/ac_descriptors.h index 6dbcde97f19..1c5c0d26b77 100644 --- a/src/amd/common/ac_descriptors.h +++ b/src/amd/common/ac_descriptors.h @@ -203,6 +203,37 @@ void ac_set_mutable_ds_surface_fields(const struct radeon_info *info, const struct ac_mutable_ds_state *state, struct ac_ds_surface *ds); +struct ac_cb_state { + const struct radeon_surf *surf; + enum pipe_format format; + uint32_t width : 17; + uint32_t height : 17; + uint32_t first_layer : 14; + uint32_t last_layer : 14; + uint32_t num_layers : 14; + uint32_t num_samples : 5; + uint32_t num_storage_samples : 5; + uint32_t base_level : 5; + uint32_t num_levels : 6; + + struct { + struct ac_surf_nbc_view *nbc_view; + } gfx10; +}; + +struct ac_cb_surface { + uint32_t cb_color_info; + uint32_t cb_color_view; + uint32_t cb_color_view2; + uint32_t cb_color_attrib; + uint32_t cb_color_attrib2; /* GFX9+ */ + uint32_t cb_color_attrib3; /* GFX10+ */ + uint32_t cb_dcc_control; +}; + +void +ac_init_cb_surface(const struct radeon_info *info, const struct ac_cb_state *state, struct ac_cb_surface *cb); + #ifdef __cplusplus } #endif diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c index c53591740ad..bbb72bc6043 100644 --- a/src/amd/common/ac_surface.c +++ b/src/amd/common/ac_surface.c @@ -106,17 +106,6 @@ unsigned ac_pipe_config_to_num_pipes(unsigned pipe_config) } } -unsigned ac_get_dcc_min_compressed_block_size(const struct radeon_info *info) -{ - /* This should typically match the request size of the memory type. DIMMs have 64B minimum - * request size, which means compressing 64B to 32B has no benefit, while GDDR and HBM have - * 32B minimum request size. Sometimes a different size is used depending on the data fabric, - * etc. - */ - return info->has_dedicated_vram || info->family == CHIP_GFX1151 ? - V_028C78_MIN_BLOCK_SIZE_32B : V_028C78_MIN_BLOCK_SIZE_64B; -} - bool ac_modifier_has_dcc(uint64_t modifier) { return IS_AMD_FMT_MOD(modifier) && AMD_FMT_MOD_GET(DCC, modifier); diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h index 032449b95a5..5f812d47b52 100644 --- a/src/amd/common/ac_surface.h +++ b/src/amd/common/ac_surface.h @@ -445,7 +445,6 @@ int ac_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *inf const struct ac_surf_config *config, enum radeon_surf_mode mode, struct radeon_surf *surf); void ac_surface_zero_dcc_fields(struct radeon_surf *surf); -unsigned ac_get_dcc_min_compressed_block_size(const struct radeon_info *info); unsigned ac_pipe_config_to_num_pipes(unsigned pipe_config); void ac_surface_apply_bo_metadata(const struct radeon_info *info, struct radeon_surf *surf,