From 8cd53d95fe3c36e659b0bef6ddd12be6fd3c32ed Mon Sep 17 00:00:00 2001 From: Ruijing Dong Date: Wed, 5 Jun 2024 14:36:10 -0400 Subject: [PATCH] radeonsi/vcn: update vcn4 tile processing logic The vcn4 tile number calculation doesn't consider some input cases, which could result in output bitstream corruption; this fixes the issue. The tile count is no longer taken directly from number_of_tiles but is derived by calculation. Also move some macros from the local vcn_5_0.c to the ac_vcn_enc.h header file so they can be re-used. Update the vcn4 spec_misc_av1 ip package. Reviewed-by: Boyuan Zhang Signed-off-by: Ruijing Dong Part-of: --- src/amd/common/ac_vcn_enc.h | 2 + src/gallium/drivers/radeonsi/radeon_vcn_enc.c | 2 - .../drivers/radeonsi/radeon_vcn_enc_4_0.c | 57 ++++++++++++------- .../drivers/radeonsi/radeon_vcn_enc_5_0.c | 2 - src/gallium/frontends/va/picture_av1_enc.c | 1 - src/gallium/include/pipe/p_video_state.h | 1 - 6 files changed, 39 insertions(+), 26 deletions(-) diff --git a/src/amd/common/ac_vcn_enc.h b/src/amd/common/ac_vcn_enc.h index 7f6d2d3ec53..3708d19828c 100644 --- a/src/amd/common/ac_vcn_enc.h +++ b/src/amd/common/ac_vcn_enc.h @@ -136,6 +136,8 @@ #define RENCODE_AV1_MAX_TILE_COLS 64 #define RENCODE_AV1_MAX_TILE_ROWS 64 #define RENCODE_AV1_MAX_TILE_AREA (4096 * 2304) +#define RENCODE_AV1_MAX_TILE_WIDTH 4096 +#define RENCODE_AV1_MAX_TILE_HEIGHT 4096 #define RENCODE_INVALID_COLOC_OFFSET 0XFFFFFFFF #define RENCODE_PICTURE_TYPE_B 0 diff --git a/src/gallium/drivers/radeonsi/radeon_vcn_enc.c b/src/gallium/drivers/radeonsi/radeon_vcn_enc.c index c59e6a6d84d..9b2c95ea564 100644 --- a/src/gallium/drivers/radeonsi/radeon_vcn_enc.c +++ b/src/gallium/drivers/radeonsi/radeon_vcn_enc.c @@ -687,8 +687,6 @@ static void radeon_vcn_enc_av1_get_spec_misc_param(struct radeon_encoder *enc, enc->enc_pic.av1_spec_misc.cdef_mode = pic->seq.seq_bits.enable_cdef; enc->enc_pic.av1_spec_misc.disable_cdf_update = pic->disable_cdf_update; enc->enc_pic.av1_spec_misc.disable_frame_end_update_cdf = pic->disable_frame_end_update_cdf; - /* 
tile has moved to tile config from vcn5 */ - enc->enc_pic.av1_spec_misc.num_tiles_per_picture = pic->num_tiles_in_pic; enc->enc_pic.av1_spec_misc.palette_mode_enable = pic->palette_mode_enable; enc->enc_pic.av1_spec_misc.cdef_bits = pic->cdef.cdef_bits; enc->enc_pic.av1_spec_misc.cdef_damping_minus3 = pic->cdef.cdef_damping_minus_3; diff --git a/src/gallium/drivers/radeonsi/radeon_vcn_enc_4_0.c b/src/gallium/drivers/radeonsi/radeon_vcn_enc_4_0.c index 582584319de..ec339179836 100644 --- a/src/gallium/drivers/radeonsi/radeon_vcn_enc_4_0.c +++ b/src/gallium/drivers/radeonsi/radeon_vcn_enc_4_0.c @@ -464,45 +464,62 @@ static void radeon_enc_av1_dpb_management(struct radeon_encoder *enc) static void radeon_enc_spec_misc_av1(struct radeon_encoder *enc) { - uint32_t num_of_tiles = enc->enc_pic.av1_spec_misc.num_tiles_per_picture; - uint32_t threshold_low, threshold_high; - uint32_t num_rows; - uint32_t num_columns; + rvcn_enc_av1_tile_config_t *p_config = &enc->enc_pic.av1_tile_config; + struct tile_1d_layout tile_layout; + uint32_t num_of_tiles; + uint32_t frame_width_in_sb; + uint32_t frame_height_in_sb; + uint32_t num_tiles_cols; + uint32_t num_tiles_rows; + uint32_t max_tile_area_sb = RENCODE_AV1_MAX_TILE_AREA >> (2 * 6); + uint32_t max_tile_width_in_sb = RENCODE_AV1_MAX_TILE_WIDTH >> 6; + uint32_t max_tile_ares_in_sb = 0; + uint32_t max_tile_height_in_sb = 0; + uint32_t min_log2_tiles_width_in_sb; + uint32_t min_log2_tiles; - num_rows = PIPE_ALIGN_IN_BLOCK_SIZE(enc->enc_pic.session_init.aligned_picture_height, + frame_width_in_sb = PIPE_ALIGN_IN_BLOCK_SIZE(enc->enc_pic.session_init.aligned_picture_width, PIPE_AV1_ENC_SB_SIZE); - num_columns = PIPE_ALIGN_IN_BLOCK_SIZE(enc->enc_pic.session_init.aligned_picture_width, + frame_height_in_sb = PIPE_ALIGN_IN_BLOCK_SIZE(enc->enc_pic.session_init.aligned_picture_height, PIPE_AV1_ENC_SB_SIZE); + num_tiles_cols = (frame_width_in_sb > max_tile_width_in_sb) ? 
2 : 1; + num_tiles_rows = CLAMP(p_config->num_tile_rows, + 1, RENCODE_AV1_TILE_CONFIG_MAX_NUM_ROWS); + min_log2_tiles_width_in_sb = radeon_enc_av1_tile_log2(max_tile_width_in_sb, frame_width_in_sb); + min_log2_tiles = MAX2(min_log2_tiles_width_in_sb, radeon_enc_av1_tile_log2(max_tile_area_sb, + frame_width_in_sb * frame_height_in_sb)); - if (num_rows > 64) { - /* max tile size 4096 x 2304 */ - threshold_low = ((num_rows + 63) / 64) * ((num_columns + 35) / 36); - num_of_tiles = (num_of_tiles & 1) ? num_of_tiles - 1 : num_of_tiles; - } else - threshold_low = 1; + max_tile_width_in_sb = (num_tiles_cols == 1) ? frame_width_in_sb : max_tile_width_in_sb; - threshold_high = num_rows > 16 ? 16 : num_rows; - threshold_high = num_columns > 64 ? threshold_high * 2 : threshold_high; + if (min_log2_tiles) + max_tile_ares_in_sb = (frame_width_in_sb * frame_height_in_sb) + >> (min_log2_tiles + 1); + else + max_tile_ares_in_sb = frame_width_in_sb * frame_height_in_sb; - num_of_tiles = CLAMP(num_of_tiles, threshold_low, threshold_high); + max_tile_height_in_sb = DIV_ROUND_UP(max_tile_ares_in_sb, max_tile_width_in_sb); + num_tiles_rows = MAX2(num_tiles_rows, + DIV_ROUND_UP(frame_height_in_sb, max_tile_height_in_sb)); + radeon_enc_av1_tile_layout(frame_height_in_sb, num_tiles_rows, 1, &tile_layout); + num_tiles_rows = tile_layout.nb_main_tile + tile_layout.nb_border_tile; + + num_of_tiles = num_tiles_cols * num_tiles_rows; /* in case of multiple tiles, it should be an obu frame */ if (num_of_tiles > 1) enc->enc_pic.stream_obu_frame = 1; else enc->enc_pic.stream_obu_frame = enc->enc_pic.is_obu_frame; - enc->enc_pic.av1_spec_misc.num_tiles_per_picture = num_of_tiles; - RADEON_ENC_BEGIN(enc->cmd.spec_misc_av1); RADEON_ENC_CS(enc->enc_pic.av1_spec_misc.palette_mode_enable); RADEON_ENC_CS(enc->enc_pic.av1_spec_misc.mv_precision); RADEON_ENC_CS(enc->enc_pic.av1_spec_misc.cdef_mode); RADEON_ENC_CS(enc->enc_pic.av1_spec_misc.disable_cdf_update); 
RADEON_ENC_CS(enc->enc_pic.av1_spec_misc.disable_frame_end_update_cdf); - RADEON_ENC_CS(enc->enc_pic.av1_spec_misc.num_tiles_per_picture); - RADEON_ENC_CS(0); - RADEON_ENC_CS(0); + RADEON_ENC_CS(num_of_tiles); + RADEON_ENC_CS(0xFFFFFFFF); + RADEON_ENC_CS(0xFFFFFFFF); RADEON_ENC_END(); } diff --git a/src/gallium/drivers/radeonsi/radeon_vcn_enc_5_0.c b/src/gallium/drivers/radeonsi/radeon_vcn_enc_5_0.c index 0b5d64ef110..5c68ff14d44 100644 --- a/src/gallium/drivers/radeonsi/radeon_vcn_enc_5_0.c +++ b/src/gallium/drivers/radeonsi/radeon_vcn_enc_5_0.c @@ -40,8 +40,6 @@ #define RENCODE_IB_PARAM_AV1_ENCODE_PARAMS 0x00300004 #define RENCODE_AV1_MIN_TILE_WIDTH 256 -#define RENCODE_AV1_MAX_TILE_WIDTH 4096 -#define RENCODE_AV1_MAX_TILE_HEIGHT 4096 static void radeon_enc_cdf_default_table(struct radeon_encoder *enc) { diff --git a/src/gallium/frontends/va/picture_av1_enc.c b/src/gallium/frontends/va/picture_av1_enc.c index ae504eee2c5..8ba4d1a783b 100644 --- a/src/gallium/frontends/va/picture_av1_enc.c +++ b/src/gallium/frontends/va/picture_av1_enc.c @@ -144,7 +144,6 @@ VAStatus vlVaHandleVAEncPictureParameterBufferTypeAV1(vlVaDriver *drv, vlVaConte context->desc.av1enc.allow_high_precision_mv = av1->picture_flags.bits.allow_high_precision_mv; context->desc.av1enc.palette_mode_enable = av1->picture_flags.bits.palette_mode_enable; context->desc.av1enc.long_term_reference = av1->picture_flags.bits.long_term_reference; - context->desc.av1enc.num_tiles_in_pic = av1->tile_cols * av1->tile_rows; context->desc.av1enc.tile_rows = av1->tile_rows; context->desc.av1enc.tile_cols = av1->tile_cols; context->desc.av1enc.context_update_tile_id = av1->context_update_tile_id; diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h index cb4381f2f5c..44988072614 100644 --- a/src/gallium/include/pipe/p_video_state.h +++ b/src/gallium/include/pipe/p_video_state.h @@ -1008,7 +1008,6 @@ struct pipe_av1_enc_picture_desc struct pipe_enc_quality_modes 
quality_modes; struct pipe_enc_intra_refresh intra_refresh; struct pipe_enc_roi roi; - uint32_t num_tiles_in_pic; /* [1, 32], */ uint32_t tile_rows; uint32_t tile_cols; unsigned num_tile_groups;