From cbbed6ca3ec71666796ca7952db9aef0178ae3d4 Mon Sep 17 00:00:00 2001 From: David Rosca Date: Thu, 22 Feb 2024 16:39:13 +0100 Subject: [PATCH] radeonsi/vcn: Implement separate QP for I/P/B frames This also fixes max_au_size as a side effect of using new IB. Reviewed-by: Ruijing Dong Part-of: --- src/amd/common/ac_vcn_enc.h | 17 +++- src/gallium/drivers/radeonsi/radeon_vcn_enc.c | 82 +++++++++++++++---- src/gallium/drivers/radeonsi/radeon_vcn_enc.h | 1 + .../drivers/radeonsi/radeon_vcn_enc_1_2.c | 33 ++++++-- .../drivers/radeonsi/radeon_vcn_enc_2_0.c | 14 ++-- 5 files changed, 114 insertions(+), 33 deletions(-) diff --git a/src/amd/common/ac_vcn_enc.h b/src/amd/common/ac_vcn_enc.h index 589f690f5d7..8615f898cd1 100644 --- a/src/amd/common/ac_vcn_enc.h +++ b/src/amd/common/ac_vcn_enc.h @@ -312,13 +312,22 @@ typedef struct rvcn_enc_rate_ctl_layer_init_s { } rvcn_enc_rate_ctl_layer_init_t; typedef struct rvcn_enc_rate_ctl_per_picture_s { - uint32_t qp; - uint32_t min_qp_app; - uint32_t max_qp_app; - uint32_t max_au_size; + uint32_t qp_i; + uint32_t qp_p; + uint32_t qp_b; + uint32_t min_qp_i; + uint32_t max_qp_i; + uint32_t min_qp_p; + uint32_t max_qp_p; + uint32_t min_qp_b; + uint32_t max_qp_b; + uint32_t max_au_size_i; + uint32_t max_au_size_p; + uint32_t max_au_size_b; uint32_t enabled_filler_data; uint32_t skip_frame_enable; uint32_t enforce_hrd; + uint32_t reserved_0xff; } rvcn_enc_rate_ctl_per_picture_t; typedef struct rvcn_enc_quality_params_s { diff --git a/src/gallium/drivers/radeonsi/radeon_vcn_enc.c b/src/gallium/drivers/radeonsi/radeon_vcn_enc.c index 0d81826baf0..d4558f9ba9f 100644 --- a/src/gallium/drivers/radeonsi/radeon_vcn_enc.c +++ b/src/gallium/drivers/radeonsi/radeon_vcn_enc.c @@ -235,7 +235,7 @@ static void radeon_vcn_enc_h264_get_spec_misc_param(struct radeon_encoder *enc, static void radeon_vcn_enc_h264_get_rc_param(struct radeon_encoder *enc, struct pipe_h264_enc_picture_desc *pic) { - uint32_t frame_rate_den, frame_rate_num; + uint32_t frame_rate_den, frame_rate_num, max_qp; enc->enc_pic.num_temporal_layers = pic->seq.num_temporal_layers ? pic->seq.num_temporal_layers : 1; for (int i = 0; i < enc->enc_pic.num_temporal_layers; i++) { @@ -261,10 +261,16 @@ static void radeon_vcn_enc_h264_get_rc_param(struct radeon_encoder *enc, frame_rate_num); } enc->enc_pic.rc_session_init.vbv_buffer_level = pic->rate_ctrl[0].vbv_buf_lv; - enc->enc_pic.rc_per_pic.qp = pic->quant_i_frames; - enc->enc_pic.rc_per_pic.min_qp_app = pic->rate_ctrl[0].min_qp; - enc->enc_pic.rc_per_pic.max_qp_app = pic->rate_ctrl[0].max_qp ? - pic->rate_ctrl[0].max_qp : 51; + enc->enc_pic.rc_per_pic.qp_i = pic->quant_i_frames; + enc->enc_pic.rc_per_pic.qp_p = pic->quant_p_frames; + enc->enc_pic.rc_per_pic.qp_b = pic->quant_b_frames; + enc->enc_pic.rc_per_pic.min_qp_i = pic->rate_ctrl[0].min_qp; + enc->enc_pic.rc_per_pic.min_qp_p = pic->rate_ctrl[0].min_qp; + enc->enc_pic.rc_per_pic.min_qp_b = pic->rate_ctrl[0].min_qp; + max_qp = pic->rate_ctrl[0].max_qp ? pic->rate_ctrl[0].max_qp : 51; + enc->enc_pic.rc_per_pic.max_qp_i = max_qp; + enc->enc_pic.rc_per_pic.max_qp_p = max_qp; + enc->enc_pic.rc_per_pic.max_qp_b = max_qp; enc->enc_pic.rc_per_pic.enabled_filler_data = pic->rate_ctrl[0].fill_data_enable; enc->enc_pic.rc_per_pic.skip_frame_enable = pic->rate_ctrl[0].skip_frame_enable; enc->enc_pic.rc_per_pic.enforce_hrd = pic->rate_ctrl[0].enforce_hrd; @@ -285,7 +291,9 @@ static void radeon_vcn_enc_h264_get_rc_param(struct radeon_encoder *enc, default: enc->enc_pic.rc_session_init.rate_control_method = RENCODE_RATE_CONTROL_METHOD_NONE; } - enc->enc_pic.rc_per_pic.max_au_size = pic->rate_ctrl[0].max_au_size; + enc->enc_pic.rc_per_pic.max_au_size_i = pic->rate_ctrl[0].max_au_size; + enc->enc_pic.rc_per_pic.max_au_size_p = pic->rate_ctrl[0].max_au_size; + enc->enc_pic.rc_per_pic.max_au_size_b = pic->rate_ctrl[0].max_au_size; } static void radeon_vcn_enc_h264_get_vui_param(struct radeon_encoder *enc, @@ -482,7 +490,7 @@ static void radeon_vcn_enc_hevc_get_spec_misc_param(struct radeon_encoder *enc, static void radeon_vcn_enc_hevc_get_rc_param(struct radeon_encoder *enc, struct pipe_h265_enc_picture_desc *pic) { - uint32_t frame_rate_den, frame_rate_num; + uint32_t frame_rate_den, frame_rate_num, max_qp; enc->enc_pic.rc_layer_init[0].target_bit_rate = pic->rc.target_bitrate; enc->enc_pic.rc_layer_init[0].peak_bit_rate = pic->rc.peak_bitrate; @@ -505,9 +513,13 @@ static void radeon_vcn_enc_hevc_get_rc_param(struct radeon_encoder *enc, frame_rate_den, frame_rate_num); enc->enc_pic.rc_session_init.vbv_buffer_level = pic->rc.vbv_buf_lv; - enc->enc_pic.rc_per_pic.qp = pic->rc.quant_i_frames; - enc->enc_pic.rc_per_pic.min_qp_app = pic->rc.min_qp; - enc->enc_pic.rc_per_pic.max_qp_app = pic->rc.max_qp ? pic->rc.max_qp : 51; + enc->enc_pic.rc_per_pic.qp_i = pic->rc.quant_i_frames; + enc->enc_pic.rc_per_pic.qp_p = pic->rc.quant_p_frames; + enc->enc_pic.rc_per_pic.min_qp_i = pic->rc.min_qp; + enc->enc_pic.rc_per_pic.min_qp_p = pic->rc.min_qp; + max_qp = pic->rc.max_qp ? pic->rc.max_qp : 51; + enc->enc_pic.rc_per_pic.max_qp_i = max_qp; + enc->enc_pic.rc_per_pic.max_qp_p = max_qp; enc->enc_pic.rc_per_pic.enabled_filler_data = pic->rc.fill_data_enable; enc->enc_pic.rc_per_pic.skip_frame_enable = pic->rc.skip_frame_enable; enc->enc_pic.rc_per_pic.enforce_hrd = pic->rc.enforce_hrd; @@ -527,7 +539,8 @@ static void radeon_vcn_enc_hevc_get_rc_param(struct radeon_encoder *enc, default: enc->enc_pic.rc_session_init.rate_control_method = RENCODE_RATE_CONTROL_METHOD_NONE; } - enc->enc_pic.rc_per_pic.max_au_size = pic->rc.max_au_size; + enc->enc_pic.rc_per_pic.max_au_size_i = pic->rc.max_au_size; + enc->enc_pic.rc_per_pic.max_au_size_p = pic->rc.max_au_size; } static void radeon_vcn_enc_hevc_get_vui_param(struct radeon_encoder *enc, @@ -699,7 +712,7 @@ static void radeon_vcn_enc_av1_color_description(struct radeon_encoder *enc, static void radeon_vcn_enc_av1_get_rc_param(struct radeon_encoder *enc, struct pipe_av1_enc_picture_desc *pic) { - uint32_t frame_rate_den, frame_rate_num; + uint32_t frame_rate_den, frame_rate_num, min_qp, max_qp; for (int i = 0; i < ARRAY_SIZE(enc->enc_pic.rc_layer_init); i++) { enc->enc_pic.rc_layer_init[i].target_bit_rate = pic->rc[i].target_bitrate; @@ -724,9 +737,14 @@ static void radeon_vcn_enc_av1_get_rc_param(struct radeon_encoder *enc, frame_rate_num); } enc->enc_pic.rc_session_init.vbv_buffer_level = pic->rc[0].vbv_buf_lv; - enc->enc_pic.rc_per_pic.qp = pic->rc[0].qp; - enc->enc_pic.rc_per_pic.min_qp_app = pic->rc[0].min_qp ? pic->rc[0].min_qp : 1; - enc->enc_pic.rc_per_pic.max_qp_app = pic->rc[0].max_qp ? pic->rc[0].max_qp : 255; + enc->enc_pic.rc_per_pic.qp_i = pic->rc[0].qp; + enc->enc_pic.rc_per_pic.qp_p = pic->rc[0].qp_inter; + min_qp = pic->rc[0].min_qp ? pic->rc[0].min_qp : 1; + enc->enc_pic.rc_per_pic.min_qp_i = min_qp; + enc->enc_pic.rc_per_pic.min_qp_p = min_qp; + max_qp = pic->rc[0].max_qp ? pic->rc[0].max_qp : 255; + enc->enc_pic.rc_per_pic.max_qp_i = max_qp; + enc->enc_pic.rc_per_pic.max_qp_p = max_qp; enc->enc_pic.rc_per_pic.enabled_filler_data = pic->rc[0].fill_data_enable; enc->enc_pic.rc_per_pic.skip_frame_enable = pic->rc[0].skip_frame_enable; enc->enc_pic.rc_per_pic.enforce_hrd = pic->rc[0].enforce_hrd; @@ -746,7 +764,8 @@ static void radeon_vcn_enc_av1_get_rc_param(struct radeon_encoder *enc, default: enc->enc_pic.rc_session_init.rate_control_method = RENCODE_RATE_CONTROL_METHOD_NONE; } - enc->enc_pic.rc_per_pic.max_au_size = pic->rc[0].max_au_size; + enc->enc_pic.rc_per_pic.max_au_size_i = pic->rc[0].max_au_size; + enc->enc_pic.rc_per_pic.max_au_size_p = pic->rc[0].max_au_size; } static void radeon_vcn_enc_av1_get_param(struct radeon_encoder *enc, @@ -1053,6 +1072,7 @@ static void radeon_enc_begin_frame(struct pipe_video_codec *encoder, struct radeon_encoder *enc = (struct radeon_encoder *)encoder; struct vl_video_buffer *vid_buf = (struct vl_video_buffer *)source; enc->need_rate_control = false; + enc->need_rc_per_pic = false; if (u_reduce_video_profile(enc->base.profile) == PIPE_VIDEO_FORMAT_MPEG4_AVC) { struct pipe_h264_enc_picture_desc *pic = (struct pipe_h264_enc_picture_desc *)picture; @@ -1060,12 +1080,32 @@ static void radeon_enc_begin_frame(struct pipe_video_codec *encoder, (enc->enc_pic.rc_layer_init[0].target_bit_rate != pic->rate_ctrl[0].target_bitrate) || (enc->enc_pic.rc_layer_init[0].frame_rate_num != pic->rate_ctrl[0].frame_rate_num) || (enc->enc_pic.rc_layer_init[0].frame_rate_den != pic->rate_ctrl[0].frame_rate_den); + + if (enc->need_rate_control) { + enc->enc_pic.rc_per_pic.qp_i = 0; + enc->enc_pic.rc_per_pic.qp_p = 0; + enc->enc_pic.rc_per_pic.qp_b = 0; + } + + enc->need_rc_per_pic = + (!enc->enc_pic.rc_per_pic.qp_i && enc->enc_pic.rc_per_pic.qp_i != pic->quant_i_frames) || + (!enc->enc_pic.rc_per_pic.qp_p && enc->enc_pic.rc_per_pic.qp_p != pic->quant_p_frames) || + (!enc->enc_pic.rc_per_pic.qp_b && enc->enc_pic.rc_per_pic.qp_b != pic->quant_b_frames); } else if (u_reduce_video_profile(picture->profile) == PIPE_VIDEO_FORMAT_HEVC) { struct pipe_h265_enc_picture_desc *pic = (struct pipe_h265_enc_picture_desc *)picture; enc->need_rate_control = (enc->enc_pic.rc_layer_init[0].target_bit_rate != pic->rc.target_bitrate) || (enc->enc_pic.rc_layer_init[0].frame_rate_num != pic->rc.frame_rate_num) || (enc->enc_pic.rc_layer_init[0].frame_rate_den != pic->rc.frame_rate_den); + + if (enc->need_rate_control) { + enc->enc_pic.rc_per_pic.qp_i = 0; + enc->enc_pic.rc_per_pic.qp_p = 0; + } + + enc->need_rc_per_pic = + (!enc->enc_pic.rc_per_pic.qp_i && enc->enc_pic.rc_per_pic.qp_i != pic->rc.quant_i_frames) || + (!enc->enc_pic.rc_per_pic.qp_p && enc->enc_pic.rc_per_pic.qp_p != pic->rc.quant_p_frames); } else if (u_reduce_video_profile(picture->profile) == PIPE_VIDEO_FORMAT_AV1) { struct pipe_av1_enc_picture_desc *pic = (struct pipe_av1_enc_picture_desc *)picture; enc->need_rate_control = @@ -1073,6 +1113,15 @@ static void radeon_enc_begin_frame(struct pipe_video_codec *encoder, (enc->enc_pic.rc_layer_init[0].frame_rate_num != pic->rc[0].frame_rate_num) || (enc->enc_pic.rc_layer_init[0].frame_rate_den != pic->rc[0].frame_rate_den); + if (enc->need_rate_control) { + enc->enc_pic.rc_per_pic.qp_i = 0; + enc->enc_pic.rc_per_pic.qp_p = 0; + } + + enc->need_rc_per_pic = + (!enc->enc_pic.rc_per_pic.qp_i && enc->enc_pic.rc_per_pic.qp_i != pic->rc[0].qp) || + (!enc->enc_pic.rc_per_pic.qp_p && enc->enc_pic.rc_per_pic.qp_p != pic->rc[0].qp_inter); + if (!enc->cdf) { enc->cdf = CALLOC_STRUCT(rvid_buffer); if (setup_cdf(enc)) { @@ -1139,6 +1188,7 @@ static void radeon_enc_begin_frame(struct pipe_video_codec *encoder, flush(enc); si_vid_destroy_buffer(&fb); enc->need_rate_control = false; + enc->need_rc_per_pic = false; } return; diff --git a/src/gallium/drivers/radeonsi/radeon_vcn_enc.h b/src/gallium/drivers/radeonsi/radeon_vcn_enc.h index b3b18080207..2c173057086 100644 --- a/src/gallium/drivers/radeonsi/radeon_vcn_enc.h +++ b/src/gallium/drivers/radeonsi/radeon_vcn_enc.h @@ -271,6 +271,7 @@ struct radeon_encoder { bool emulation_prevention; bool need_feedback; bool need_rate_control; + bool need_rc_per_pic; unsigned dpb_size; unsigned roi_size; rvcn_enc_picture_info_t dpb_info[RENCODE_MAX_NUM_RECONSTRUCTED_PICTURES]; diff --git a/src/gallium/drivers/radeonsi/radeon_vcn_enc_1_2.c b/src/gallium/drivers/radeonsi/radeon_vcn_enc_1_2.c index 0e9bfb670c0..d6d1cfdb941 100644 --- a/src/gallium/drivers/radeonsi/radeon_vcn_enc_1_2.c +++ b/src/gallium/drivers/radeonsi/radeon_vcn_enc_1_2.c @@ -25,7 +25,7 @@ #define RENCODE_IB_PARAM_LAYER_SELECT 0x00000005 #define RENCODE_IB_PARAM_RATE_CONTROL_SESSION_INIT 0x00000006 #define RENCODE_IB_PARAM_RATE_CONTROL_LAYER_INIT 0x00000007 -#define RENCODE_IB_PARAM_RATE_CONTROL_PER_PICTURE 0x00000008 +#define RENCODE_IB_PARAM_RATE_CONTROL_PER_PICTURE 0x0000001d #define RENCODE_IB_PARAM_QUALITY_PARAMS 0x00000009 #define RENCODE_IB_PARAM_SLICE_HEADER 0x0000000a #define RENCODE_IB_PARAM_ENCODE_PARAMS 0x0000000b @@ -1187,14 +1187,25 @@ static void radeon_enc_intra_refresh(struct radeon_encoder *enc) static void radeon_enc_rc_per_pic(struct radeon_encoder *enc) { + enc->enc_pic.rc_per_pic.reserved_0xff = 0xFFFFFFFF; + RADEON_ENC_BEGIN(enc->cmd.rc_per_pic); - RADEON_ENC_CS(enc->enc_pic.rc_per_pic.qp); - RADEON_ENC_CS(enc->enc_pic.rc_per_pic.min_qp_app); - RADEON_ENC_CS(enc->enc_pic.rc_per_pic.max_qp_app); - RADEON_ENC_CS(enc->enc_pic.rc_per_pic.max_au_size); + RADEON_ENC_CS(enc->enc_pic.rc_per_pic.qp_i); + RADEON_ENC_CS(enc->enc_pic.rc_per_pic.qp_p); + RADEON_ENC_CS(enc->enc_pic.rc_per_pic.qp_b); + RADEON_ENC_CS(enc->enc_pic.rc_per_pic.min_qp_i); + RADEON_ENC_CS(enc->enc_pic.rc_per_pic.max_qp_i); + RADEON_ENC_CS(enc->enc_pic.rc_per_pic.min_qp_p); + RADEON_ENC_CS(enc->enc_pic.rc_per_pic.max_qp_p); + RADEON_ENC_CS(enc->enc_pic.rc_per_pic.min_qp_b); + RADEON_ENC_CS(enc->enc_pic.rc_per_pic.max_qp_b); + RADEON_ENC_CS(enc->enc_pic.rc_per_pic.max_au_size_i); + RADEON_ENC_CS(enc->enc_pic.rc_per_pic.max_au_size_p); + RADEON_ENC_CS(enc->enc_pic.rc_per_pic.max_au_size_b); RADEON_ENC_CS(enc->enc_pic.rc_per_pic.enabled_filler_data); RADEON_ENC_CS(enc->enc_pic.rc_per_pic.skip_frame_enable); RADEON_ENC_CS(enc->enc_pic.rc_per_pic.enforce_hrd); + RADEON_ENC_CS(enc->enc_pic.rc_per_pic.reserved_0xff); RADEON_ENC_END(); } @@ -1397,12 +1408,18 @@ static void encode(struct radeon_encoder *enc) enc->total_task_size = 0; enc->task_info(enc, enc->need_feedback); - if (enc->need_rate_control) { + if (enc->need_rate_control || enc->need_rc_per_pic) { i = 0; do { enc->enc_pic.layer_sel.temporal_layer_index = i; - enc->layer_select(enc); - enc->rc_layer_init(enc); + if (enc->need_rate_control) { + enc->layer_select(enc); + enc->rc_layer_init(enc); + } + if (enc->need_rc_per_pic) { + enc->layer_select(enc); + enc->rc_per_pic(enc); + } } while (++i < enc->enc_pic.num_temporal_layers); } diff --git a/src/gallium/drivers/radeonsi/radeon_vcn_enc_2_0.c b/src/gallium/drivers/radeonsi/radeon_vcn_enc_2_0.c index a6bfd70b97c..d59b61bca6e 100644 --- a/src/gallium/drivers/radeonsi/radeon_vcn_enc_2_0.c +++ b/src/gallium/drivers/radeonsi/radeon_vcn_enc_2_0.c @@ -24,7 +24,6 @@ #define RENCODE_IB_PARAM_LAYER_SELECT 0x00000005 #define RENCODE_IB_PARAM_RATE_CONTROL_SESSION_INIT 0x00000006 #define RENCODE_IB_PARAM_RATE_CONTROL_LAYER_INIT 0x00000007 -#define RENCODE_IB_PARAM_RATE_CONTROL_PER_PICTURE 0x00000008 #define RENCODE_IB_PARAM_QUALITY_PARAMS 0x00000009 #define RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU 0x0000000a #define RENCODE_IB_PARAM_SLICE_HEADER 0x0000000b @@ -512,12 +511,18 @@ static void encode(struct radeon_encoder *enc) enc->total_task_size = 0; enc->task_info(enc, enc->need_feedback); - if (enc->need_rate_control) { + if (enc->need_rate_control || enc->need_rc_per_pic) { i = 0; do { enc->enc_pic.layer_sel.temporal_layer_index = i; - enc->layer_select(enc); - enc->rc_layer_init(enc); + if (enc->need_rate_control) { + enc->layer_select(enc); + enc->rc_layer_init(enc); + } + if (enc->need_rc_per_pic) { + enc->layer_select(enc); + enc->rc_per_pic(enc); + } } while (++i < enc->enc_pic.num_temporal_layers); } @@ -560,7 +565,6 @@ void radeon_enc_2_0_init(struct radeon_encoder *enc) enc->cmd.layer_select = RENCODE_IB_PARAM_LAYER_SELECT; enc->cmd.rc_session_init = RENCODE_IB_PARAM_RATE_CONTROL_SESSION_INIT; enc->cmd.rc_layer_init = RENCODE_IB_PARAM_RATE_CONTROL_LAYER_INIT; - enc->cmd.rc_per_pic = RENCODE_IB_PARAM_RATE_CONTROL_PER_PICTURE; enc->cmd.quality_params = RENCODE_IB_PARAM_QUALITY_PARAMS; enc->cmd.nalu = RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU; enc->cmd.slice_header = RENCODE_IB_PARAM_SLICE_HEADER;