From 8dc7fbaed3766fcc0d00504fc68d0252dfd67a87 Mon Sep 17 00:00:00 2001 From: "Chan, Roy" Date: Sun, 22 Sep 2024 21:21:22 -0400 Subject: [PATCH] amd/vpelib: Revise the config sharing handling [WHY] - was hardcoded to store 16 configs only - as the config descriptor usage grows, more is needed - in bypass case, we also generate a new config which is a waste [HOW] - change to use vector to store configs - don't force new config desc if in bypass - revise the vector API, reduce the parameter passing [TESTING] - Tested with corresponding test cases Reviewed-by: Brendan Leder Acked-by: Chih-Wei Chien Signed-off-by: Roy Chan Part-of: --- src/amd/vpelib/inc/vpe_types.h | 10 +- .../vpelib/src/chip/vpe10/vpe10_cmd_builder.c | 42 ++-- .../vpelib/src/chip/vpe10/vpe10_resource.c | 6 +- src/amd/vpelib/src/core/background.c | 2 +- src/amd/vpelib/src/core/color.c | 16 +- src/amd/vpelib/src/core/inc/config_cache.h | 27 ++- src/amd/vpelib/src/core/inc/resource.h | 6 +- src/amd/vpelib/src/core/inc/vpe_priv.h | 16 +- src/amd/vpelib/src/core/resource.c | 187 +++++++++++------- src/amd/vpelib/src/core/vpelib.c | 77 +++++++- src/amd/vpelib/src/utils/inc/vector.h | 10 +- src/amd/vpelib/src/utils/vector.c | 15 +- 12 files changed, 270 insertions(+), 144 deletions(-) diff --git a/src/amd/vpelib/inc/vpe_types.h b/src/amd/vpelib/inc/vpe_types.h index d40ebad829f..a9b0acf93be 100644 --- a/src/amd/vpelib/inc/vpe_types.h +++ b/src/amd/vpelib/inc/vpe_types.h @@ -211,12 +211,12 @@ struct vpe_caps { uint32_t is_apu : 1; uint32_t bg_color_check_support : 1; struct { - int num_dpp; - int num_opp; - int num_mpc_3dlut; - int num_cdc_be; + uint32_t num_dpp; + uint32_t num_opp; + uint32_t num_mpc_3dlut; + uint32_t num_cdc_be; - int num_queue; /**< num of hw queue */ + uint32_t num_queue; /**< num of hw queue */ } resource_caps; struct vpe_color_caps color_caps; diff --git a/src/amd/vpelib/src/chip/vpe10/vpe10_cmd_builder.c b/src/amd/vpelib/src/chip/vpe10/vpe10_cmd_builder.c index e0ded5df0f3..9b9d6fd81b9 100644 --- a/src/amd/vpelib/src/chip/vpe10/vpe10_cmd_builder.c +++ b/src/amd/vpelib/src/chip/vpe10/vpe10_cmd_builder.c @@ -63,9 +63,11 @@ enum vpe_status vpe10_build_vpe_cmd( struct cmd_builder *builder = &vpe_priv->resource.cmd_builder; struct vpe_desc_writer *vpe_desc_writer = &vpe_priv->vpe_desc_writer; struct vpe_buf *emb_buf = &cur_bufs->emb_buf; - struct output_ctx *output_ctx; - struct pipe_ctx *pipe_ctx = NULL; + struct output_ctx *output_ctx; + struct pipe_ctx *pipe_ctx = NULL; uint32_t pipe_idx, config_idx; + struct vpe_vector *config_vector; + struct config_record *config; struct vpe_cmd_info *cmd_info = vpe_vector_get(vpe_priv->vpe_cmd_vector, cmd_idx); VPE_ASSERT(cmd_info); @@ -118,19 +120,22 @@ enum vpe_status vpe10_build_vpe_cmd( // follow the same order of config generation in "non-reuse" case // stream sharing - VPE_ASSERT(stream_ctx->num_configs[pipe_idx]); - for (config_idx = 0; config_idx < stream_ctx->num_configs[pipe_idx]; config_idx++) { - vpe_desc_writer->add_config_desc(vpe_desc_writer, - stream_ctx->configs[pipe_idx][config_idx].config_base_addr, reuse, - (uint8_t)emb_buf->tmz); + config_vector = stream_ctx->configs[pipe_idx]; + VPE_ASSERT(config_vector->num_elements); + for (config_idx = 0; config_idx < config_vector->num_elements; config_idx++) { + config = (struct config_record *)vpe_vector_get(config_vector, config_idx); + + vpe_desc_writer->add_config_desc( + vpe_desc_writer, config->config_base_addr, reuse, (uint8_t)emb_buf->tmz); } // stream-op sharing - for (config_idx = 0; config_idx < stream_ctx->num_stream_op_configs[pipe_idx][cmd_type]; - config_idx++) { - vpe_desc_writer->add_config_desc(vpe_desc_writer, - stream_ctx->stream_op_configs[pipe_idx][cmd_type][config_idx].config_base_addr, - reuse, (uint8_t)emb_buf->tmz); + config_vector = stream_ctx->stream_op_configs[pipe_idx][cmd_type]; + for (config_idx = 0; config_idx < config_vector->num_elements; config_idx++) { + config = (struct config_record *)vpe_vector_get(config_vector, config_idx); + + vpe_desc_writer->add_config_desc( + vpe_desc_writer, config->config_base_addr, reuse, (uint8_t)emb_buf->tmz); } // command specific @@ -148,14 +153,19 @@ enum vpe_status vpe10_build_vpe_cmd( // backend programming output_ctx = &vpe_priv->output_ctx; - if (!output_ctx->num_configs[0]) { + + config_vector = output_ctx->configs[0]; + if (!config_vector->num_elements) { vpe_priv->resource.program_backend(vpe_priv, pipe_ctx->pipe_idx, cmd_idx, false); } else { bool reuse = !vpe_priv->init.debug.disable_reuse_bit; + // re-use output register configs - for (config_idx = 0; config_idx < output_ctx->num_configs[0]; config_idx++) { - vpe_desc_writer->add_config_desc(vpe_desc_writer, - output_ctx->configs[0][config_idx].config_base_addr, reuse, (uint8_t)emb_buf->tmz); + for (config_idx = 0; config_idx < config_vector->num_elements; config_idx++) { + config = (struct config_record *)vpe_vector_get(config_vector, config_idx); + + vpe_desc_writer->add_config_desc( + vpe_desc_writer, config->config_base_addr, reuse, (uint8_t)emb_buf->tmz); } vpe_priv->resource.program_backend(vpe_priv, pipe_ctx->pipe_idx, cmd_idx, true); diff --git a/src/amd/vpelib/src/chip/vpe10/vpe10_resource.c b/src/amd/vpelib/src/chip/vpe10/vpe10_resource.c index a47e54c7ed7..6de708dadcf 100644 --- a/src/amd/vpelib/src/chip/vpe10/vpe10_resource.c +++ b/src/amd/vpelib/src/chip/vpe10/vpe10_resource.c @@ -951,7 +951,7 @@ enum vpe_status vpe10_populate_cmd_info(struct vpe_priv *vpe_priv) cmd_info.tm_enabled = tm_enabled; cmd_info.insert_start_csync = false; cmd_info.insert_end_csync = false; - vpe_vector_push(vpe_priv, vpe_priv->vpe_cmd_vector, &cmd_info); + vpe_vector_push(vpe_priv->vpe_cmd_vector, &cmd_info); // The following codes are only valid if blending is supported /* @@ -979,6 +979,7 @@ void vpe10_create_stream_ops_config(struct vpe_priv *vpe_priv, uint32_t pipe_idx struct dpp *dpp = vpe_priv->resource.dpp[pipe_idx]; struct mpc *mpc = vpe_priv->resource.mpc[pipe_idx]; enum vpe_cmd_type cmd_type = VPE_CMD_TYPE_COUNT; + struct vpe_vector *config_vector; vpe_priv->fe_cb_ctx.stream_op_sharing = true; vpe_priv->fe_cb_ctx.stream_sharing = false; @@ -995,7 +996,8 @@ void vpe10_create_stream_ops_config(struct vpe_priv *vpe_priv, uint32_t pipe_idx return; // return if already generated - if (stream_ctx->num_stream_op_configs[pipe_idx][cmd_type]) + config_vector = stream_ctx->stream_op_configs[pipe_idx][cmd_type]; + if (config_vector->num_elements) return; vpe_priv->fe_cb_ctx.cmd_type = cmd_type; diff --git a/src/amd/vpelib/src/core/background.c b/src/amd/vpelib/src/core/background.c index fed2eba9095..bc72f869fc7 100644 --- a/src/amd/vpelib/src/core/background.c +++ b/src/amd/vpelib/src/core/background.c @@ -118,7 +118,7 @@ void vpe_create_bg_segments( cmd_info.ops = ops; cmd_info.cd = (uint8_t)(gaps_cnt - gap_index - 1); cmd_info.tm_enabled = false; // currently only support frontend tm - vpe_vector_push(vpe_priv, vpe_priv->vpe_cmd_vector, &cmd_info); + vpe_vector_push(vpe_priv->vpe_cmd_vector, &cmd_info); } } diff --git a/src/amd/vpelib/src/core/color.c b/src/amd/vpelib/src/core/color.c index b0e12bafc2a..b2d64fb996c 100644 --- a/src/amd/vpelib/src/core/color.c +++ b/src/amd/vpelib/src/core/color.c @@ -181,7 +181,7 @@ static bool color_update_regamma_tf(struct vpe_priv *vpe_priv, break; } - for (int i = 0; i < vpe_priv->pub.caps->resource_caps.num_dpp; i++) { + for (uint32_t i = 0; i < vpe_priv->pub.caps->resource_caps.num_dpp; i++) { if (vpe_priv->init.debug.disable_lut_caching || (output_tf->cache_info[i].cm_gamma_type != output_tf->cm_gamma_type) || (output_tf->cache_info[i].tf != output_tf->tf) || @@ -198,7 +198,7 @@ static bool color_update_regamma_tf(struct vpe_priv *vpe_priv, ret = vpe_color_calculate_regamma_params( vpe_priv, x_scale, y_scale, &vpe_priv->cal_buffer, output_tf); if (ret) { - for (int i = 0; i < vpe_priv->pub.caps->resource_caps.num_dpp; i++) { + for (uint32_t i = 0; i < vpe_priv->pub.caps->resource_caps.num_dpp; i++) { // reset the cache status and mark as dirty to let hw layer to re-cache output_tf->dirty[i] = true; output_tf->config_cache[i].cached = false; @@ -244,7 +244,7 @@ static bool color_update_degamma_tf(struct vpe_priv *vpe_priv, break; } - for (int i = 0; i < vpe_priv->pub.caps->resource_caps.num_dpp; i++) { + for (uint32_t i = 0; i < vpe_priv->pub.caps->resource_caps.num_dpp; i++) { if (vpe_priv->init.debug.disable_lut_caching || (input_tf->cache_info[i].cm_gamma_type != input_tf->cm_gamma_type) || (input_tf->cache_info[i].tf != input_tf->tf) || @@ -260,7 +260,7 @@ static bool color_update_degamma_tf(struct vpe_priv *vpe_priv, if (update) { ret = vpe_color_calculate_degamma_params(vpe_priv, x_scale, y_scale, input_tf); if (ret) { - for (int i = 0; i < vpe_priv->pub.caps->resource_caps.num_dpp; i++) { + for (uint32_t i = 0; i < vpe_priv->pub.caps->resource_caps.num_dpp; i++) { // reset the cache status and mark as dirty to let hw layer to re-cache input_tf->dirty[i] = true; input_tf->config_cache[i].cached = false; @@ -683,7 +683,7 @@ enum vpe_status vpe_color_update_3dlut( } else { bool update = false; - for (int i = 0; i < vpe_priv->pub.caps->resource_caps.num_mpc_3dlut; i++) + for (uint32_t i = 0; i < vpe_priv->pub.caps->resource_caps.num_mpc_3dlut; i++) if (vpe_priv->init.debug.disable_lut_caching || (stream_ctx->lut3d_func->cache_info[i].uid_3dlut != stream_ctx->stream.tm_params.UID)) @@ -693,7 +693,7 @@ enum vpe_status vpe_color_update_3dlut( vpe_convert_to_tetrahedral( vpe_priv, stream_ctx->stream.tm_params.lut_data, stream_ctx->stream.tm_params.lut_dim, stream_ctx->lut3d_func); - for (int i = 0; i < vpe_priv->pub.caps->resource_caps.num_mpc_3dlut; i++) { + for (uint32_t i = 0; i < vpe_priv->pub.caps->resource_caps.num_mpc_3dlut; i++) { stream_ctx->lut3d_func->dirty[i] = true; stream_ctx->lut3d_func->config_cache[i].cached = false; stream_ctx->lut3d_func->cache_info[i].uid_3dlut = stream_ctx->stream.tm_params.UID; @@ -830,7 +830,7 @@ enum vpe_status vpe_color_update_shaper(const struct vpe_priv *vpe_priv, uint16_ } // right now shaper is always programmed with linear, once cached, it is always reused. - for (int i = 0; i < vpe_priv->pub.caps->resource_caps.num_mpc_3dlut; i++) { + for (uint32_t i = 0; i < vpe_priv->pub.caps->resource_caps.num_mpc_3dlut; i++) { if (vpe_priv->init.debug.disable_lut_caching || (shaper_func && (shaper_func->cache_info[i].tf != tf))) { // if the caching has the required data cached, skip the update @@ -849,7 +849,7 @@ enum vpe_status vpe_color_update_shaper(const struct vpe_priv *vpe_priv, uint16_ ret = vpe_build_shaper(&shaper_in, &shaper_func->pwl); if (ret == VPE_STATUS_OK) { - for (int i = 0; i < vpe_priv->pub.caps->resource_caps.num_mpc_3dlut; i++) { + for (uint32_t i = 0; i < vpe_priv->pub.caps->resource_caps.num_mpc_3dlut; i++) { shaper_func->dirty[i] = true; shaper_func->config_cache[i].cached = false; shaper_func->cache_info[i].tf = tf; diff --git a/src/amd/vpelib/src/core/inc/config_cache.h b/src/amd/vpelib/src/core/inc/config_cache.h index 479ac7151c9..698d69886ef 100644 --- a/src/amd/vpelib/src/core/inc/config_cache.h +++ b/src/amd/vpelib/src/core/inc/config_cache.h @@ -42,17 +42,17 @@ * The upper layer has to indicate this object is dirty or not for the hw programming layer to * determine i. re-use the config cache? ii. cache the new settings? * - * Before using the CONFIG_CACHE(), make sure the function has these local variables visiable in the + * Before using the CONFIG_CACHE(), make sure the function has these local variables visible in the * same code block: * 1. struct config_writer *config_writer * - usually been declared with PROGRAM_ENTRY() * 2. a debug option that want to disable caching or not * 3. an input object that has the config_cache member * 4. the hw programming function that would generate command buffer content - * 5. the object that has num_configs which stores the generated configs + * 5. the input/output context that has configs vector which stores the generated configs * * Inside this CONFIG_CACHE macro it will clear the dirty bit after consuming the settings - * + * * Make sure to free up this cache object when the parent object is destroyed using * CONFIG_CACHE_FREE() * @@ -63,6 +63,7 @@ extern "C" { #endif struct vpe_priv; +struct vpe_vector; /* a common config cache structure to be included in the object that is for program hardware API * layer @@ -77,21 +78,21 @@ struct config_cache { * as bypass mode is not heavy lifting programming. * * /param obj_cache an object that has the config cache member - * /param obj_cfg_array an object that contains the configs and num_configs member + * /param ctx an input/output context that contains the configs vector * /param disable_cache a flag that controls a caching is needed * /param is_bypass if it is in bypass, it doesn't cache the bypass config * /param program_func_call the program call that generate config packet content * /param inst index to address the config_cache array */ -#define CONFIG_CACHE(obj_cache, obj_cfg_array, disable_cache, is_bypass, program_func_call, inst) \ +#define CONFIG_CACHE(obj_cache, ctx, disable_cache, is_bypass, program_func_call, inst) \ { \ bool use_cache = false; \ \ - /* make sure it opens a new config packet */ \ - config_writer_force_new_with_type(config_writer, CONFIG_TYPE_DIRECT); \ - \ if ((obj_cache) && !disable_cache && (obj_cache)->config_cache[inst].p_buffer && \ (obj_cache)->config_cache[inst].cached && !((obj_cache)->dirty[inst]) && !is_bypass) { \ + /* make sure it opens a new config packet */ \ + config_writer_force_new_with_type(config_writer, CONFIG_TYPE_DIRECT); \ + \ /* reuse the cache */ \ if (config_writer->buf->size >= (obj_cache)->config_cache[inst].size) { \ memcpy((void *)(uintptr_t)config_writer->base_cpu_va, \ @@ -109,7 +110,13 @@ struct config_cache { \ if (!use_cache) { \ uint64_t start, end; \ - uint16_t config_num = (uint16_t)(obj_cfg_array)->num_configs[inst]; \ + uint16_t num_config = (uint16_t)(ctx)->configs[inst]->num_elements; \ + \ + if (!is_bypass) { \ + /* make sure it opens a new config packet so we can cache a complete new config */ \ + /* for bypass we don't do caching, so no need to open a new desc */ \ + config_writer_force_new_with_type(config_writer, CONFIG_TYPE_DIRECT); \ + } \ \ start = config_writer->base_cpu_va; \ program_func_call; \ @@ -117,7 +124,7 @@ struct config_cache { \ if (!disable_cache && !is_bypass) { \ /* only cache when it is not crossing config packets */ \ - if (config_num == (obj_cfg_array)->num_configs[inst]) { \ + if (num_config == (ctx)->configs[inst]->num_elements) { \ if ((obj_cache)->dirty[inst]) { \ uint64_t size = end - start; \ \ diff --git a/src/amd/vpelib/src/core/inc/resource.h b/src/amd/vpelib/src/core/inc/resource.h index 0b6a32d347f..491a2f3aea5 100644 --- a/src/amd/vpelib/src/core/inc/resource.h +++ b/src/amd/vpelib/src/core/inc/resource.h @@ -42,7 +42,8 @@ struct vpe_priv; struct vpe_cmd_info; struct segment_ctx; -#define MIN_VPE_CMD 1024 +#define MIN_VPE_CMD (1024) +#define MIN_NUM_CONFIG (16) enum vpe_cmd_ops; @@ -132,9 +133,6 @@ struct stream_ctx *vpe_alloc_stream_ctx(struct vpe_priv *vpe_priv, uint32_t num_ void vpe_free_stream_ctx(struct vpe_priv *vpe_priv); -/** output ctx */ -void vpe_free_output_ctx(struct vpe_priv *vpe_priv); - /** pipe resource management */ void vpe_pipe_reset(struct vpe_priv *vpe_priv); diff --git a/src/amd/vpelib/src/core/inc/vpe_priv.h b/src/amd/vpelib/src/core/inc/vpe_priv.h index 1174fdaa1eb..7694b264e12 100644 --- a/src/amd/vpelib/src/core/inc/vpe_priv.h +++ b/src/amd/vpelib/src/core/inc/vpe_priv.h @@ -56,8 +56,6 @@ extern "C" { #define MAX_LINE_SIZE 1024 // without 16 pixels for the seams #define MAX_LINE_CNT 4 -#define MAX_NUM_SAVED_CONFIG 16 - enum vpe_cmd_ops { VPE_CMD_OPS_BLENDING, VPE_CMD_OPS_BG, @@ -130,13 +128,9 @@ struct stream_ctx { uint16_t num_segments; struct segment_ctx *segment_ctx; - uint16_t num_configs[MAX_INPUT_PIPE]; // shared among same stream - uint16_t num_stream_op_configs[MAX_INPUT_PIPE][VPE_CMD_TYPE_COUNT]; - // shared among same cmd type, within the same stream - - struct config_record configs[MAX_INPUT_PIPE][MAX_NUM_SAVED_CONFIG]; - struct config_record stream_op_configs[MAX_INPUT_PIPE][VPE_CMD_TYPE_COUNT] - [MAX_NUM_SAVED_CONFIG]; + // share configs that can be re-used once generated + struct vpe_vector *configs[MAX_INPUT_PIPE]; + struct vpe_vector *stream_op_configs[MAX_INPUT_PIPE][VPE_CMD_TYPE_COUNT]; // cached color properties bool per_pixel_alpha; @@ -182,8 +176,8 @@ struct output_ctx { enum color_transfer_func tf; enum color_space cs; - uint32_t num_configs[MAX_OUTPUT_PIPE]; - struct config_record configs[MAX_OUTPUT_PIPE][MAX_NUM_SAVED_CONFIG]; + // store generated per-pipe configs that can be reused + struct vpe_vector *configs[MAX_OUTPUT_PIPE]; union { struct { diff --git a/src/amd/vpelib/src/core/resource.c b/src/amd/vpelib/src/core/resource.c index ba9a6db6d0f..0415a021c95 100644 --- a/src/amd/vpelib/src/core/resource.c +++ b/src/amd/vpelib/src/core/resource.c @@ -164,42 +164,75 @@ struct segment_ctx *vpe_alloc_segment_ctx(struct vpe_priv *vpe_priv, uint16_t nu return segment_ctx_base; } -struct stream_ctx *vpe_alloc_stream_ctx(struct vpe_priv *vpe_priv, uint32_t num_streams) +static enum vpe_status create_input_config_vector(struct stream_ctx *stream_ctx) { - struct stream_ctx *ctx_base, *ctx; - uint32_t i; + enum vpe_status res = VPE_STATUS_OK; + uint32_t pipe_idx, type_idx; + struct vpe_priv *vpe_priv; - ctx_base = (struct stream_ctx *)vpe_zalloc(sizeof(struct stream_ctx) * num_streams); - if (!ctx_base) - return NULL; + vpe_priv = stream_ctx->vpe_priv; - for (i = 0; i < num_streams; i++) { - ctx = &ctx_base[i]; - ctx->cs = COLOR_SPACE_UNKNOWN; - ctx->tf = TRANSFER_FUNC_UNKNOWN; - ctx->vpe_priv = vpe_priv; - vpe_color_set_adjustments_to_default(&ctx->color_adjustments); - ctx->tf_scaling_factor = vpe_fixpt_one; - ctx->stream.flags.geometric_scaling = 0; - ctx->stream.tm_params.UID = 0; - ctx->uid_3dlut = 0; + for (pipe_idx = 0; pipe_idx < vpe_priv->pub.caps->resource_caps.num_dpp; pipe_idx++) { + stream_ctx->configs[pipe_idx] = + vpe_vector_create(vpe_priv, sizeof(struct config_record), MIN_NUM_CONFIG); + if (!stream_ctx->configs[pipe_idx]) { + res = VPE_STATUS_NO_MEMORY; + break; + } + + for (type_idx = 0; type_idx < VPE_CMD_TYPE_COUNT; type_idx++) { + stream_ctx->stream_op_configs[pipe_idx][type_idx] = + vpe_vector_create(vpe_priv, sizeof(struct config_record), MIN_NUM_CONFIG); + if (!stream_ctx->stream_op_configs[pipe_idx][type_idx]) { + res = VPE_STATUS_NO_MEMORY; + break; + } + } + + if (res != VPE_STATUS_OK) + break; } - return ctx_base; + return res; } -void vpe_free_stream_ctx(struct vpe_priv *vpe_priv) +static void destroy_input_config_vector(struct stream_ctx *stream_ctx) { - uint16_t i; - struct stream_ctx *ctx; + uint32_t pipe_idx, type_idx; + struct vpe_priv *vpe_priv; - if (!vpe_priv->stream_ctx || !vpe_priv->num_streams) + vpe_priv = stream_ctx->vpe_priv; + + for (pipe_idx = 0; pipe_idx < vpe_priv->pub.caps->resource_caps.num_dpp; pipe_idx++) { + if (stream_ctx->configs[pipe_idx]) { + vpe_vector_free(stream_ctx->configs[pipe_idx]); + stream_ctx->configs[pipe_idx] = NULL; + } + + for (type_idx = 0; type_idx < VPE_CMD_TYPE_COUNT; type_idx++) { + if (stream_ctx->stream_op_configs[pipe_idx][type_idx]) { + vpe_vector_free(stream_ctx->stream_op_configs[pipe_idx][type_idx]); + stream_ctx->stream_op_configs[pipe_idx][type_idx] = NULL; + } + } + } +} + +static void free_stream_ctx(uint32_t num_streams, struct stream_ctx *stream_ctx) +{ + struct vpe_priv *vpe_priv; + uint32_t stream_idx; + + if (!stream_ctx || !num_streams) return; - for (i = 0; i < vpe_priv->num_streams; i++) { - ctx = &vpe_priv->stream_ctx[i]; + vpe_priv = stream_ctx[0].vpe_priv; + + for (stream_idx = 0; stream_idx < num_streams; stream_idx++) { + struct stream_ctx *ctx = &stream_ctx[stream_idx]; + if (ctx->input_tf) { - for (int j = 0; j < MAX_INPUT_PIPE; j++) + for (uint32_t j = 0; j < MAX_INPUT_PIPE; j++) CONFIG_CACHE_FREE(ctx->input_tf->config_cache[j]); vpe_free(ctx->input_tf); ctx->input_tf = NULL; @@ -221,21 +254,21 @@ void vpe_free_stream_ctx(struct vpe_priv *vpe_priv) } if (ctx->in_shaper_func) { - for (int j = 0; j < MAX_INPUT_PIPE; j++) + for (uint32_t j = 0; j < MAX_INPUT_PIPE; j++) CONFIG_CACHE_FREE(ctx->in_shaper_func->config_cache[j]); vpe_free(ctx->in_shaper_func); ctx->in_shaper_func = NULL; } if (ctx->blend_tf) { - for (int j = 0; j < MAX_INPUT_PIPE; j++) + for (uint32_t j = 0; j < MAX_INPUT_PIPE; j++) CONFIG_CACHE_FREE(ctx->blend_tf->config_cache[j]); vpe_free(ctx->blend_tf); ctx->blend_tf = NULL; } if (ctx->lut3d_func) { - for (int j = 0; j < MAX_3DLUT; j++) + for (uint32_t j = 0; j < MAX_3DLUT; j++) CONFIG_CACHE_FREE(ctx->lut3d_func->config_cache[j]); vpe_free(ctx->lut3d_func); ctx->lut3d_func = NULL; @@ -245,20 +278,53 @@ void vpe_free_stream_ctx(struct vpe_priv *vpe_priv) vpe_free(ctx->segment_ctx); ctx->segment_ctx = NULL; } + + destroy_input_config_vector(ctx); } - vpe_free(vpe_priv->stream_ctx); - vpe_priv->stream_ctx = NULL; - vpe_priv->num_streams = 0; - vpe_priv->num_virtual_streams = 0; } -void vpe_free_output_ctx(struct vpe_priv *vpe_priv) +struct stream_ctx *vpe_alloc_stream_ctx(struct vpe_priv *vpe_priv, uint32_t num_streams) { - if (vpe_priv->output_ctx.gamut_remap) - vpe_free(vpe_priv->output_ctx.gamut_remap); + struct stream_ctx *ctx_base, *ctx; + uint32_t stream_idx; + enum vpe_status res = VPE_STATUS_OK; - if (vpe_priv->output_ctx.output_tf) - vpe_free(vpe_priv->output_ctx.output_tf); + ctx_base = (struct stream_ctx *)vpe_zalloc(sizeof(struct stream_ctx) * num_streams); + if (!ctx_base) + return NULL; + + for (stream_idx = 0; stream_idx < num_streams; stream_idx++) { + ctx = &ctx_base[stream_idx]; + ctx->cs = COLOR_SPACE_UNKNOWN; + ctx->tf = TRANSFER_FUNC_UNKNOWN; + ctx->vpe_priv = vpe_priv; + vpe_color_set_adjustments_to_default(&ctx->color_adjustments); + ctx->tf_scaling_factor = vpe_fixpt_one; + ctx->stream.flags.geometric_scaling = 0; + ctx->stream.tm_params.UID = 0; + ctx->uid_3dlut = 0; + + if ((res = create_input_config_vector(ctx)) != VPE_STATUS_OK) + break; + } + + if (res != VPE_STATUS_OK) { + free_stream_ctx(num_streams, ctx_base); + ctx_base = NULL; + } + return ctx_base; +} + +void vpe_free_stream_ctx(struct vpe_priv *vpe_priv) +{ + if (vpe_priv->num_streams && vpe_priv->stream_ctx) { + free_stream_ctx(vpe_priv->num_streams, vpe_priv->stream_ctx); + vpe_free(vpe_priv->stream_ctx); + } + + vpe_priv->stream_ctx = NULL; + vpe_priv->num_streams = 0; + vpe_priv->num_virtual_streams = 0; } void vpe_pipe_reset(struct vpe_priv *vpe_priv) @@ -697,35 +763,24 @@ void vpe_resource_build_bit_depth_reduction_params( void vpe_frontend_config_callback( void *ctx, uint64_t cfg_base_gpu, uint64_t cfg_base_cpu, uint64_t size, uint32_t pipe_idx) { - struct config_frontend_cb_ctx *cb_ctx = (struct config_frontend_cb_ctx*)ctx; - struct vpe_priv *vpe_priv = cb_ctx->vpe_priv; - struct stream_ctx *stream_ctx = &vpe_priv->stream_ctx[cb_ctx->stream_idx]; - enum vpe_cmd_type cmd_type; + struct config_frontend_cb_ctx *cb_ctx = (struct config_frontend_cb_ctx *)ctx; + struct vpe_priv *vpe_priv = cb_ctx->vpe_priv; + struct stream_ctx *stream_ctx = &vpe_priv->stream_ctx[cb_ctx->stream_idx]; + enum vpe_cmd_type cmd_type; + struct config_record record; if (cb_ctx->stream_sharing) { - VPE_ASSERT(stream_ctx->num_configs[pipe_idx] < - (int)(sizeof(stream_ctx->configs[pipe_idx]) / sizeof(struct config_record))); + record.config_base_addr = cfg_base_gpu; + record.config_size = size; - stream_ctx->configs[pipe_idx][stream_ctx->num_configs[pipe_idx]].config_base_addr = - cfg_base_gpu; - stream_ctx->configs[pipe_idx][stream_ctx->num_configs[pipe_idx]].config_size = size; - stream_ctx->num_configs[pipe_idx]++; + vpe_vector_push(stream_ctx->configs[pipe_idx], &record); } else if (cb_ctx->stream_op_sharing) { cmd_type = cb_ctx->cmd_type; - VPE_ASSERT(stream_ctx->num_stream_op_configs[pipe_idx][cmd_type] < - (int)(sizeof(stream_ctx->stream_op_configs[pipe_idx][cmd_type]) / - sizeof(struct config_record))); + record.config_base_addr = cfg_base_gpu; + record.config_size = size; - stream_ctx - ->stream_op_configs[pipe_idx][cmd_type] - [stream_ctx->num_stream_op_configs[pipe_idx][cmd_type]] - .config_base_addr = cfg_base_gpu; - stream_ctx - ->stream_op_configs[pipe_idx][cmd_type] - [stream_ctx->num_stream_op_configs[pipe_idx][cmd_type]] - .config_size = size; - stream_ctx->num_stream_op_configs[pipe_idx][cmd_type]++; + vpe_vector_push(stream_ctx->stream_op_configs[pipe_idx][cmd_type], &record); } vpe_priv->vpe_desc_writer.add_config_desc( @@ -735,18 +790,16 @@ void vpe_frontend_config_callback( void vpe_backend_config_callback( void *ctx, uint64_t cfg_base_gpu, uint64_t cfg_base_cpu, uint64_t size, uint32_t pipe_idx) { - struct config_backend_cb_ctx *cb_ctx = (struct config_backend_cb_ctx*)ctx; - struct vpe_priv *vpe_priv = cb_ctx->vpe_priv; - struct output_ctx *output_ctx = &vpe_priv->output_ctx; + struct config_backend_cb_ctx *cb_ctx = (struct config_backend_cb_ctx *)ctx; + struct vpe_priv *vpe_priv = cb_ctx->vpe_priv; + struct output_ctx *output_ctx = &vpe_priv->output_ctx; + struct config_record record; if (cb_ctx->share) { - VPE_ASSERT(output_ctx->num_configs[pipe_idx] < - (sizeof(output_ctx->configs[pipe_idx]) / sizeof(struct config_record))); + record.config_base_addr = cfg_base_gpu; + record.config_size = size; - output_ctx->configs[pipe_idx][output_ctx->num_configs[pipe_idx]].config_base_addr = - cfg_base_gpu; - output_ctx->configs[pipe_idx][output_ctx->num_configs[pipe_idx]].config_size = size; - output_ctx->num_configs[pipe_idx]++; + vpe_vector_push(output_ctx->configs[pipe_idx], &record); } vpe_priv->vpe_desc_writer.add_config_desc( diff --git a/src/amd/vpelib/src/core/vpelib.c b/src/amd/vpelib/src/core/vpelib.c index 419a668968f..594f1598a99 100644 --- a/src/amd/vpelib/src/core/vpelib.c +++ b/src/amd/vpelib/src/core/vpelib.c @@ -128,7 +128,6 @@ static void override_debug_option( debug->disable_lut_caching = user_debug->disable_lut_caching; } -#ifdef VPE_BUILD_1_1 static void verify_collaboration_mode(struct vpe_priv *vpe_priv) { if (vpe_priv->pub.level == VPE_IP_LEVEL_1_1) { @@ -142,7 +141,44 @@ static void verify_collaboration_mode(struct vpe_priv *vpe_priv) vpe_priv->collaboration_mode = false; } } -#endif + +static enum vpe_status create_output_config_vector(struct vpe_priv *vpe_priv) +{ + uint32_t i; + + // output config vector stores all share-able configs that can be re-used later + for (i = 0; i < vpe_priv->pub.caps->resource_caps.num_cdc_be; i++) { + vpe_priv->output_ctx.configs[i] = + vpe_vector_create(vpe_priv, sizeof(struct config_record), MIN_NUM_CONFIG); + if (!vpe_priv->output_ctx.configs[i]) { + return VPE_STATUS_NO_MEMORY; + } + } + return VPE_STATUS_OK; +} + +static void destroy_output_config_vector(struct vpe_priv *vpe_priv) +{ + uint32_t i; + + for (i = 0; i < vpe_priv->pub.caps->resource_caps.num_cdc_be; i++) { + if (vpe_priv->output_ctx.configs[i]) { + vpe_vector_free(vpe_priv->output_ctx.configs[i]); + vpe_priv->output_ctx.configs[i] = NULL; + } + } +} + +static void free_output_ctx(struct vpe_priv *vpe_priv) +{ + if (vpe_priv->output_ctx.gamut_remap) + vpe_free(vpe_priv->output_ctx.gamut_remap); + + if (vpe_priv->output_ctx.output_tf) + vpe_free(vpe_priv->output_ctx.output_tf); + + destroy_output_config_vector(vpe_priv); +} struct vpe *vpe_create(const struct vpe_init_data *params) { @@ -178,6 +214,14 @@ struct vpe *vpe_create(const struct vpe_init_data *params) vpe_free(vpe_priv); return NULL; } + + status = create_output_config_vector(vpe_priv); + if (status != VPE_STATUS_OK) { + destroy_output_config_vector(vpe_priv); + vpe_free(vpe_priv); + return NULL; + } + override_debug_option(&vpe_priv->init.debug, ¶ms->debug); vpe_color_setup_x_points_distribution(); @@ -204,12 +248,12 @@ void vpe_destroy(struct vpe **vpe) vpe_destroy_resource(vpe_priv, &vpe_priv->resource); - vpe_free_output_ctx(vpe_priv); + free_output_ctx(vpe_priv); vpe_free_stream_ctx(vpe_priv); if (vpe_priv->vpe_cmd_vector) - vpe_vector_free(vpe_priv, vpe_priv->vpe_cmd_vector); + vpe_vector_free(vpe_priv->vpe_cmd_vector); if (vpe_priv->dummy_input_param) vpe_free(vpe_priv->dummy_input_param); @@ -629,13 +673,15 @@ enum vpe_status vpe_build_commands( struct vpe_priv *vpe_priv; struct cmd_builder *builder; enum vpe_status status = VPE_STATUS_OK; - uint32_t cmd_idx, i, pipe_idx, stream_idx, cmd_type_idx; + uint32_t cmd_idx, pipe_idx, stream_idx, cmd_type_idx; struct vpe_build_bufs curr_bufs; int64_t cmd_buf_size; int64_t emb_buf_size; uint64_t cmd_buf_gpu_a, cmd_buf_cpu_a; uint64_t emb_buf_gpu_a, emb_buf_cpu_a; + struct vpe_vector *config_vector; struct vpe_cmd_info *cmd_info; + if (!vpe || !param || !bufs) return VPE_STATUS_ERROR; @@ -686,15 +732,26 @@ enum vpe_status vpe_build_commands( // copy the param, reset saved configs for (stream_idx = 0; stream_idx < vpe_priv->num_streams; stream_idx++) { + struct stream_ctx *stream_ctx = &vpe_priv->stream_ctx[stream_idx]; + for (pipe_idx = 0; pipe_idx < MAX_INPUT_PIPE; pipe_idx++) { - vpe_priv->stream_ctx[stream_idx].num_configs[pipe_idx] = 0; - for (cmd_type_idx = 0; cmd_type_idx < VPE_CMD_TYPE_COUNT; cmd_type_idx++) - vpe_priv->stream_ctx[stream_idx].num_stream_op_configs[pipe_idx][cmd_type_idx] = 0; + config_vector = stream_ctx->configs[pipe_idx]; + if (config_vector) + vpe_vector_clear(config_vector); + + for (cmd_type_idx = 0; cmd_type_idx < VPE_CMD_TYPE_COUNT; cmd_type_idx++) { + config_vector = stream_ctx->stream_op_configs[pipe_idx][cmd_type_idx]; + if (config_vector) + vpe_vector_clear(config_vector); + } } } - for (i = 0; i < MAX_OUTPUT_PIPE; i++) - vpe_priv->output_ctx.num_configs[i] = 0; + for (pipe_idx = 0; pipe_idx < vpe_priv->pub.caps->resource_caps.num_cdc_be; pipe_idx++) { + config_vector = vpe_priv->output_ctx.configs[pipe_idx]; + if (config_vector) + vpe_vector_clear(config_vector); + } // Reset pipes vpe_pipe_reset(vpe_priv); diff --git a/src/amd/vpelib/src/utils/inc/vector.h b/src/amd/vpelib/src/utils/inc/vector.h index 95006d1f97b..070b73af302 100644 --- a/src/amd/vpelib/src/utils/inc/vector.h +++ b/src/amd/vpelib/src/utils/inc/vector.h @@ -31,7 +31,11 @@ extern "C" { #endif +struct vpe_priv; + struct vpe_vector { + struct vpe_priv *vpe_priv; /*< store the vpe_priv for alloc/free memory */ + void *element; /*< the internal vector memory storage */ size_t num_elements; /*< number of stored elements */ size_t capacity; @@ -57,11 +61,10 @@ void *vpe_vector_get(struct vpe_vector *vector, size_t idx); /** * Push the element to end of the vector. - * @param[in] vpe_priv vpe instance created by vpe_create() * @param[in] vector vector that we want to push to the end. * @param[in] p_element pointer of the element */ -void vpe_vector_push(struct vpe_priv *vpe_priv, struct vpe_vector *vector, void *p_element); +void vpe_vector_push(struct vpe_vector *vector, void *p_element); /** * Clear the vector. @@ -71,10 +74,9 @@ void vpe_vector_clear(struct vpe_vector *vector); /** * Free the vector. - * @param[in] vpe_priv vpe instance created by vpe_create() * @param[in] vector vector that we want to free. */ -void vpe_vector_free(struct vpe_priv *vpe_priv, struct vpe_vector *vpe_vector); +void vpe_vector_free(struct vpe_vector *vpe_vector); #ifdef __cplusplus } diff --git a/src/amd/vpelib/src/utils/vector.c b/src/amd/vpelib/src/utils/vector.c index d9eca9f6a48..02bd3cd58fb 100644 --- a/src/amd/vpelib/src/utils/vector.c +++ b/src/amd/vpelib/src/utils/vector.c @@ -39,6 +39,7 @@ struct vpe_vector *vpe_vector_create( return NULL; } + vector->vpe_priv = vpe_priv; vector->num_elements = 0; vector->capacity = initial_capacity; vector->element_size = element_size; @@ -46,9 +47,10 @@ struct vpe_vector *vpe_vector_create( return vector; } -static struct vpe_vector *vector_realloc( - struct vpe_priv *vpe_priv, struct vpe_vector *vector, size_t new_size) +static struct vpe_vector *vector_realloc(struct vpe_vector *vector, size_t new_size) { + struct vpe_priv *vpe_priv = vector->vpe_priv; + void *new_element = vpe_zalloc(new_size); if (!new_element) return NULL; @@ -70,14 +72,14 @@ void *vpe_vector_get(struct vpe_vector *vector, size_t idx) return (void *)((char *)(vector->element) + (idx * vector->element_size)); } -void vpe_vector_push(struct vpe_priv *vpe_priv, struct vpe_vector *vector, void *p_element) +void vpe_vector_push(struct vpe_vector *vector, void *p_element) { if (!p_element || !vector) return; if (vector->num_elements >= vector->capacity) { vector->capacity *= 2; - vector = vector_realloc(vpe_priv, vector, vector->capacity * vector->element_size); + vector = vector_realloc(vector, vector->capacity * vector->element_size); } if (!vector) @@ -97,10 +99,11 @@ void vpe_vector_clear(struct vpe_vector *vector) memset(vector->element, 0, vector->capacity * vector->element_size); } -void vpe_vector_free(struct vpe_priv *vpe_priv, struct vpe_vector *vector) +void vpe_vector_free(struct vpe_vector *vector) { + struct vpe_priv *vpe_priv = vector->vpe_priv; + vpe_free(vector->element); vector->element = NULL; vpe_free(vector); - vector = NULL; }