amd/vpelib: Add API function to get taps

A function to calculate the optimal number of scaler taps,
vpe_get_optimal_num_of_taps(), is added to the API.
Additionally, the get_optimal_number_of_taps logic is moved from dpp to resource.

Reviewed-by: Roy Chan <Roy.Chan@amd.com>
Acked-by: Jack Chih <chiachih@amd.com>
Signed-off-by: Navid Assadian <navid.assadian@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30531>
This commit is contained in:
Assadian, Navid
2024-04-09 12:45:30 -04:00
committed by Marge Bot
parent 4fc221524c
commit 699f88f844
7 changed files with 84 additions and 70 deletions

View File

@@ -526,8 +526,8 @@ struct vpe_scaling_filter_coeffs {
unsigned int nb_phases; unsigned int nb_phases;
uint16_t horiz_polyphase_coeffs[MAX_NB_POLYPHASE_COEFFS]; /*max nb of taps is 4, max nb of uint16_t horiz_polyphase_coeffs[MAX_NB_POLYPHASE_COEFFS]; /*max nb of taps is 4, max nb of
phases 33 = (32+1)*/ phases 33 = (32+1)*/
uint16_t vert_polyphase_coeffs[MAX_NB_POLYPHASE_COEFFS]; /*max nb of taps is 4, max nb of phases uint16_t vert_polyphase_coeffs[MAX_NB_POLYPHASE_COEFFS]; /*max nb of taps is 4, max nb of
33 = (32+1)*/ phases 33 = (32+1)*/
}; };
struct vpe_hdr_metadata { struct vpe_hdr_metadata {

View File

@@ -101,6 +101,14 @@ enum vpe_status vpe_build_noops(struct vpe *vpe, uint32_t num_dwords, uint32_t *
enum vpe_status vpe_build_commands( enum vpe_status vpe_build_commands(
struct vpe *vpe, const struct vpe_build_param *param, struct vpe_build_bufs *bufs); struct vpe *vpe, const struct vpe_build_param *param, struct vpe_build_bufs *bufs);
/**
 * Get the optimal number of scaler taps based on the scaling ratio.
 *
 * The source and destination rectangles in scaling_info, together with its
 * taps, are handed to the tap calculation, and the result is written back
 * into scaling_info->taps in place.
 *
 * This function returns no status, so the caller is not told whether the
 * calculation succeeded.
 *
 * @param[in]     vpe          vpe instance created by vpe_create()
 * @param[in,out] scaling_info [in] source and destination rectangles,
 *                             [out] calculated taps.
 */
void vpe_get_optimal_num_of_taps(struct vpe *vpe, struct vpe_scaling_info *scaling_info);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View File

@@ -857,7 +857,7 @@ struct vpe10_dpp {
void vpe10_construct_dpp(struct vpe_priv *vpe_priv, struct dpp *dpp); void vpe10_construct_dpp(struct vpe_priv *vpe_priv, struct dpp *dpp);
bool vpe10_dpp_get_optimal_number_of_taps( bool vpe10_dpp_get_optimal_number_of_taps(
struct dpp *dpp, struct scaler_data *scl_data, const struct vpe_scaling_taps *in_taps); struct vpe_rect *src_rect, struct vpe_rect *dst_rect, struct vpe_scaling_taps *taps);
void vpe10_dscl_calc_lb_num_partitions(const struct scaler_data *scl_data, void vpe10_dscl_calc_lb_num_partitions(const struct scaler_data *scl_data,
enum lb_memory_config lb_config, uint32_t *num_part_y, uint32_t *num_part_c); enum lb_memory_config lb_config, uint32_t *num_part_y, uint32_t *num_part_c);

View File

@@ -23,6 +23,7 @@
*/ */
#include <string.h> #include <string.h>
#include <math.h>
#include "common.h" #include "common.h"
#include "vpe_priv.h" #include "vpe_priv.h"
#include "vpe10_dpp.h" #include "vpe10_dpp.h"
@@ -65,78 +66,70 @@ void vpe10_construct_dpp(struct vpe_priv *vpe_priv, struct dpp *dpp)
} }
bool vpe10_dpp_get_optimal_number_of_taps( bool vpe10_dpp_get_optimal_number_of_taps(
struct dpp *dpp, struct scaler_data *scl_data, const struct vpe_scaling_taps *in_taps) struct vpe_rect *src_rect, struct vpe_rect *dst_rect, struct vpe_scaling_taps *taps)
{ {
struct vpe_priv *vpe_priv = dpp->vpe_priv; double h_ratio = 1.0, v_ratio = 1.0;
uint32_t h_taps_min = 0, v_taps_min = 0; uint32_t h_taps = 1, v_taps = 1;
/* if (taps->h_taps > 8 || taps->v_taps > 8 || taps->h_taps_c > 8 || taps->v_taps_c > 8)
* Set default taps if none are provided
* From programming guide: taps = min{ ceil(2*H_RATIO,1), 8} for downscaling
* taps = 4 for upscaling
*/
if (in_taps->h_taps > 8 || in_taps->v_taps > 8 || in_taps->h_taps_c > 8 ||
in_taps->v_taps_c > 8)
return false; return false;
if (vpe_fixpt_ceil(scl_data->ratios.horz) > 1) /*
h_taps_min = (uint32_t)max(4, min(2 * vpe_fixpt_ceil(scl_data->ratios.horz), 8)); * if calculated taps are greater than 8, it means the downscaling ratio is greater than 4:1,
else * and since the given taps are used by default, if the given taps are less than the
h_taps_min = (uint32_t)4; * calculated ones, the image quality will not be good, so vpelib would reject this case.
*/
if (in_taps->h_taps == 0) { // Horizontal taps
scl_data->taps.h_taps = h_taps_min;
h_ratio = (double)src_rect->width / (double)dst_rect->width;
if (src_rect->width == dst_rect->width) {
h_taps = 1;
} else if (h_ratio > 1) {
h_taps = (uint32_t)max(4, ceil(h_ratio * 2.0));
} else { } else {
if (in_taps->h_taps < h_taps_min) h_taps = 4;
return false;
scl_data->taps.h_taps = in_taps->h_taps;
} }
if (vpe_fixpt_ceil(scl_data->ratios.vert) > 1) if (h_taps != 1) {
v_taps_min = h_taps += h_taps % 2;
(uint32_t)max(4, min(vpe_fixpt_ceil(vpe_fixpt_mul_int(scl_data->ratios.vert, 2)), 8));
else
v_taps_min = (uint32_t)4;
if (in_taps->v_taps == 0) {
scl_data->taps.v_taps = v_taps_min;
} else {
if (in_taps->v_taps < v_taps_min)
return false;
scl_data->taps.v_taps = in_taps->v_taps;
} }
if (in_taps->h_taps_c == 0) { if (taps->h_taps == 0 && h_taps <= 8) {
// default to 2 as mmd only uses bilinear for chroma taps->h_taps = h_taps;
scl_data->taps.h_taps_c = (uint32_t)2; } else if (taps->h_taps < h_taps || h_taps > 8) {
} else return false;
scl_data->taps.h_taps_c = in_taps->h_taps_c; }
if (in_taps->v_taps_c == 0) { // Vertical taps
// default to 2 as mmd only uses bilinear for chroma v_ratio = (double)src_rect->height / (double)dst_rect->height;
scl_data->taps.v_taps_c = (uint32_t)2;
} else
scl_data->taps.v_taps_c = in_taps->v_taps_c;
/* taps can be either 1 or an even number */ if (src_rect->height == dst_rect->height) {
if (scl_data->taps.h_taps % 2 && scl_data->taps.h_taps != 1) v_taps = 1;
scl_data->taps.h_taps++; } else if (v_ratio > 1) {
v_taps = (uint32_t)max(4, ceil(v_ratio * 2.0));
} else {
v_taps = 4;
}
if (scl_data->taps.v_taps % 2 && scl_data->taps.v_taps != 1) if (v_taps != 1) {
scl_data->taps.v_taps++; v_taps += v_taps % 2;
}
if (scl_data->taps.h_taps_c % 2 && scl_data->taps.h_taps_c != 1) if (taps->v_taps == 0 && v_taps <= 8) {
scl_data->taps.h_taps_c++; taps->v_taps = v_taps;
} else if (taps->v_taps < v_taps || v_taps > 8) {
return false;
}
if (scl_data->taps.v_taps_c % 2 && scl_data->taps.v_taps_c != 1) // Chroma taps
scl_data->taps.v_taps_c++; if (taps->h_taps_c == 0) {
taps->h_taps_c = 2;
}
// bypass scaler if all ratios are 1 if (taps->v_taps_c == 0) {
if (IDENTITY_RATIO(scl_data->ratios.horz)) taps->v_taps_c = 2;
scl_data->taps.h_taps = 1; }
if (IDENTITY_RATIO(scl_data->ratios.vert))
scl_data->taps.v_taps = 1;
return true; return true;
} }

View File

@@ -176,18 +176,19 @@ static struct vpe_caps caps = {
static bool vpe10_init_scaler_data(struct vpe_priv *vpe_priv, struct stream_ctx *stream_ctx, static bool vpe10_init_scaler_data(struct vpe_priv *vpe_priv, struct stream_ctx *stream_ctx,
struct scaler_data *scl_data, struct vpe_rect *src_rect, struct vpe_rect *dst_rect) struct scaler_data *scl_data, struct vpe_rect *src_rect, struct vpe_rect *dst_rect)
{ {
struct dpp *dpp = vpe_priv->resource.dpp[0]; struct dpp *dpp;
dpp = vpe_priv->resource.dpp[0];
calculate_scaling_ratios(scl_data, src_rect, dst_rect, stream_ctx->stream.surface_info.format); calculate_scaling_ratios(scl_data, src_rect, dst_rect, stream_ctx->stream.surface_info.format);
if (vpe_priv->init.debug.skip_optimal_tap_check) { scl_data->taps.v_taps = stream_ctx->stream.scaling_info.taps.v_taps;
scl_data->taps.v_taps = stream_ctx->stream.scaling_info.taps.v_taps; scl_data->taps.h_taps = stream_ctx->stream.scaling_info.taps.h_taps;
scl_data->taps.h_taps = stream_ctx->stream.scaling_info.taps.h_taps; scl_data->taps.v_taps_c = stream_ctx->stream.scaling_info.taps.v_taps_c;
scl_data->taps.v_taps_c = stream_ctx->stream.scaling_info.taps.v_taps_c; scl_data->taps.h_taps_c = stream_ctx->stream.scaling_info.taps.h_taps_c;
scl_data->taps.h_taps_c = stream_ctx->stream.scaling_info.taps.h_taps_c; if (!vpe_priv->init.debug.skip_optimal_tap_check) {
} else { if (!dpp->funcs->get_optimal_number_of_taps(src_rect, dst_rect, &scl_data->taps)) {
if (!dpp->funcs->get_optimal_number_of_taps(
dpp, scl_data, &stream_ctx->stream.scaling_info.taps))
return false; return false;
}
} }
if ((stream_ctx->stream.use_external_scaling_coeffs == if ((stream_ctx->stream.use_external_scaling_coeffs ==

View File

@@ -72,7 +72,7 @@ enum input_csc_select {
struct dpp_funcs { struct dpp_funcs {
bool (*get_optimal_number_of_taps)( bool (*get_optimal_number_of_taps)(
struct dpp *dpp, struct scaler_data *scl_data, const struct vpe_scaling_taps *taps); struct vpe_rect *src_rect, struct vpe_rect *dst_rect, struct vpe_scaling_taps *taps);
void (*dscl_calc_lb_num_partitions)(const struct scaler_data *scl_data, void (*dscl_calc_lb_num_partitions)(const struct scaler_data *scl_data,
enum lb_memory_config lb_config, uint32_t *num_part_y, uint32_t *num_part_c); enum lb_memory_config lb_config, uint32_t *num_part_y, uint32_t *num_part_c);

View File

@@ -728,3 +728,15 @@ enum vpe_status vpe_build_commands(
return status; return status;
} }
/**
 * Compute the optimal number of scaler taps for the given scaling info.
 *
 * Forwards the source/destination rectangles and the taps stored in
 * scaling_info to the get_optimal_number_of_taps hook of the first dpp
 * resource; the hook receives a pointer to scaling_info->taps and updates it
 * in place.
 *
 * @param[in]     vpe          public vpe handle (the "pub" member of struct vpe_priv)
 * @param[in,out] scaling_info [in] source and destination rectangles,
 *                             [out] calculated taps
 *
 * Does nothing when either argument is NULL.
 */
void vpe_get_optimal_num_of_taps(struct vpe *vpe, struct vpe_scaling_info *scaling_info)
{
    struct vpe_priv *vpe_priv;
    struct dpp      *dpp;

    /* Both pointers are dereferenced below; bail out instead of invoking
     * undefined behavior on a NULL argument. */
    if (!vpe || !scaling_info) {
        return;
    }

    vpe_priv = container_of(vpe, struct vpe_priv, pub);
    dpp      = vpe_priv->resource.dpp[0];

    /* The hook returns a bool, but this API returns void, so the result
     * cannot be propagated to the caller and is explicitly discarded. */
    (void)dpp->funcs->get_optimal_number_of_taps(
        &scaling_info->src_rect, &scaling_info->dst_rect, &scaling_info->taps);
}