amd/vpelib: Add API function to get taps

A function to calculate the optimal number of taps, vpe_get_optimal_num_of_taps(), is added to the API.
Additionally, the get_optimal_taps module is moved from dpp to resource.

Reviewed-by: Roy Chan <Roy.Chan@amd.com>
Acked-by: Jack Chih <chiachih@amd.com>
Signed-off-by: Navid Assadian <navid.assadian@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30531>
This commit is contained in:
Assadian, Navid
2024-04-09 12:45:30 -04:00
committed by Marge Bot
parent 4fc221524c
commit 699f88f844
7 changed files with 84 additions and 70 deletions

View File

@@ -526,8 +526,8 @@ struct vpe_scaling_filter_coeffs {
unsigned int nb_phases;
uint16_t horiz_polyphase_coeffs[MAX_NB_POLYPHASE_COEFFS]; /*max nb of taps is 4, max nb of
phases 33 = (32+1)*/
uint16_t vert_polyphase_coeffs[MAX_NB_POLYPHASE_COEFFS]; /*max nb of taps is 4, max nb of phases
33 = (32+1)*/
uint16_t vert_polyphase_coeffs[MAX_NB_POLYPHASE_COEFFS]; /*max nb of taps is 4, max nb of
phases 33 = (32+1)*/
};
struct vpe_hdr_metadata {

View File

@@ -101,6 +101,14 @@ enum vpe_status vpe_build_noops(struct vpe *vpe, uint32_t num_dwords, uint32_t *
enum vpe_status vpe_build_commands(
struct vpe *vpe, const struct vpe_build_param *param, struct vpe_build_bufs *bufs);
/**
* get the optimal number of taps based on the scaling ratio.
*
* The result is written back into scaling_info->taps. The function returns void, so the
* caller receives no indication of whether the tap calculation accepted the request.
*
* NOTE(review): in the vpe10 tap calculator, tap fields passed in as 0 are filled with
* computed/default values, while non-zero tap fields are only validated and left as given
* - confirm this also holds for any other calculator installed behind the dpp hook.
*
* @param[in] vpe vpe instance created by vpe_create()
* @param[in,out] scaling_info [in] source and destination rectangles [out] calculated taps.
*/
void vpe_get_optimal_num_of_taps(struct vpe *vpe, struct vpe_scaling_info *scaling_info);
#ifdef __cplusplus
}
#endif

View File

@@ -857,7 +857,7 @@ struct vpe10_dpp {
void vpe10_construct_dpp(struct vpe_priv *vpe_priv, struct dpp *dpp);
bool vpe10_dpp_get_optimal_number_of_taps(
struct dpp *dpp, struct scaler_data *scl_data, const struct vpe_scaling_taps *in_taps);
struct vpe_rect *src_rect, struct vpe_rect *dst_rect, struct vpe_scaling_taps *taps);
void vpe10_dscl_calc_lb_num_partitions(const struct scaler_data *scl_data,
enum lb_memory_config lb_config, uint32_t *num_part_y, uint32_t *num_part_c);

View File

@@ -23,6 +23,7 @@
*/
#include <string.h>
#include <math.h>
#include "common.h"
#include "vpe_priv.h"
#include "vpe10_dpp.h"
@@ -65,78 +66,70 @@ void vpe10_construct_dpp(struct vpe_priv *vpe_priv, struct dpp *dpp)
}
bool vpe10_dpp_get_optimal_number_of_taps(
struct dpp *dpp, struct scaler_data *scl_data, const struct vpe_scaling_taps *in_taps)
struct vpe_rect *src_rect, struct vpe_rect *dst_rect, struct vpe_scaling_taps *taps)
{
struct vpe_priv *vpe_priv = dpp->vpe_priv;
uint32_t h_taps_min = 0, v_taps_min = 0;
/*
* Set default taps if none are provided
* From programming guide: taps = min{ ceil(2*H_RATIO,1), 8} for downscaling
* taps = 4 for upscaling
*/
if (in_taps->h_taps > 8 || in_taps->v_taps > 8 || in_taps->h_taps_c > 8 ||
in_taps->v_taps_c > 8)
double h_ratio = 1.0, v_ratio = 1.0;
uint32_t h_taps = 1, v_taps = 1;
if (taps->h_taps > 8 || taps->v_taps > 8 || taps->h_taps_c > 8 || taps->v_taps_c > 8)
return false;
if (vpe_fixpt_ceil(scl_data->ratios.horz) > 1)
h_taps_min = (uint32_t)max(4, min(2 * vpe_fixpt_ceil(scl_data->ratios.horz), 8));
else
h_taps_min = (uint32_t)4;
/*
* if calculated taps are greater than 8, it means the downscaling ratio is greater than 4:1,
* and since the given taps are used by default, if the given taps are less than the
* calculated ones, the image quality will not be good, so vpelib would reject this case.
*/
if (in_taps->h_taps == 0) {
scl_data->taps.h_taps = h_taps_min;
// Horizontal taps
h_ratio = (double)src_rect->width / (double)dst_rect->width;
if (src_rect->width == dst_rect->width) {
h_taps = 1;
} else if (h_ratio > 1) {
h_taps = (uint32_t)max(4, ceil(h_ratio * 2.0));
} else {
if (in_taps->h_taps < h_taps_min)
return false;
scl_data->taps.h_taps = in_taps->h_taps;
h_taps = 4;
}
if (vpe_fixpt_ceil(scl_data->ratios.vert) > 1)
v_taps_min =
(uint32_t)max(4, min(vpe_fixpt_ceil(vpe_fixpt_mul_int(scl_data->ratios.vert, 2)), 8));
else
v_taps_min = (uint32_t)4;
if (in_taps->v_taps == 0) {
scl_data->taps.v_taps = v_taps_min;
} else {
if (in_taps->v_taps < v_taps_min)
return false;
scl_data->taps.v_taps = in_taps->v_taps;
if (h_taps != 1) {
h_taps += h_taps % 2;
}
if (in_taps->h_taps_c == 0) {
// default to 2 as mmd only uses bilinear for chroma
scl_data->taps.h_taps_c = (uint32_t)2;
} else
scl_data->taps.h_taps_c = in_taps->h_taps_c;
if (taps->h_taps == 0 && h_taps <= 8) {
taps->h_taps = h_taps;
} else if (taps->h_taps < h_taps || h_taps > 8) {
return false;
}
if (in_taps->v_taps_c == 0) {
// default to 2 as mmd only uses bilinear for chroma
scl_data->taps.v_taps_c = (uint32_t)2;
} else
scl_data->taps.v_taps_c = in_taps->v_taps_c;
// Vertical taps
v_ratio = (double)src_rect->height / (double)dst_rect->height;
/* taps can be either 1 or an even number */
if (scl_data->taps.h_taps % 2 && scl_data->taps.h_taps != 1)
scl_data->taps.h_taps++;
if (src_rect->height == dst_rect->height) {
v_taps = 1;
} else if (v_ratio > 1) {
v_taps = (uint32_t)max(4, ceil(v_ratio * 2.0));
} else {
v_taps = 4;
}
if (scl_data->taps.v_taps % 2 && scl_data->taps.v_taps != 1)
scl_data->taps.v_taps++;
if (v_taps != 1) {
v_taps += v_taps % 2;
}
if (scl_data->taps.h_taps_c % 2 && scl_data->taps.h_taps_c != 1)
scl_data->taps.h_taps_c++;
if (taps->v_taps == 0 && v_taps <= 8) {
taps->v_taps = v_taps;
} else if (taps->v_taps < v_taps || v_taps > 8) {
return false;
}
if (scl_data->taps.v_taps_c % 2 && scl_data->taps.v_taps_c != 1)
scl_data->taps.v_taps_c++;
// Chroma taps
if (taps->h_taps_c == 0) {
taps->h_taps_c = 2;
}
// bypass scaler if all ratios are 1
if (IDENTITY_RATIO(scl_data->ratios.horz))
scl_data->taps.h_taps = 1;
if (IDENTITY_RATIO(scl_data->ratios.vert))
scl_data->taps.v_taps = 1;
if (taps->v_taps_c == 0) {
taps->v_taps_c = 2;
}
return true;
}

View File

@@ -176,18 +176,19 @@ static struct vpe_caps caps = {
static bool vpe10_init_scaler_data(struct vpe_priv *vpe_priv, struct stream_ctx *stream_ctx,
struct scaler_data *scl_data, struct vpe_rect *src_rect, struct vpe_rect *dst_rect)
{
struct dpp *dpp = vpe_priv->resource.dpp[0];
struct dpp *dpp;
dpp = vpe_priv->resource.dpp[0];
calculate_scaling_ratios(scl_data, src_rect, dst_rect, stream_ctx->stream.surface_info.format);
if (vpe_priv->init.debug.skip_optimal_tap_check) {
scl_data->taps.v_taps = stream_ctx->stream.scaling_info.taps.v_taps;
scl_data->taps.h_taps = stream_ctx->stream.scaling_info.taps.h_taps;
scl_data->taps.v_taps_c = stream_ctx->stream.scaling_info.taps.v_taps_c;
scl_data->taps.h_taps_c = stream_ctx->stream.scaling_info.taps.h_taps_c;
} else {
if (!dpp->funcs->get_optimal_number_of_taps(
dpp, scl_data, &stream_ctx->stream.scaling_info.taps))
scl_data->taps.v_taps = stream_ctx->stream.scaling_info.taps.v_taps;
scl_data->taps.h_taps = stream_ctx->stream.scaling_info.taps.h_taps;
scl_data->taps.v_taps_c = stream_ctx->stream.scaling_info.taps.v_taps_c;
scl_data->taps.h_taps_c = stream_ctx->stream.scaling_info.taps.h_taps_c;
if (!vpe_priv->init.debug.skip_optimal_tap_check) {
if (!dpp->funcs->get_optimal_number_of_taps(src_rect, dst_rect, &scl_data->taps)) {
return false;
}
}
if ((stream_ctx->stream.use_external_scaling_coeffs ==

View File

@@ -72,7 +72,7 @@ enum input_csc_select {
struct dpp_funcs {
bool (*get_optimal_number_of_taps)(
struct dpp *dpp, struct scaler_data *scl_data, const struct vpe_scaling_taps *taps);
struct vpe_rect *src_rect, struct vpe_rect *dst_rect, struct vpe_scaling_taps *taps);
void (*dscl_calc_lb_num_partitions)(const struct scaler_data *scl_data,
enum lb_memory_config lb_config, uint32_t *num_part_y, uint32_t *num_part_c);

View File

@@ -728,3 +728,15 @@ enum vpe_status vpe_build_commands(
return status;
}
/*
 * Public helper: compute the scaler taps for scaling_info's source/destination
 * rectangles and store them in scaling_info->taps.
 *
 * The work is delegated to the get_optimal_number_of_taps hook of the first
 * dpp resource owned by the private context that embeds `vpe`.
 */
void vpe_get_optimal_num_of_taps(struct vpe *vpe, struct vpe_scaling_info *scaling_info)
{
    struct vpe_priv *priv      = container_of(vpe, struct vpe_priv, pub);
    struct dpp      *first_dpp = priv->resource.dpp[0];

    /* The hook returns a bool, but this entry point is void, so the result
     * is intentionally discarded here. */
    (void)first_dpp->funcs->get_optimal_number_of_taps(
        &scaling_info->src_rect, &scaling_info->dst_rect, &scaling_info->taps);
}