amd/vpelib: Add API function to get taps
A module to calculate the number of taps is added to the API. Additionally, the get_optimal_taps module is moved from dpp to resource.

Reviewed-by: Roy Chan <Roy.Chan@amd.com>
Acked-by: Jack Chih <chiachih@amd.com>
Signed-off-by: Navid Assadian <navid.assadian@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30531>
This commit is contained in:

committed by
Marge Bot

parent
4fc221524c
commit
699f88f844
@@ -526,8 +526,8 @@ struct vpe_scaling_filter_coeffs {
|
||||
unsigned int nb_phases;
|
||||
uint16_t horiz_polyphase_coeffs[MAX_NB_POLYPHASE_COEFFS]; /*max nb of taps is 4, max nb of
|
||||
phases 33 = (32+1)*/
|
||||
uint16_t vert_polyphase_coeffs[MAX_NB_POLYPHASE_COEFFS]; /*max nb of taps is 4, max nb of phases
|
||||
33 = (32+1)*/
|
||||
uint16_t vert_polyphase_coeffs[MAX_NB_POLYPHASE_COEFFS]; /*max nb of taps is 4, max nb of
|
||||
phases 33 = (32+1)*/
|
||||
};
|
||||
|
||||
struct vpe_hdr_metadata {
|
||||
|
@@ -101,6 +101,14 @@ enum vpe_status vpe_build_noops(struct vpe *vpe, uint32_t num_dwords, uint32_t *
|
||||
enum vpe_status vpe_build_commands(
|
||||
struct vpe *vpe, const struct vpe_build_param *param, struct vpe_build_bufs *bufs);
|
||||
|
||||
/**
|
||||
* Get the optimal number of taps based on the scaling ratio.
|
||||
* @param[in] vpe vpe instance created by vpe_create()
|
||||
* @param[in,out] scaling_info [in] source and destination rectangles; [out] calculated taps.
|
||||
*/
|
||||
|
||||
void vpe_get_optimal_num_of_taps(struct vpe *vpe, struct vpe_scaling_info *scaling_info);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@@ -857,7 +857,7 @@ struct vpe10_dpp {
|
||||
void vpe10_construct_dpp(struct vpe_priv *vpe_priv, struct dpp *dpp);
|
||||
|
||||
bool vpe10_dpp_get_optimal_number_of_taps(
|
||||
struct dpp *dpp, struct scaler_data *scl_data, const struct vpe_scaling_taps *in_taps);
|
||||
struct vpe_rect *src_rect, struct vpe_rect *dst_rect, struct vpe_scaling_taps *taps);
|
||||
|
||||
void vpe10_dscl_calc_lb_num_partitions(const struct scaler_data *scl_data,
|
||||
enum lb_memory_config lb_config, uint32_t *num_part_y, uint32_t *num_part_c);
|
||||
|
@@ -23,6 +23,7 @@
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
#include "common.h"
|
||||
#include "vpe_priv.h"
|
||||
#include "vpe10_dpp.h"
|
||||
@@ -65,78 +66,70 @@ void vpe10_construct_dpp(struct vpe_priv *vpe_priv, struct dpp *dpp)
|
||||
}
|
||||
|
||||
bool vpe10_dpp_get_optimal_number_of_taps(
|
||||
struct dpp *dpp, struct scaler_data *scl_data, const struct vpe_scaling_taps *in_taps)
|
||||
struct vpe_rect *src_rect, struct vpe_rect *dst_rect, struct vpe_scaling_taps *taps)
|
||||
{
|
||||
struct vpe_priv *vpe_priv = dpp->vpe_priv;
|
||||
uint32_t h_taps_min = 0, v_taps_min = 0;
|
||||
/*
|
||||
* Set default taps if none are provided
|
||||
* From programming guide: taps = min{ ceil(2*H_RATIO,1), 8} for downscaling
|
||||
* taps = 4 for upscaling
|
||||
*/
|
||||
if (in_taps->h_taps > 8 || in_taps->v_taps > 8 || in_taps->h_taps_c > 8 ||
|
||||
in_taps->v_taps_c > 8)
|
||||
double h_ratio = 1.0, v_ratio = 1.0;
|
||||
uint32_t h_taps = 1, v_taps = 1;
|
||||
if (taps->h_taps > 8 || taps->v_taps > 8 || taps->h_taps_c > 8 || taps->v_taps_c > 8)
|
||||
return false;
|
||||
|
||||
if (vpe_fixpt_ceil(scl_data->ratios.horz) > 1)
|
||||
h_taps_min = (uint32_t)max(4, min(2 * vpe_fixpt_ceil(scl_data->ratios.horz), 8));
|
||||
else
|
||||
h_taps_min = (uint32_t)4;
|
||||
/*
|
||||
* if calculated taps are greater than 8, it means the downscaling ratio is greater than 4:1,
|
||||
* and since the given taps are used by default, if the given taps are less than the
|
||||
* calculated ones, the image quality will not be good, so vpelib would reject this case.
|
||||
*/
|
||||
|
||||
if (in_taps->h_taps == 0) {
|
||||
scl_data->taps.h_taps = h_taps_min;
|
||||
// Horizontal taps
|
||||
|
||||
h_ratio = (double)src_rect->width / (double)dst_rect->width;
|
||||
|
||||
if (src_rect->width == dst_rect->width) {
|
||||
h_taps = 1;
|
||||
} else if (h_ratio > 1) {
|
||||
h_taps = (uint32_t)max(4, ceil(h_ratio * 2.0));
|
||||
} else {
|
||||
if (in_taps->h_taps < h_taps_min)
|
||||
return false;
|
||||
|
||||
scl_data->taps.h_taps = in_taps->h_taps;
|
||||
h_taps = 4;
|
||||
}
|
||||
|
||||
if (vpe_fixpt_ceil(scl_data->ratios.vert) > 1)
|
||||
v_taps_min =
|
||||
(uint32_t)max(4, min(vpe_fixpt_ceil(vpe_fixpt_mul_int(scl_data->ratios.vert, 2)), 8));
|
||||
else
|
||||
v_taps_min = (uint32_t)4;
|
||||
|
||||
if (in_taps->v_taps == 0) {
|
||||
scl_data->taps.v_taps = v_taps_min;
|
||||
} else {
|
||||
if (in_taps->v_taps < v_taps_min)
|
||||
return false;
|
||||
|
||||
scl_data->taps.v_taps = in_taps->v_taps;
|
||||
if (h_taps != 1) {
|
||||
h_taps += h_taps % 2;
|
||||
}
|
||||
|
||||
if (in_taps->h_taps_c == 0) {
|
||||
// default to 2 as mmd only uses bilinear for chroma
|
||||
scl_data->taps.h_taps_c = (uint32_t)2;
|
||||
} else
|
||||
scl_data->taps.h_taps_c = in_taps->h_taps_c;
|
||||
if (taps->h_taps == 0 && h_taps <= 8) {
|
||||
taps->h_taps = h_taps;
|
||||
} else if (taps->h_taps < h_taps || h_taps > 8) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (in_taps->v_taps_c == 0) {
|
||||
// default to 2 as mmd only uses bilinear for chroma
|
||||
scl_data->taps.v_taps_c = (uint32_t)2;
|
||||
} else
|
||||
scl_data->taps.v_taps_c = in_taps->v_taps_c;
|
||||
// Vertical taps
|
||||
v_ratio = (double)src_rect->height / (double)dst_rect->height;
|
||||
|
||||
/* taps can be either 1 or an even number */
|
||||
if (scl_data->taps.h_taps % 2 && scl_data->taps.h_taps != 1)
|
||||
scl_data->taps.h_taps++;
|
||||
if (src_rect->height == dst_rect->height) {
|
||||
v_taps = 1;
|
||||
} else if (v_ratio > 1) {
|
||||
v_taps = (uint32_t)max(4, ceil(v_ratio * 2.0));
|
||||
} else {
|
||||
v_taps = 4;
|
||||
}
|
||||
|
||||
if (scl_data->taps.v_taps % 2 && scl_data->taps.v_taps != 1)
|
||||
scl_data->taps.v_taps++;
|
||||
if (v_taps != 1) {
|
||||
v_taps += v_taps % 2;
|
||||
}
|
||||
|
||||
if (scl_data->taps.h_taps_c % 2 && scl_data->taps.h_taps_c != 1)
|
||||
scl_data->taps.h_taps_c++;
|
||||
if (taps->v_taps == 0 && v_taps <= 8) {
|
||||
taps->v_taps = v_taps;
|
||||
} else if (taps->v_taps < v_taps || v_taps > 8) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (scl_data->taps.v_taps_c % 2 && scl_data->taps.v_taps_c != 1)
|
||||
scl_data->taps.v_taps_c++;
|
||||
// Chroma taps
|
||||
if (taps->h_taps_c == 0) {
|
||||
taps->h_taps_c = 2;
|
||||
}
|
||||
|
||||
// bypass scaler if all ratios are 1
|
||||
if (IDENTITY_RATIO(scl_data->ratios.horz))
|
||||
scl_data->taps.h_taps = 1;
|
||||
if (IDENTITY_RATIO(scl_data->ratios.vert))
|
||||
scl_data->taps.v_taps = 1;
|
||||
if (taps->v_taps_c == 0) {
|
||||
taps->v_taps_c = 2;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@@ -176,18 +176,19 @@ static struct vpe_caps caps = {
|
||||
static bool vpe10_init_scaler_data(struct vpe_priv *vpe_priv, struct stream_ctx *stream_ctx,
|
||||
struct scaler_data *scl_data, struct vpe_rect *src_rect, struct vpe_rect *dst_rect)
|
||||
{
|
||||
struct dpp *dpp = vpe_priv->resource.dpp[0];
|
||||
struct dpp *dpp;
|
||||
dpp = vpe_priv->resource.dpp[0];
|
||||
|
||||
calculate_scaling_ratios(scl_data, src_rect, dst_rect, stream_ctx->stream.surface_info.format);
|
||||
|
||||
if (vpe_priv->init.debug.skip_optimal_tap_check) {
|
||||
scl_data->taps.v_taps = stream_ctx->stream.scaling_info.taps.v_taps;
|
||||
scl_data->taps.h_taps = stream_ctx->stream.scaling_info.taps.h_taps;
|
||||
scl_data->taps.v_taps_c = stream_ctx->stream.scaling_info.taps.v_taps_c;
|
||||
scl_data->taps.h_taps_c = stream_ctx->stream.scaling_info.taps.h_taps_c;
|
||||
} else {
|
||||
if (!dpp->funcs->get_optimal_number_of_taps(
|
||||
dpp, scl_data, &stream_ctx->stream.scaling_info.taps))
|
||||
scl_data->taps.v_taps = stream_ctx->stream.scaling_info.taps.v_taps;
|
||||
scl_data->taps.h_taps = stream_ctx->stream.scaling_info.taps.h_taps;
|
||||
scl_data->taps.v_taps_c = stream_ctx->stream.scaling_info.taps.v_taps_c;
|
||||
scl_data->taps.h_taps_c = stream_ctx->stream.scaling_info.taps.h_taps_c;
|
||||
if (!vpe_priv->init.debug.skip_optimal_tap_check) {
|
||||
if (!dpp->funcs->get_optimal_number_of_taps(src_rect, dst_rect, &scl_data->taps)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if ((stream_ctx->stream.use_external_scaling_coeffs ==
|
||||
|
@@ -72,7 +72,7 @@ enum input_csc_select {
|
||||
struct dpp_funcs {
|
||||
|
||||
bool (*get_optimal_number_of_taps)(
|
||||
struct dpp *dpp, struct scaler_data *scl_data, const struct vpe_scaling_taps *taps);
|
||||
struct vpe_rect *src_rect, struct vpe_rect *dst_rect, struct vpe_scaling_taps *taps);
|
||||
|
||||
void (*dscl_calc_lb_num_partitions)(const struct scaler_data *scl_data,
|
||||
enum lb_memory_config lb_config, uint32_t *num_part_y, uint32_t *num_part_c);
|
||||
|
@@ -728,3 +728,15 @@ enum vpe_status vpe_build_commands(
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
void vpe_get_optimal_num_of_taps(struct vpe *vpe, struct vpe_scaling_info *scaling_info)
|
||||
{
|
||||
struct vpe_priv *vpe_priv;
|
||||
struct dpp *dpp;
|
||||
|
||||
vpe_priv = container_of(vpe, struct vpe_priv, pub);
|
||||
dpp = vpe_priv->resource.dpp[0];
|
||||
|
||||
dpp->funcs->get_optimal_number_of_taps(
|
||||
&scaling_info->src_rect, &scaling_info->dst_rect, &scaling_info->taps);
|
||||
}
|
||||
|
Reference in New Issue
Block a user