ac: add ac_vtx_format_info

This will be used by RADV and ACO.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17894>
This commit is contained in:
Rhys Perry
2022-07-29 19:34:47 +01:00
committed by Marge Bot
parent dfbb4b384a
commit 6a2ada93b4
8 changed files with 159 additions and 27 deletions

View File

@@ -421,6 +421,114 @@ const struct ac_data_format_info *ac_get_data_format_info(unsigned dfmt)
return &data_format_table[dfmt];
}
/* Repeat an initializer value 2, 3 or 4 times. */
#define DUP2(v) v, v
#define DUP3(v) v, v, v
#define DUP4(v) v, v, v, v
/* The FMT* helpers expand to the `has_hw_format, {hw_format[0..3]}` part of an
 * ac_vtx_format_info initializer. hw_format is indexed by "number of channels minus one" and
 * bit i of the mask is set when hw_format[i] holds a real format. HW_FMT and HW_FMT_INVALID are
 * not defined here; they must be defined where the table macro is finally expanded.
 */
/* Per-channel formats of width `dfmt` bits: entries for 1, 2 and 4 channels, none for 3 (0xb). */
#define FMT(dfmt, nfmt) 0xb, {HW_FMT(dfmt, nfmt), HW_FMT(dfmt##_##dfmt, nfmt), HW_FMT_INVALID, HW_FMT(dfmt##_##dfmt##_##dfmt##_##dfmt, nfmt)}
/* 32-bit channels: an entry for every channel count (0xf). */
#define FMT_32(nfmt) 0xf, {HW_FMT(32, nfmt), HW_FMT(32_32, nfmt), HW_FMT(32_32_32, nfmt), HW_FMT(32_32_32_32, nfmt)}
/* 64-bit channels: expressed with 32_32 / 32_32_32_32; only indices 0 and 1 are valid (0x3). */
#define FMT_64(nfmt) 0x3, {HW_FMT(32_32, nfmt), HW_FMT(32_32_32_32, nfmt), DUP2(HW_FMT_INVALID)}
/* Packed formats: the same hardware format is stored at every index (0xf). */
#define FMTP(dfmt, nfmt) 0xf, {DUP4(HW_FMT(dfmt, nfmt))}
/* Build the DST_SEL_X/Y/Z/W swizzle bits; each argument is a V_008F0C_SQ_SEL_* suffix
 * (the tables in this file pass X, Y, Z, W, 0 or 1).
 */
#define DST_SEL(x, y, z, w) \
(S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_##x) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_##y) | \
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_##z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_##w))
/* Designated initializers for the 8- and 16-bit-per-channel PIPE_FORMAT_*_<nfmt> entries.
 * Each entry is {dst_sel, element_size, num_channels, chan_byte_size, has_hw_format, hw_format},
 * in struct ac_vtx_format_info field order. The B8G8R8* variants differ from R8G8B8* only in the
 * swizzle (Z,Y,X instead of X,Y,Z). The expansion ends with a trailing comma so that several
 * lists can be concatenated inside one array initializer.
 */
#define LIST_NFMT_8_16(nfmt) \
[(int)PIPE_FORMAT_R8_##nfmt] = {DST_SEL(X,0,0,1), 1, 1, 1, FMT(8, nfmt)}, \
[(int)PIPE_FORMAT_R8G8_##nfmt] = {DST_SEL(X,Y,0,1), 2, 2, 1, FMT(8, nfmt)}, \
[(int)PIPE_FORMAT_R8G8B8_##nfmt] = {DST_SEL(X,Y,Z,1), 3, 3, 1, FMT(8, nfmt)}, \
[(int)PIPE_FORMAT_B8G8R8_##nfmt] = {DST_SEL(Z,Y,X,1), 3, 3, 1, FMT(8, nfmt)}, \
[(int)PIPE_FORMAT_R8G8B8A8_##nfmt] = {DST_SEL(X,Y,Z,W), 4, 4, 1, FMT(8, nfmt)}, \
[(int)PIPE_FORMAT_B8G8R8A8_##nfmt] = {DST_SEL(Z,Y,X,W), 4, 4, 1, FMT(8, nfmt)}, \
[(int)PIPE_FORMAT_R16_##nfmt] = {DST_SEL(X,0,0,1), 2, 1, 2, FMT(16, nfmt)}, \
[(int)PIPE_FORMAT_R16G16_##nfmt] = {DST_SEL(X,Y,0,1), 4, 2, 2, FMT(16, nfmt)}, \
[(int)PIPE_FORMAT_R16G16B16_##nfmt] = {DST_SEL(X,Y,Z,1), 6, 3, 2, FMT(16, nfmt)}, \
[(int)PIPE_FORMAT_R16G16B16A16_##nfmt] = {DST_SEL(X,Y,Z,W), 8, 4, 2, FMT(16, nfmt)},
/* Designated initializers for the 32- and 64-bit-per-channel PIPE_FORMAT_*_<nfmt> entries.
 * Each entry is {dst_sel, element_size, num_channels, chan_byte_size, has_hw_format, hw_format},
 * in struct ac_vtx_format_info field order. 64-bit formats go through FMT_64, which only provides
 * hardware formats at indices 0 and 1.
 *
 * The final line deliberately has no line-continuation backslash: a trailing backslash would
 * splice whatever physical line follows (e.g. the next #define) into this macro's body.
 */
#define LIST_NFMT_32_64(nfmt) \
   [(int)PIPE_FORMAT_R32_##nfmt] = {DST_SEL(X,0,0,1), 4, 1, 4, FMT_32(nfmt)}, \
   [(int)PIPE_FORMAT_R32G32_##nfmt] = {DST_SEL(X,Y,0,1), 8, 2, 4, FMT_32(nfmt)}, \
   [(int)PIPE_FORMAT_R32G32B32_##nfmt] = {DST_SEL(X,Y,Z,1), 12, 3, 4, FMT_32(nfmt)}, \
   [(int)PIPE_FORMAT_R32G32B32A32_##nfmt] = {DST_SEL(X,Y,Z,W), 16, 4, 4, FMT_32(nfmt)}, \
   [(int)PIPE_FORMAT_R64_##nfmt] = {DST_SEL(X,Y,0,0), 8, 1, 8, FMT_64(nfmt)}, \
   [(int)PIPE_FORMAT_R64G64_##nfmt] = {DST_SEL(X,Y,Z,W), 16, 2, 8, FMT_64(nfmt)}, \
   [(int)PIPE_FORMAT_R64G64B64_##nfmt] = {DST_SEL(X,Y,Z,W), 24, 3, 8, FMT_64(nfmt)}, \
   [(int)PIPE_FORMAT_R64G64B64A64_##nfmt] = {DST_SEL(X,Y,Z,W), 32, 4, 8, FMT_64(nfmt)},
/* Complete initializer list for one vertex-format table, indexed by enum pipe_format.
 *
 * HW_FMT, HW_FMT_INVALID and AA are intentionally left undefined here; each table instantiation
 * defines them before expanding VB_FORMATS, so the same list yields per-generation tables.
 * AA(...) wraps the alpha_adjust value of the signed 2_10_10_10 formats; entries without AA()
 * leave alpha_adjust zero-initialized (AC_ALPHA_ADJUST_NONE).
 *
 * The final line deliberately has no line-continuation backslash: a trailing backslash would
 * splice whatever physical line follows (e.g. the next #define) into this macro's body.
 */
#define VB_FORMATS \
   [(int)PIPE_FORMAT_NONE] = {DST_SEL(0,0,0,1), 0, 4, 0, 0xf, {DUP4(HW_FMT_INVALID)}}, \
   LIST_NFMT_8_16(UNORM) \
   LIST_NFMT_8_16(SNORM) \
   LIST_NFMT_8_16(USCALED) \
   LIST_NFMT_8_16(SSCALED) \
   LIST_NFMT_8_16(UINT) \
   LIST_NFMT_8_16(SINT) \
   LIST_NFMT_32_64(UINT) \
   LIST_NFMT_32_64(SINT) \
   LIST_NFMT_32_64(FLOAT) \
   [(int)PIPE_FORMAT_R16_FLOAT] = {DST_SEL(X,0,0,1), 2, 1, 2, FMT(16, FLOAT)}, \
   [(int)PIPE_FORMAT_R16G16_FLOAT] = {DST_SEL(X,Y,0,1), 4, 2, 2, FMT(16, FLOAT)}, \
   [(int)PIPE_FORMAT_R16G16B16_FLOAT] = {DST_SEL(X,Y,Z,1), 6, 3, 2, FMT(16, FLOAT)}, \
   [(int)PIPE_FORMAT_R16G16B16A16_FLOAT] = {DST_SEL(X,Y,Z,W), 8, 4, 2, FMT(16, FLOAT)}, \
   [(int)PIPE_FORMAT_B10G10R10A2_UNORM] = {DST_SEL(Z,Y,X,W), 4, 4, 0, FMTP(2_10_10_10, UNORM)}, \
   [(int)PIPE_FORMAT_B10G10R10A2_SNORM] = {DST_SEL(Z,Y,X,W), 4, 4, 0, FMTP(2_10_10_10, SNORM), \
                                           AA(AC_ALPHA_ADJUST_SNORM)}, \
   [(int)PIPE_FORMAT_B10G10R10A2_USCALED] = {DST_SEL(Z,Y,X,W), 4, 4, 0, FMTP(2_10_10_10, USCALED)}, \
   [(int)PIPE_FORMAT_B10G10R10A2_SSCALED] = {DST_SEL(Z,Y,X,W), 4, 4, 0, FMTP(2_10_10_10, SSCALED), \
                                             AA(AC_ALPHA_ADJUST_SSCALED)}, \
   [(int)PIPE_FORMAT_B10G10R10A2_UINT] = {DST_SEL(Z,Y,X,W), 4, 4, 0, FMTP(2_10_10_10, UINT)}, \
   [(int)PIPE_FORMAT_B10G10R10A2_SINT] = {DST_SEL(Z,Y,X,W), 4, 4, 0, FMTP(2_10_10_10, SINT), \
                                          AA(AC_ALPHA_ADJUST_SINT)}, \
   [(int)PIPE_FORMAT_R10G10B10A2_UNORM] = {DST_SEL(X,Y,Z,W), 4, 4, 0, FMTP(2_10_10_10, UNORM)}, \
   [(int)PIPE_FORMAT_R10G10B10A2_SNORM] = {DST_SEL(X,Y,Z,W), 4, 4, 0, FMTP(2_10_10_10, SNORM), \
                                           AA(AC_ALPHA_ADJUST_SNORM)}, \
   [(int)PIPE_FORMAT_R10G10B10A2_USCALED] = {DST_SEL(X,Y,Z,W), 4, 4, 0, FMTP(2_10_10_10, USCALED)}, \
   [(int)PIPE_FORMAT_R10G10B10A2_SSCALED] = {DST_SEL(X,Y,Z,W), 4, 4, 0, FMTP(2_10_10_10, SSCALED), \
                                             AA(AC_ALPHA_ADJUST_SSCALED)}, \
   [(int)PIPE_FORMAT_R10G10B10A2_UINT] = {DST_SEL(X,Y,Z,W), 4, 4, 0, FMTP(2_10_10_10, UINT)}, \
   [(int)PIPE_FORMAT_R10G10B10A2_SINT] = {DST_SEL(X,Y,Z,W), 4, 4, 0, FMTP(2_10_10_10, SINT), \
                                          AA(AC_ALPHA_ADJUST_SINT)}, \
   [(int)PIPE_FORMAT_R11G11B10_FLOAT] = {DST_SEL(X,Y,Z,W), 4, 3, 0, FMTP(10_11_11, FLOAT)},
#define HW_FMT(dfmt, nfmt) (V_008F0C_BUF_DATA_FORMAT_##dfmt | (V_008F0C_BUF_NUM_FORMAT_##nfmt << 4))
#define HW_FMT_INVALID (V_008F0C_BUF_DATA_FORMAT_INVALID | (V_008F0C_BUF_NUM_FORMAT_UNORM << 4))
#define AA(v) v
static const struct ac_vtx_format_info vb_formats_gfx6_alpha_adjust[] = {VB_FORMATS};
#undef AA
#define AA(v) AC_ALPHA_ADJUST_NONE
static const struct ac_vtx_format_info vb_formats_gfx6[] = {VB_FORMATS};
#undef HW_FMT_INVALID
#undef HW_FMT
#define HW_FMT(dfmt, nfmt) V_008F0C_GFX10_FORMAT_##dfmt##_##nfmt
#define HW_FMT_INVALID V_008F0C_GFX10_FORMAT_INVALID
static const struct ac_vtx_format_info vb_formats_gfx10[] = {VB_FORMATS};
#undef HW_FMT_INVALID
#undef HW_FMT
#define HW_FMT(dfmt, nfmt) V_008F0C_GFX11_FORMAT_##dfmt##_##nfmt
#define HW_FMT_INVALID V_008F0C_GFX11_FORMAT_INVALID
static const struct ac_vtx_format_info vb_formats_gfx11[] = {VB_FORMATS};
/**
 * Select the vertex-format table matching a GPU.
 *
 * \param level   graphics IP level of the GPU
 * \param family  chip family; only consulted below GFX10
 * \return pointer to the first element of a static table indexed by enum pipe_format
 */
const struct ac_vtx_format_info *
ac_get_vtx_format_info_table(enum amd_gfx_level level, enum radeon_family family)
{
   /* GFX10 and GFX11 each have their own hardware format encoding. */
   if (level >= GFX11)
      return vb_formats_gfx11;
   if (level >= GFX10)
      return vb_formats_gfx10;

   /* Older chips share one encoding; the alpha-adjust variant applies up to GFX8,
    * with the exception of Stoney.
    */
   if (level <= GFX8 && family != CHIP_STONEY)
      return vb_formats_gfx6_alpha_adjust;

   return vb_formats_gfx6;
}
/**
 * Look up the vertex-format description of a single format.
 *
 * \param level   graphics IP level of the GPU
 * \param family  chip family, forwarded to ac_get_vtx_format_info_table()
 * \param fmt     format used as the index into the selected table
 * \return pointer to the table entry for \p fmt
 */
const struct ac_vtx_format_info *
ac_get_vtx_format_info(enum amd_gfx_level level, enum radeon_family family, enum pipe_format fmt)
{
   const struct ac_vtx_format_info *table = ac_get_vtx_format_info_table(level, family);

   return table + fmt;
}
enum ac_image_dim ac_get_sampler_dim(enum amd_gfx_level gfx_level, enum glsl_sampler_dim dim,
bool is_array)
{

View File

@@ -28,6 +28,7 @@
#include "amd_family.h"
#include "compiler/nir/nir.h"
#include "compiler/shader_enums.h"
#include "util/format/u_format.h"
#include <stdbool.h>
#include <stdint.h>
@@ -55,6 +56,29 @@ struct ac_data_format_info {
uint8_t chan_format;
};
/* Selects the fix-up applied to the alpha channel of a fetched vertex attribute.
 * In the vertex-format tables only the SNORM/SSCALED/SINT 2_10_10_10 formats carry a
 * non-NONE value.
 *
 * NOTE(review): the numeric values look load-bearing -- the ACO VS prolog rebuilds this value
 * from two one-bit masks (alpha_adjust_lo/hi), so it presumably has to stay within 0..3.
 */
enum ac_vs_input_alpha_adjust {
AC_ALPHA_ADJUST_NONE = 0,    /* no fix-up */
AC_ALPHA_ADJUST_SNORM = 1,   /* signed normalized alpha */
AC_ALPHA_ADJUST_SSCALED = 2, /* signed scaled alpha */
AC_ALPHA_ADJUST_SINT = 3,    /* signed integer alpha */
};
/* Per-format description of how a vertex attribute is fetched. One entry exists per
 * enum pipe_format index in the tables returned by ac_get_vtx_format_info_table().
 * The tables initialize entries positionally, so field order matters.
 */
struct ac_vtx_format_info {
uint16_t dst_sel; /* DST_SEL_X/Y/Z/W swizzle bits (S_008F0C_DST_SEL_*) */
uint8_t element_size; /* size of the whole element in bytes (e.g. 6 for R16G16B16) */
uint8_t num_channels;
uint8_t chan_byte_size; /* 0 for packed formats */
/* These last three are dependent on the family. */
/* Bit mask over hw_format[]: bit i is set when hw_format[i] holds a usable format. */
uint8_t has_hw_format;
/* Index is number of channels minus one. Use any index for packed formats.
* GFX6-8 is dfmt[0:3],nfmt[4:7].
* The GFX10 and GFX11 tables store a V_008F0C_GFX10_FORMAT_* / V_008F0C_GFX11_FORMAT_* value
* instead.
*/
uint8_t hw_format[4];
/* Only non-NONE in the table selected for level <= GFX8 on families other than Stoney. */
enum ac_vs_input_alpha_adjust alpha_adjust : 8;
};
struct ac_spi_color_formats {
unsigned normal : 8;
unsigned alpha : 8;
@@ -101,6 +125,13 @@ unsigned ac_get_tbuffer_format(enum amd_gfx_level gfx_level, unsigned dfmt, unsi
const struct ac_data_format_info *ac_get_data_format_info(unsigned dfmt);
/* Returns the static vertex-format table for the given GPU; the table is indexed by
 * enum pipe_format.
 */
const struct ac_vtx_format_info *ac_get_vtx_format_info_table(enum amd_gfx_level level,
enum radeon_family family);
/* Returns the entry for `fmt` from the table that ac_get_vtx_format_info_table() selects for
 * (level, family).
 */
const struct ac_vtx_format_info *ac_get_vtx_format_info(enum amd_gfx_level level,
enum radeon_family family,
enum pipe_format fmt);
enum ac_image_dim ac_get_sampler_dim(enum amd_gfx_level gfx_level, enum glsl_sampler_dim dim,
bool is_array);

View File

@@ -12353,7 +12353,7 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_key* key, ac_shade
unsigned alpha_adjust = (key->state.alpha_adjust_lo >> loc) & 0x1;
alpha_adjust |= ((key->state.alpha_adjust_hi >> loc) & 0x1) << 1;
if (alpha_adjust == ALPHA_ADJUST_SSCALED)
if (alpha_adjust == AC_ALPHA_ADJUST_SSCALED)
bld.vop1(aco_opcode::v_cvt_u32_f32, Definition(alpha, v1), Operand(alpha, v1));
/* For the integer-like cases, do a natural sign extension.
@@ -12362,16 +12362,16 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_key* key, ac_shade
* and happen to contain 0, 1, 2, 3 as the two LSBs of the
* exponent.
*/
unsigned offset = alpha_adjust == ALPHA_ADJUST_SNORM ? 23u : 0u;
unsigned offset = alpha_adjust == AC_ALPHA_ADJUST_SNORM ? 23u : 0u;
bld.vop3(aco_opcode::v_bfe_i32, Definition(alpha, v1), Operand(alpha, v1),
Operand::c32(offset), Operand::c32(2u));
/* Convert back to the right type. */
if (alpha_adjust == ALPHA_ADJUST_SNORM) {
if (alpha_adjust == AC_ALPHA_ADJUST_SNORM) {
bld.vop1(aco_opcode::v_cvt_f32_i32, Definition(alpha, v1), Operand(alpha, v1));
bld.vop2(aco_opcode::v_max_f32, Definition(alpha, v1), Operand::c32(0xbf800000u),
Operand(alpha, v1));
} else if (alpha_adjust == ALPHA_ADJUST_SSCALED) {
} else if (alpha_adjust == AC_ALPHA_ADJUST_SSCALED) {
bld.vop1(aco_opcode::v_cvt_f32_i32, Definition(alpha, v1), Operand(alpha, v1));
}
}

View File

@@ -5967,7 +5967,7 @@ radv_CmdSetVertexInputEXT(VkCommandBuffer commandBuffer, uint32_t vertexBindingD
if (!found) {
unsigned nfmt, dfmt;
bool post_shuffle;
enum radv_vs_input_alpha_adjust alpha_adjust;
enum ac_vs_input_alpha_adjust alpha_adjust;
const struct util_format_description *format_desc = vk_format_description(attrib->format);
found = util_dynarray_grow(&cmd_buffer->cached_vertex_formats,

View File

@@ -151,26 +151,26 @@ void
radv_translate_vertex_format(const struct radv_physical_device *pdevice, VkFormat format,
const struct util_format_description *desc, unsigned *dfmt,
unsigned *nfmt, bool *post_shuffle,
enum radv_vs_input_alpha_adjust *alpha_adjust)
enum ac_vs_input_alpha_adjust *alpha_adjust)
{
assert(desc->channel[0].type != UTIL_FORMAT_TYPE_VOID);
*nfmt = radv_translate_buffer_numformat(desc, 0);
*dfmt = radv_translate_buffer_dataformat(desc, 0);
*alpha_adjust = ALPHA_ADJUST_NONE;
*alpha_adjust = AC_ALPHA_ADJUST_NONE;
if (pdevice->rad_info.gfx_level <= GFX8 && pdevice->rad_info.family != CHIP_STONEY) {
switch (format) {
case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
*alpha_adjust = ALPHA_ADJUST_SNORM;
*alpha_adjust = AC_ALPHA_ADJUST_SNORM;
break;
case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
case VK_FORMAT_A2B10G10R10_SSCALED_PACK32:
*alpha_adjust = ALPHA_ADJUST_SSCALED;
*alpha_adjust = AC_ALPHA_ADJUST_SSCALED;
break;
case VK_FORMAT_A2R10G10B10_SINT_PACK32:
case VK_FORMAT_A2B10G10R10_SINT_PACK32:
*alpha_adjust = ALPHA_ADJUST_SINT;
*alpha_adjust = AC_ALPHA_ADJUST_SINT;
break;
default:
break;

View File

@@ -3776,11 +3776,10 @@ radv_consider_force_vrs(const struct radv_pipeline *pipeline, bool noop_fs,
}
static nir_ssa_def *
radv_adjust_vertex_fetch_alpha(nir_builder *b,
enum radv_vs_input_alpha_adjust alpha_adjust,
radv_adjust_vertex_fetch_alpha(nir_builder *b, enum ac_vs_input_alpha_adjust alpha_adjust,
nir_ssa_def *alpha)
{
if (alpha_adjust == ALPHA_ADJUST_SSCALED)
if (alpha_adjust == AC_ALPHA_ADJUST_SSCALED)
alpha = nir_f2u32(b, alpha);
/* For the integer-like cases, do a natural sign extension.
@@ -3788,15 +3787,15 @@ radv_adjust_vertex_fetch_alpha(nir_builder *b,
* For the SNORM case, the values are 0.0, 0.333, 0.666, 1.0 and happen to contain 0, 1, 2, 3 as
* the two LSBs of the exponent.
*/
unsigned offset = alpha_adjust == ALPHA_ADJUST_SNORM ? 23u : 0u;
unsigned offset = alpha_adjust == AC_ALPHA_ADJUST_SNORM ? 23u : 0u;
alpha = nir_ibfe_imm(b, alpha, offset, 2u);
/* Convert back to the right type. */
if (alpha_adjust == ALPHA_ADJUST_SNORM) {
if (alpha_adjust == AC_ALPHA_ADJUST_SNORM) {
alpha = nir_i2f32(b, alpha);
alpha = nir_fmax(b, alpha, nir_imm_float(b, -1.0f));
} else if (alpha_adjust == ALPHA_ADJUST_SSCALED) {
} else if (alpha_adjust == AC_ALPHA_ADJUST_SSCALED) {
alpha = nir_i2f32(b, alpha);
}
@@ -3825,7 +3824,8 @@ radv_lower_vs_input(nir_shader *nir, const struct radv_pipeline_key *pipeline_ke
continue;
unsigned location = nir_intrinsic_base(intrin) - VERT_ATTRIB_GENERIC0;
enum radv_vs_input_alpha_adjust alpha_adjust = pipeline_key->vs.vertex_alpha_adjust[location];
enum ac_vs_input_alpha_adjust alpha_adjust =
pipeline_key->vs.vertex_alpha_adjust[location];
bool post_shuffle = pipeline_key->vs.vertex_post_shuffle & (1 << location);
unsigned component = nir_intrinsic_component(intrin);
@@ -3871,7 +3871,7 @@ radv_lower_vs_input(nir_shader *nir, const struct radv_pipeline_key *pipeline_ke
}
}
if (alpha_adjust != ALPHA_ADJUST_NONE && component + num_components == 4) {
if (alpha_adjust != AC_ALPHA_ADJUST_NONE && component + num_components == 4) {
unsigned idx = num_components - 1;
channels[idx] = radv_adjust_vertex_fetch_alpha(&b, alpha_adjust, channels[idx]);
}

View File

@@ -2184,7 +2184,7 @@ bool radv_is_buffer_format_supported(VkFormat format, bool *scaled);
void radv_translate_vertex_format(const struct radv_physical_device *pdevice, VkFormat format,
const struct util_format_description *desc, unsigned *dfmt,
unsigned *nfmt, bool *post_shuffle,
enum radv_vs_input_alpha_adjust *alpha_adjust);
enum ac_vs_input_alpha_adjust *alpha_adjust);
uint32_t radv_translate_colorformat(VkFormat format);
uint32_t radv_translate_color_numformat(VkFormat format, const struct util_format_description *desc,
int first_non_void);

View File

@@ -52,13 +52,6 @@ struct radv_shader_args;
struct radv_vs_input_state;
struct radv_shader_args;
enum radv_vs_input_alpha_adjust {
ALPHA_ADJUST_NONE = 0,
ALPHA_ADJUST_SNORM = 1,
ALPHA_ADJUST_SSCALED = 2,
ALPHA_ADJUST_SINT = 3,
};
struct radv_pipeline_key {
uint32_t has_multiview_view_index : 1;
uint32_t optimisations_disabled : 1;
@@ -78,7 +71,7 @@ struct radv_pipeline_key {
uint32_t vertex_attribute_offsets[MAX_VERTEX_ATTRIBS];
uint32_t vertex_attribute_strides[MAX_VERTEX_ATTRIBS];
uint8_t vertex_binding_align[MAX_VBS];
enum radv_vs_input_alpha_adjust vertex_alpha_adjust[MAX_VERTEX_ATTRIBS];
enum ac_vs_input_alpha_adjust vertex_alpha_adjust[MAX_VERTEX_ATTRIBS];
uint32_t vertex_post_shuffle;
uint32_t provoking_vtx_last : 1;
uint32_t dynamic_input_state : 1;