radv,aco: use pipe_format for static vertex input state

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17894>
This commit is contained in:
Rhys Perry
2022-07-29 20:14:59 +01:00
committed by Marge Bot
parent d4ec156d82
commit c06a5a5ebd
5 changed files with 59 additions and 96 deletions

View File

@@ -5386,11 +5386,10 @@ visit_load_interpolated_input(isel_context* ctx, nir_intrinsic_instr* instr)
}
bool
check_vertex_fetch_size(isel_context* ctx, const ac_data_format_info* vtx_info, unsigned offset,
check_vertex_fetch_size(isel_context* ctx, const ac_vtx_format_info* vtx_info, unsigned offset,
unsigned binding_align, unsigned channels)
{
unsigned vertex_byte_size = vtx_info->chan_byte_size * channels;
if (vtx_info->chan_byte_size != 4 && channels == 3)
if (!(vtx_info->has_hw_format & BITFIELD_BIT(channels - 1)))
return false;
/* Split typed vertex buffer loads on GFX6 and GFX10+ to avoid any
@@ -5399,17 +5398,18 @@ check_vertex_fetch_size(isel_context* ctx, const ac_data_format_info* vtx_info,
* also if the VBO offset is aligned to a scalar (eg. stride is 8 and VBO
* offset is 2 for R16G16B16A16_SNORM).
*/
unsigned vertex_byte_size = vtx_info->chan_byte_size * channels;
return (ctx->options->gfx_level >= GFX7 && ctx->options->gfx_level <= GFX9) ||
(offset % vertex_byte_size == 0 && MAX2(binding_align, 1) % vertex_byte_size == 0);
}
uint8_t
get_fetch_data_format(isel_context* ctx, const ac_data_format_info* vtx_info, unsigned offset,
get_fetch_format(isel_context* ctx, const ac_vtx_format_info* vtx_info, unsigned offset,
unsigned* channels, unsigned max_channels, unsigned binding_align)
{
if (!vtx_info->chan_byte_size) {
*channels = vtx_info->num_channels;
return vtx_info->chan_format;
return vtx_info->hw_format[0];
}
unsigned num_channels = *channels;
@@ -5434,22 +5434,7 @@ get_fetch_data_format(isel_context* ctx, const ac_data_format_info* vtx_info, un
num_channels = new_channels;
}
switch (vtx_info->chan_format) {
case V_008F0C_BUF_DATA_FORMAT_8:
return std::array<uint8_t, 4>{V_008F0C_BUF_DATA_FORMAT_8, V_008F0C_BUF_DATA_FORMAT_8_8,
V_008F0C_BUF_DATA_FORMAT_INVALID,
V_008F0C_BUF_DATA_FORMAT_8_8_8_8}[num_channels - 1];
case V_008F0C_BUF_DATA_FORMAT_16:
return std::array<uint8_t, 4>{V_008F0C_BUF_DATA_FORMAT_16, V_008F0C_BUF_DATA_FORMAT_16_16,
V_008F0C_BUF_DATA_FORMAT_INVALID,
V_008F0C_BUF_DATA_FORMAT_16_16_16_16}[num_channels - 1];
case V_008F0C_BUF_DATA_FORMAT_32:
return std::array<uint8_t, 4>{V_008F0C_BUF_DATA_FORMAT_32, V_008F0C_BUF_DATA_FORMAT_32_32,
V_008F0C_BUF_DATA_FORMAT_32_32_32,
V_008F0C_BUF_DATA_FORMAT_32_32_32_32}[num_channels - 1];
}
unreachable("shouldn't reach here");
return V_008F0C_BUF_DATA_FORMAT_INVALID;
return vtx_info->hw_format[num_channels - 1];
}
void
@@ -5503,12 +5488,12 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr)
unsigned attrib_binding = ctx->options->key.vs.vertex_attribute_bindings[location];
uint32_t attrib_offset = ctx->options->key.vs.vertex_attribute_offsets[location];
uint32_t attrib_stride = ctx->options->key.vs.vertex_attribute_strides[location];
unsigned attrib_format = ctx->options->key.vs.vertex_attribute_formats[location];
enum pipe_format attrib_format =
(enum pipe_format)ctx->options->key.vs.vertex_attribute_formats[location];
unsigned binding_align = ctx->options->key.vs.vertex_binding_align[attrib_binding];
unsigned dfmt = attrib_format & 0xf;
unsigned nfmt = (attrib_format >> 4) & 0x7;
const struct ac_data_format_info* vtx_info = ac_get_data_format_info(dfmt);
const struct ac_vtx_format_info* vtx_info =
ac_get_vtx_format_info(GFX8, CHIP_POLARIS10, attrib_format);
unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa) << component;
unsigned num_channels = MIN2(util_last_bit(mask), vtx_info->num_channels);
@@ -5559,14 +5544,10 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr)
/* use MUBUF when possible to avoid possible alignment issues */
/* TODO: we could use SDWA to unpack 8/16-bit attributes without extra instructions */
bool use_mubuf =
(nfmt == V_008F0C_BUF_NUM_FORMAT_FLOAT || nfmt == V_008F0C_BUF_NUM_FORMAT_UINT ||
nfmt == V_008F0C_BUF_NUM_FORMAT_SINT) &&
vtx_info->chan_byte_size == 4 && bitsize != 16;
unsigned fetch_dfmt = V_008F0C_BUF_DATA_FORMAT_INVALID;
bool use_mubuf = vtx_info->chan_byte_size == 4 && bitsize != 16;
unsigned fetch_fmt = V_008F0C_BUF_DATA_FORMAT_INVALID;
if (!use_mubuf) {
fetch_dfmt =
get_fetch_data_format(ctx, vtx_info, fetch_offset, &fetch_component,
fetch_fmt = get_fetch_format(ctx, vtx_info, fetch_offset, &fetch_component,
vtx_info->num_channels - channel_start, binding_align);
} else {
/* GFX6 only supports loading vec3 with MTBUF, split to vec2,scalar. */
@@ -5644,8 +5625,10 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr)
.instr;
mubuf->mubuf().vtx_binding = attrib_binding + 1;
} else {
unsigned dfmt = fetch_fmt & 0xf;
unsigned nfmt = fetch_fmt >> 4;
Instruction* mtbuf = bld.mtbuf(opcode, Definition(fetch_dst), list, fetch_index,
soffset, fetch_dfmt, nfmt, fetch_offset, false, true)
soffset, dfmt, nfmt, fetch_offset, false, true)
.instr;
mtbuf->mtbuf().vtx_binding = attrib_binding + 1;
}
@@ -5665,7 +5648,7 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr)
if (!direct_fetch) {
bool is_float =
nfmt != V_008F0C_BUF_NUM_FORMAT_UINT && nfmt != V_008F0C_BUF_NUM_FORMAT_SINT;
nir_alu_type_get_base_type(nir_intrinsic_dest_type(instr)) == nir_type_float;
unsigned num_components = instr->dest.ssa.num_components;

View File

@@ -416,30 +416,19 @@ init_context(isel_context* ctx, nir_shader* shader)
ctx->ub_config.max_workgroup_size[1] = 2048;
ctx->ub_config.max_workgroup_size[2] = 2048;
for (unsigned i = 0; i < MAX_VERTEX_ATTRIBS; i++) {
unsigned attrib_format = ctx->options->key.vs.vertex_attribute_formats[i];
unsigned dfmt = attrib_format & 0xf;
unsigned nfmt = (attrib_format >> 4) & 0x7;
pipe_format format = (pipe_format)ctx->options->key.vs.vertex_attribute_formats[i];
const struct util_format_description* desc = util_format_description(format);
uint32_t max = UINT32_MAX;
if (nfmt == V_008F0C_BUF_NUM_FORMAT_UNORM) {
uint32_t max;
if (desc->channel[0].type != UTIL_FORMAT_TYPE_UNSIGNED) {
max = UINT32_MAX;
} else if (desc->channel[0].normalized) {
max = 0x3f800000u;
} else if (nfmt == V_008F0C_BUF_NUM_FORMAT_UINT || nfmt == V_008F0C_BUF_NUM_FORMAT_USCALED) {
bool uscaled = nfmt == V_008F0C_BUF_NUM_FORMAT_USCALED;
switch (dfmt) {
case V_008F0C_BUF_DATA_FORMAT_8:
case V_008F0C_BUF_DATA_FORMAT_8_8:
case V_008F0C_BUF_DATA_FORMAT_8_8_8_8: max = uscaled ? 0x437f0000u : UINT8_MAX; break;
case V_008F0C_BUF_DATA_FORMAT_10_10_10_2:
case V_008F0C_BUF_DATA_FORMAT_2_10_10_10: max = uscaled ? 0x447fc000u : 1023; break;
case V_008F0C_BUF_DATA_FORMAT_10_11_11:
case V_008F0C_BUF_DATA_FORMAT_11_11_10: max = uscaled ? 0x44ffe000u : 2047; break;
case V_008F0C_BUF_DATA_FORMAT_16:
case V_008F0C_BUF_DATA_FORMAT_16_16:
case V_008F0C_BUF_DATA_FORMAT_16_16_16_16: max = uscaled ? 0x477fff00u : UINT16_MAX; break;
case V_008F0C_BUF_DATA_FORMAT_32:
case V_008F0C_BUF_DATA_FORMAT_32_32:
case V_008F0C_BUF_DATA_FORMAT_32_32_32:
case V_008F0C_BUF_DATA_FORMAT_32_32_32_32: max = uscaled ? 0x4f800000u : UINT32_MAX; break;
} else {
max = 0;
for (unsigned j = 0; j < desc->nr_channels; j++) {
uint32_t chan_max = u_uintN_max(desc->channel[0].size);
max = MAX2(max, desc->channel[j].pure_integer ? chan_max : fui(chan_max));
}
}
ctx->ub_config.vertex_attrib_max[i] = max;

View File

@@ -394,11 +394,9 @@ load_vs_input(struct radv_shader_context *ctx, unsigned driver_location, LLVMTyp
LLVMValueRef input;
LLVMValueRef buffer_index;
unsigned attrib_index = driver_location - VERT_ATTRIB_GENERIC0;
unsigned attrib_format = ctx->options->key.vs.vertex_attribute_formats[attrib_index];
unsigned data_format = attrib_format & 0x0f;
unsigned num_format = (attrib_format >> 4) & 0x07;
bool is_float =
num_format != V_008F0C_BUF_NUM_FORMAT_UINT && num_format != V_008F0C_BUF_NUM_FORMAT_SINT;
enum pipe_format attrib_format = ctx->options->key.vs.vertex_attribute_formats[attrib_index];
const struct util_format_description *desc = util_format_description(attrib_format);
bool is_float = !desc->channel[0].pure_integer;
uint8_t input_usage_mask =
ctx->shader_info->vs.input_usage_mask[driver_location];
unsigned num_input_channels = util_last_bit(input_usage_mask);
@@ -424,7 +422,8 @@ load_vs_input(struct radv_shader_context *ctx, unsigned driver_location, LLVMTyp
ac_get_arg(&ctx->ac, ctx->args->ac.base_vertex), "");
}
const struct ac_data_format_info *vtx_info = ac_get_data_format_info(data_format);
const struct ac_vtx_format_info *vtx_info =
ac_get_vtx_format_info(GFX8, CHIP_POLARIS10, attrib_format);
/* Adjust the number of channels to load based on the vertex attribute format. */
unsigned num_channels = MIN2(num_input_channels, vtx_info->num_channels);
@@ -432,6 +431,9 @@ load_vs_input(struct radv_shader_context *ctx, unsigned driver_location, LLVMTyp
unsigned attrib_offset = ctx->options->key.vs.vertex_attribute_offsets[attrib_index];
unsigned attrib_stride = ctx->options->key.vs.vertex_attribute_strides[attrib_index];
unsigned data_format = vtx_info->hw_format[num_channels - 1] & 0xf;
unsigned num_format = vtx_info->hw_format[0] >> 4;
unsigned desc_index =
ctx->shader_info->vs.use_per_attribute_vb_descs ? attrib_index : attrib_binding;
desc_index = util_bitcount(ctx->shader_info->vs.vb_desc_usage_mask &
@@ -444,8 +446,9 @@ load_vs_input(struct radv_shader_context *ctx, unsigned driver_location, LLVMTyp
* dynamic) is unaligned and also if the VBO offset is aligned to a scalar (eg. stride is 8 and
* VBO offset is 2 for R16G16B16A16_SNORM).
*/
if ((ctx->ac.gfx_level == GFX6 || ctx->ac.gfx_level >= GFX10) && vtx_info->chan_byte_size) {
unsigned chan_format = vtx_info->chan_format;
if (((ctx->ac.gfx_level == GFX6 || ctx->ac.gfx_level >= GFX10) && vtx_info->chan_byte_size) ||
!(vtx_info->has_hw_format & BITFIELD_BIT(vtx_info->num_channels - 1))) {
unsigned chan_format = vtx_info->hw_format[0] & 0xf;
LLVMValueRef values[4];
for (unsigned chan = 0; chan < num_channels; chan++) {

View File

@@ -3010,6 +3010,7 @@ radv_generate_graphics_pipeline_key(const struct radv_graphics_pipeline *pipelin
const struct radv_blend_state *blend)
{
struct radv_device *device = pipeline->base.device;
const struct radv_physical_device *pdevice = device->physical_device;
struct radv_pipeline_key key = radv_generate_pipeline_key(&pipeline->base, pCreateInfo->flags);
key.has_multiview_view_index = !!state->rp->view_mask;
@@ -3023,16 +3024,9 @@ radv_generate_graphics_pipeline_key(const struct radv_graphics_pipeline *pipelin
u_foreach_bit(i, state->vi->attributes_valid) {
uint32_t binding = state->vi->attributes[i].binding;
uint32_t offset = state->vi->attributes[i].offset;
VkFormat format = state->vi->attributes[i].format;
const struct util_format_description *format_desc;
unsigned num_format, data_format;
bool post_shuffle;
enum pipe_format format = vk_format_to_pipe_format(state->vi->attributes[i].format);
format_desc = vk_format_description(format);
radv_translate_vertex_format(device->physical_device, format, format_desc, &data_format,
&num_format, &post_shuffle, &key.vs.vertex_alpha_adjust[i]);
key.vs.vertex_attribute_formats[i] = data_format | (num_format << 4);
key.vs.vertex_attribute_formats[i] = format;
key.vs.vertex_attribute_bindings[i] = binding;
key.vs.vertex_attribute_offsets[i] = offset;
key.vs.instance_rate_divisors[i] = state->vi->bindings[binding].divisor;
@@ -3056,13 +3050,10 @@ radv_generate_graphics_pipeline_key(const struct radv_graphics_pipeline *pipelin
key.vs.instance_rate_inputs |= 1u << i;
}
if (post_shuffle) {
key.vs.vertex_post_shuffle |= 1u << i;
}
const struct ac_data_format_info *dfmt_info = ac_get_data_format_info(data_format);
const struct ac_vtx_format_info *vtx_info =
ac_get_vtx_format_info(pdevice->rad_info.gfx_level, pdevice->rad_info.family, format);
unsigned attrib_align =
dfmt_info->chan_byte_size ? dfmt_info->chan_byte_size : dfmt_info->element_size;
vtx_info->chan_byte_size ? vtx_info->chan_byte_size : vtx_info->element_size;
/* If offset is misaligned, then the buffer offset must be too. Just skip updating
* vertex_binding_align in this case.
@@ -3803,7 +3794,8 @@ radv_adjust_vertex_fetch_alpha(nir_builder *b, enum ac_vs_input_alpha_adjust alp
}
static bool
radv_lower_vs_input(nir_shader *nir, const struct radv_pipeline_key *pipeline_key)
radv_lower_vs_input(nir_shader *nir, const struct radv_physical_device *pdevice,
const struct radv_pipeline_key *pipeline_key)
{
nir_function_impl *impl = nir_shader_get_entrypoint(nir);
bool progress = false;
@@ -3824,25 +3816,22 @@ radv_lower_vs_input(nir_shader *nir, const struct radv_pipeline_key *pipeline_ke
continue;
unsigned location = nir_intrinsic_base(intrin) - VERT_ATTRIB_GENERIC0;
enum ac_vs_input_alpha_adjust alpha_adjust =
pipeline_key->vs.vertex_alpha_adjust[location];
bool post_shuffle = pipeline_key->vs.vertex_post_shuffle & (1 << location);
unsigned component = nir_intrinsic_component(intrin);
unsigned num_components = intrin->dest.ssa.num_components;
unsigned attrib_format = pipeline_key->vs.vertex_attribute_formats[location];
unsigned dfmt = attrib_format & 0xf;
unsigned nfmt = (attrib_format >> 4) & 0x7;
const struct ac_data_format_info *vtx_info = ac_get_data_format_info(dfmt);
enum pipe_format attrib_format = pipeline_key->vs.vertex_attribute_formats[location];
const struct ac_vtx_format_info *desc = ac_get_vtx_format_info(
pdevice->rad_info.gfx_level, pdevice->rad_info.family, attrib_format);
bool is_float =
nfmt != V_008F0C_BUF_NUM_FORMAT_UINT && nfmt != V_008F0C_BUF_NUM_FORMAT_SINT;
nir_alu_type_get_base_type(nir_intrinsic_dest_type(intrin)) == nir_type_float;
unsigned mask = nir_ssa_def_components_read(&intrin->dest.ssa) << component;
unsigned num_channels = MIN2(util_last_bit(mask), vtx_info->num_channels);
unsigned num_channels = MIN2(util_last_bit(mask), desc->num_channels);
static const unsigned swizzle_normal[4] = {0, 1, 2, 3};
static const unsigned swizzle_post_shuffle[4] = {2, 1, 0, 3};
bool post_shuffle = G_008F0C_DST_SEL_X(desc->dst_sel) == V_008F0C_SQ_SEL_Z;
const unsigned *swizzle = post_shuffle ? swizzle_post_shuffle : swizzle_normal;
b.cursor = nir_after_instr(instr);
@@ -3871,9 +3860,9 @@ radv_lower_vs_input(nir_shader *nir, const struct radv_pipeline_key *pipeline_ke
}
}
if (alpha_adjust != AC_ALPHA_ADJUST_NONE && component + num_components == 4) {
if (desc->alpha_adjust != AC_ALPHA_ADJUST_NONE && component + num_components == 4) {
unsigned idx = num_components - 1;
channels[idx] = radv_adjust_vertex_fetch_alpha(&b, alpha_adjust, channels[idx]);
channels[idx] = radv_adjust_vertex_fetch_alpha(&b, desc->alpha_adjust, channels[idx]);
}
nir_ssa_def *new_dest = nir_vec(&b, channels, num_components);
@@ -4579,7 +4568,8 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout
}
if (stages[MESA_SHADER_VERTEX].nir) {
NIR_PASS(_, stages[MESA_SHADER_VERTEX].nir, radv_lower_vs_input, pipeline_key);
NIR_PASS(_, stages[MESA_SHADER_VERTEX].nir, radv_lower_vs_input, device->physical_device,
pipeline_key);
}
if (stages[MESA_SHADER_FRAGMENT].nir && !radv_use_llvm_for_stage(device, MESA_SHADER_FRAGMENT)) {

View File

@@ -71,8 +71,6 @@ struct radv_pipeline_key {
uint32_t vertex_attribute_offsets[MAX_VERTEX_ATTRIBS];
uint32_t vertex_attribute_strides[MAX_VERTEX_ATTRIBS];
uint8_t vertex_binding_align[MAX_VBS];
enum ac_vs_input_alpha_adjust vertex_alpha_adjust[MAX_VERTEX_ATTRIBS];
uint32_t vertex_post_shuffle;
uint32_t provoking_vtx_last : 1;
uint32_t dynamic_input_state : 1;
uint8_t topology;