radv,aco: use pipe_format for static vertex input state
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17894>
This commit is contained in:
@@ -5386,11 +5386,10 @@ visit_load_interpolated_input(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
}
|
||||
|
||||
bool
|
||||
check_vertex_fetch_size(isel_context* ctx, const ac_data_format_info* vtx_info, unsigned offset,
|
||||
check_vertex_fetch_size(isel_context* ctx, const ac_vtx_format_info* vtx_info, unsigned offset,
|
||||
unsigned binding_align, unsigned channels)
|
||||
{
|
||||
unsigned vertex_byte_size = vtx_info->chan_byte_size * channels;
|
||||
if (vtx_info->chan_byte_size != 4 && channels == 3)
|
||||
if (!(vtx_info->has_hw_format & BITFIELD_BIT(channels - 1)))
|
||||
return false;
|
||||
|
||||
/* Split typed vertex buffer loads on GFX6 and GFX10+ to avoid any
|
||||
@@ -5399,17 +5398,18 @@ check_vertex_fetch_size(isel_context* ctx, const ac_data_format_info* vtx_info,
|
||||
* also if the VBO offset is aligned to a scalar (eg. stride is 8 and VBO
|
||||
* offset is 2 for R16G16B16A16_SNORM).
|
||||
*/
|
||||
unsigned vertex_byte_size = vtx_info->chan_byte_size * channels;
|
||||
return (ctx->options->gfx_level >= GFX7 && ctx->options->gfx_level <= GFX9) ||
|
||||
(offset % vertex_byte_size == 0 && MAX2(binding_align, 1) % vertex_byte_size == 0);
|
||||
}
|
||||
|
||||
uint8_t
|
||||
get_fetch_data_format(isel_context* ctx, const ac_data_format_info* vtx_info, unsigned offset,
|
||||
get_fetch_format(isel_context* ctx, const ac_vtx_format_info* vtx_info, unsigned offset,
|
||||
unsigned* channels, unsigned max_channels, unsigned binding_align)
|
||||
{
|
||||
if (!vtx_info->chan_byte_size) {
|
||||
*channels = vtx_info->num_channels;
|
||||
return vtx_info->chan_format;
|
||||
return vtx_info->hw_format[0];
|
||||
}
|
||||
|
||||
unsigned num_channels = *channels;
|
||||
@@ -5434,22 +5434,7 @@ get_fetch_data_format(isel_context* ctx, const ac_data_format_info* vtx_info, un
|
||||
num_channels = new_channels;
|
||||
}
|
||||
|
||||
switch (vtx_info->chan_format) {
|
||||
case V_008F0C_BUF_DATA_FORMAT_8:
|
||||
return std::array<uint8_t, 4>{V_008F0C_BUF_DATA_FORMAT_8, V_008F0C_BUF_DATA_FORMAT_8_8,
|
||||
V_008F0C_BUF_DATA_FORMAT_INVALID,
|
||||
V_008F0C_BUF_DATA_FORMAT_8_8_8_8}[num_channels - 1];
|
||||
case V_008F0C_BUF_DATA_FORMAT_16:
|
||||
return std::array<uint8_t, 4>{V_008F0C_BUF_DATA_FORMAT_16, V_008F0C_BUF_DATA_FORMAT_16_16,
|
||||
V_008F0C_BUF_DATA_FORMAT_INVALID,
|
||||
V_008F0C_BUF_DATA_FORMAT_16_16_16_16}[num_channels - 1];
|
||||
case V_008F0C_BUF_DATA_FORMAT_32:
|
||||
return std::array<uint8_t, 4>{V_008F0C_BUF_DATA_FORMAT_32, V_008F0C_BUF_DATA_FORMAT_32_32,
|
||||
V_008F0C_BUF_DATA_FORMAT_32_32_32,
|
||||
V_008F0C_BUF_DATA_FORMAT_32_32_32_32}[num_channels - 1];
|
||||
}
|
||||
unreachable("shouldn't reach here");
|
||||
return V_008F0C_BUF_DATA_FORMAT_INVALID;
|
||||
return vtx_info->hw_format[num_channels - 1];
|
||||
}
|
||||
|
||||
void
|
||||
@@ -5503,12 +5488,12 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
unsigned attrib_binding = ctx->options->key.vs.vertex_attribute_bindings[location];
|
||||
uint32_t attrib_offset = ctx->options->key.vs.vertex_attribute_offsets[location];
|
||||
uint32_t attrib_stride = ctx->options->key.vs.vertex_attribute_strides[location];
|
||||
unsigned attrib_format = ctx->options->key.vs.vertex_attribute_formats[location];
|
||||
enum pipe_format attrib_format =
|
||||
(enum pipe_format)ctx->options->key.vs.vertex_attribute_formats[location];
|
||||
unsigned binding_align = ctx->options->key.vs.vertex_binding_align[attrib_binding];
|
||||
|
||||
unsigned dfmt = attrib_format & 0xf;
|
||||
unsigned nfmt = (attrib_format >> 4) & 0x7;
|
||||
const struct ac_data_format_info* vtx_info = ac_get_data_format_info(dfmt);
|
||||
const struct ac_vtx_format_info* vtx_info =
|
||||
ac_get_vtx_format_info(GFX8, CHIP_POLARIS10, attrib_format);
|
||||
|
||||
unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa) << component;
|
||||
unsigned num_channels = MIN2(util_last_bit(mask), vtx_info->num_channels);
|
||||
@@ -5559,14 +5544,10 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
|
||||
/* use MUBUF when possible to avoid possible alignment issues */
|
||||
/* TODO: we could use SDWA to unpack 8/16-bit attributes without extra instructions */
|
||||
bool use_mubuf =
|
||||
(nfmt == V_008F0C_BUF_NUM_FORMAT_FLOAT || nfmt == V_008F0C_BUF_NUM_FORMAT_UINT ||
|
||||
nfmt == V_008F0C_BUF_NUM_FORMAT_SINT) &&
|
||||
vtx_info->chan_byte_size == 4 && bitsize != 16;
|
||||
unsigned fetch_dfmt = V_008F0C_BUF_DATA_FORMAT_INVALID;
|
||||
bool use_mubuf = vtx_info->chan_byte_size == 4 && bitsize != 16;
|
||||
unsigned fetch_fmt = V_008F0C_BUF_DATA_FORMAT_INVALID;
|
||||
if (!use_mubuf) {
|
||||
fetch_dfmt =
|
||||
get_fetch_data_format(ctx, vtx_info, fetch_offset, &fetch_component,
|
||||
fetch_fmt = get_fetch_format(ctx, vtx_info, fetch_offset, &fetch_component,
|
||||
vtx_info->num_channels - channel_start, binding_align);
|
||||
} else {
|
||||
/* GFX6 only supports loading vec3 with MTBUF, split to vec2,scalar. */
|
||||
@@ -5644,8 +5625,10 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
.instr;
|
||||
mubuf->mubuf().vtx_binding = attrib_binding + 1;
|
||||
} else {
|
||||
unsigned dfmt = fetch_fmt & 0xf;
|
||||
unsigned nfmt = fetch_fmt >> 4;
|
||||
Instruction* mtbuf = bld.mtbuf(opcode, Definition(fetch_dst), list, fetch_index,
|
||||
soffset, fetch_dfmt, nfmt, fetch_offset, false, true)
|
||||
soffset, dfmt, nfmt, fetch_offset, false, true)
|
||||
.instr;
|
||||
mtbuf->mtbuf().vtx_binding = attrib_binding + 1;
|
||||
}
|
||||
@@ -5665,7 +5648,7 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
|
||||
if (!direct_fetch) {
|
||||
bool is_float =
|
||||
nfmt != V_008F0C_BUF_NUM_FORMAT_UINT && nfmt != V_008F0C_BUF_NUM_FORMAT_SINT;
|
||||
nir_alu_type_get_base_type(nir_intrinsic_dest_type(instr)) == nir_type_float;
|
||||
|
||||
unsigned num_components = instr->dest.ssa.num_components;
|
||||
|
||||
|
@@ -416,30 +416,19 @@ init_context(isel_context* ctx, nir_shader* shader)
|
||||
ctx->ub_config.max_workgroup_size[1] = 2048;
|
||||
ctx->ub_config.max_workgroup_size[2] = 2048;
|
||||
for (unsigned i = 0; i < MAX_VERTEX_ATTRIBS; i++) {
|
||||
unsigned attrib_format = ctx->options->key.vs.vertex_attribute_formats[i];
|
||||
unsigned dfmt = attrib_format & 0xf;
|
||||
unsigned nfmt = (attrib_format >> 4) & 0x7;
|
||||
pipe_format format = (pipe_format)ctx->options->key.vs.vertex_attribute_formats[i];
|
||||
const struct util_format_description* desc = util_format_description(format);
|
||||
|
||||
uint32_t max = UINT32_MAX;
|
||||
if (nfmt == V_008F0C_BUF_NUM_FORMAT_UNORM) {
|
||||
uint32_t max;
|
||||
if (desc->channel[0].type != UTIL_FORMAT_TYPE_UNSIGNED) {
|
||||
max = UINT32_MAX;
|
||||
} else if (desc->channel[0].normalized) {
|
||||
max = 0x3f800000u;
|
||||
} else if (nfmt == V_008F0C_BUF_NUM_FORMAT_UINT || nfmt == V_008F0C_BUF_NUM_FORMAT_USCALED) {
|
||||
bool uscaled = nfmt == V_008F0C_BUF_NUM_FORMAT_USCALED;
|
||||
switch (dfmt) {
|
||||
case V_008F0C_BUF_DATA_FORMAT_8:
|
||||
case V_008F0C_BUF_DATA_FORMAT_8_8:
|
||||
case V_008F0C_BUF_DATA_FORMAT_8_8_8_8: max = uscaled ? 0x437f0000u : UINT8_MAX; break;
|
||||
case V_008F0C_BUF_DATA_FORMAT_10_10_10_2:
|
||||
case V_008F0C_BUF_DATA_FORMAT_2_10_10_10: max = uscaled ? 0x447fc000u : 1023; break;
|
||||
case V_008F0C_BUF_DATA_FORMAT_10_11_11:
|
||||
case V_008F0C_BUF_DATA_FORMAT_11_11_10: max = uscaled ? 0x44ffe000u : 2047; break;
|
||||
case V_008F0C_BUF_DATA_FORMAT_16:
|
||||
case V_008F0C_BUF_DATA_FORMAT_16_16:
|
||||
case V_008F0C_BUF_DATA_FORMAT_16_16_16_16: max = uscaled ? 0x477fff00u : UINT16_MAX; break;
|
||||
case V_008F0C_BUF_DATA_FORMAT_32:
|
||||
case V_008F0C_BUF_DATA_FORMAT_32_32:
|
||||
case V_008F0C_BUF_DATA_FORMAT_32_32_32:
|
||||
case V_008F0C_BUF_DATA_FORMAT_32_32_32_32: max = uscaled ? 0x4f800000u : UINT32_MAX; break;
|
||||
} else {
|
||||
max = 0;
|
||||
for (unsigned j = 0; j < desc->nr_channels; j++) {
|
||||
uint32_t chan_max = u_uintN_max(desc->channel[0].size);
|
||||
max = MAX2(max, desc->channel[j].pure_integer ? chan_max : fui(chan_max));
|
||||
}
|
||||
}
|
||||
ctx->ub_config.vertex_attrib_max[i] = max;
|
||||
|
@@ -394,11 +394,9 @@ load_vs_input(struct radv_shader_context *ctx, unsigned driver_location, LLVMTyp
|
||||
LLVMValueRef input;
|
||||
LLVMValueRef buffer_index;
|
||||
unsigned attrib_index = driver_location - VERT_ATTRIB_GENERIC0;
|
||||
unsigned attrib_format = ctx->options->key.vs.vertex_attribute_formats[attrib_index];
|
||||
unsigned data_format = attrib_format & 0x0f;
|
||||
unsigned num_format = (attrib_format >> 4) & 0x07;
|
||||
bool is_float =
|
||||
num_format != V_008F0C_BUF_NUM_FORMAT_UINT && num_format != V_008F0C_BUF_NUM_FORMAT_SINT;
|
||||
enum pipe_format attrib_format = ctx->options->key.vs.vertex_attribute_formats[attrib_index];
|
||||
const struct util_format_description *desc = util_format_description(attrib_format);
|
||||
bool is_float = !desc->channel[0].pure_integer;
|
||||
uint8_t input_usage_mask =
|
||||
ctx->shader_info->vs.input_usage_mask[driver_location];
|
||||
unsigned num_input_channels = util_last_bit(input_usage_mask);
|
||||
@@ -424,7 +422,8 @@ load_vs_input(struct radv_shader_context *ctx, unsigned driver_location, LLVMTyp
|
||||
ac_get_arg(&ctx->ac, ctx->args->ac.base_vertex), "");
|
||||
}
|
||||
|
||||
const struct ac_data_format_info *vtx_info = ac_get_data_format_info(data_format);
|
||||
const struct ac_vtx_format_info *vtx_info =
|
||||
ac_get_vtx_format_info(GFX8, CHIP_POLARIS10, attrib_format);
|
||||
|
||||
/* Adjust the number of channels to load based on the vertex attribute format. */
|
||||
unsigned num_channels = MIN2(num_input_channels, vtx_info->num_channels);
|
||||
@@ -432,6 +431,9 @@ load_vs_input(struct radv_shader_context *ctx, unsigned driver_location, LLVMTyp
|
||||
unsigned attrib_offset = ctx->options->key.vs.vertex_attribute_offsets[attrib_index];
|
||||
unsigned attrib_stride = ctx->options->key.vs.vertex_attribute_strides[attrib_index];
|
||||
|
||||
unsigned data_format = vtx_info->hw_format[num_channels - 1] & 0xf;
|
||||
unsigned num_format = vtx_info->hw_format[0] >> 4;
|
||||
|
||||
unsigned desc_index =
|
||||
ctx->shader_info->vs.use_per_attribute_vb_descs ? attrib_index : attrib_binding;
|
||||
desc_index = util_bitcount(ctx->shader_info->vs.vb_desc_usage_mask &
|
||||
@@ -444,8 +446,9 @@ load_vs_input(struct radv_shader_context *ctx, unsigned driver_location, LLVMTyp
|
||||
* dynamic) is unaligned and also if the VBO offset is aligned to a scalar (eg. stride is 8 and
|
||||
* VBO offset is 2 for R16G16B16A16_SNORM).
|
||||
*/
|
||||
if ((ctx->ac.gfx_level == GFX6 || ctx->ac.gfx_level >= GFX10) && vtx_info->chan_byte_size) {
|
||||
unsigned chan_format = vtx_info->chan_format;
|
||||
if (((ctx->ac.gfx_level == GFX6 || ctx->ac.gfx_level >= GFX10) && vtx_info->chan_byte_size) ||
|
||||
!(vtx_info->has_hw_format & BITFIELD_BIT(vtx_info->num_channels - 1))) {
|
||||
unsigned chan_format = vtx_info->hw_format[0] & 0xf;
|
||||
LLVMValueRef values[4];
|
||||
|
||||
for (unsigned chan = 0; chan < num_channels; chan++) {
|
||||
|
@@ -3010,6 +3010,7 @@ radv_generate_graphics_pipeline_key(const struct radv_graphics_pipeline *pipelin
|
||||
const struct radv_blend_state *blend)
|
||||
{
|
||||
struct radv_device *device = pipeline->base.device;
|
||||
const struct radv_physical_device *pdevice = device->physical_device;
|
||||
struct radv_pipeline_key key = radv_generate_pipeline_key(&pipeline->base, pCreateInfo->flags);
|
||||
|
||||
key.has_multiview_view_index = !!state->rp->view_mask;
|
||||
@@ -3023,16 +3024,9 @@ radv_generate_graphics_pipeline_key(const struct radv_graphics_pipeline *pipelin
|
||||
u_foreach_bit(i, state->vi->attributes_valid) {
|
||||
uint32_t binding = state->vi->attributes[i].binding;
|
||||
uint32_t offset = state->vi->attributes[i].offset;
|
||||
VkFormat format = state->vi->attributes[i].format;
|
||||
const struct util_format_description *format_desc;
|
||||
unsigned num_format, data_format;
|
||||
bool post_shuffle;
|
||||
enum pipe_format format = vk_format_to_pipe_format(state->vi->attributes[i].format);
|
||||
|
||||
format_desc = vk_format_description(format);
|
||||
radv_translate_vertex_format(device->physical_device, format, format_desc, &data_format,
|
||||
&num_format, &post_shuffle, &key.vs.vertex_alpha_adjust[i]);
|
||||
|
||||
key.vs.vertex_attribute_formats[i] = data_format | (num_format << 4);
|
||||
key.vs.vertex_attribute_formats[i] = format;
|
||||
key.vs.vertex_attribute_bindings[i] = binding;
|
||||
key.vs.vertex_attribute_offsets[i] = offset;
|
||||
key.vs.instance_rate_divisors[i] = state->vi->bindings[binding].divisor;
|
||||
@@ -3056,13 +3050,10 @@ radv_generate_graphics_pipeline_key(const struct radv_graphics_pipeline *pipelin
|
||||
key.vs.instance_rate_inputs |= 1u << i;
|
||||
}
|
||||
|
||||
if (post_shuffle) {
|
||||
key.vs.vertex_post_shuffle |= 1u << i;
|
||||
}
|
||||
|
||||
const struct ac_data_format_info *dfmt_info = ac_get_data_format_info(data_format);
|
||||
const struct ac_vtx_format_info *vtx_info =
|
||||
ac_get_vtx_format_info(pdevice->rad_info.gfx_level, pdevice->rad_info.family, format);
|
||||
unsigned attrib_align =
|
||||
dfmt_info->chan_byte_size ? dfmt_info->chan_byte_size : dfmt_info->element_size;
|
||||
vtx_info->chan_byte_size ? vtx_info->chan_byte_size : vtx_info->element_size;
|
||||
|
||||
/* If offset is misaligned, then the buffer offset must be too. Just skip updating
|
||||
* vertex_binding_align in this case.
|
||||
@@ -3803,7 +3794,8 @@ radv_adjust_vertex_fetch_alpha(nir_builder *b, enum ac_vs_input_alpha_adjust alp
|
||||
}
|
||||
|
||||
static bool
|
||||
radv_lower_vs_input(nir_shader *nir, const struct radv_pipeline_key *pipeline_key)
|
||||
radv_lower_vs_input(nir_shader *nir, const struct radv_physical_device *pdevice,
|
||||
const struct radv_pipeline_key *pipeline_key)
|
||||
{
|
||||
nir_function_impl *impl = nir_shader_get_entrypoint(nir);
|
||||
bool progress = false;
|
||||
@@ -3824,25 +3816,22 @@ radv_lower_vs_input(nir_shader *nir, const struct radv_pipeline_key *pipeline_ke
|
||||
continue;
|
||||
|
||||
unsigned location = nir_intrinsic_base(intrin) - VERT_ATTRIB_GENERIC0;
|
||||
enum ac_vs_input_alpha_adjust alpha_adjust =
|
||||
pipeline_key->vs.vertex_alpha_adjust[location];
|
||||
bool post_shuffle = pipeline_key->vs.vertex_post_shuffle & (1 << location);
|
||||
|
||||
unsigned component = nir_intrinsic_component(intrin);
|
||||
unsigned num_components = intrin->dest.ssa.num_components;
|
||||
|
||||
unsigned attrib_format = pipeline_key->vs.vertex_attribute_formats[location];
|
||||
unsigned dfmt = attrib_format & 0xf;
|
||||
unsigned nfmt = (attrib_format >> 4) & 0x7;
|
||||
const struct ac_data_format_info *vtx_info = ac_get_data_format_info(dfmt);
|
||||
enum pipe_format attrib_format = pipeline_key->vs.vertex_attribute_formats[location];
|
||||
const struct ac_vtx_format_info *desc = ac_get_vtx_format_info(
|
||||
pdevice->rad_info.gfx_level, pdevice->rad_info.family, attrib_format);
|
||||
bool is_float =
|
||||
nfmt != V_008F0C_BUF_NUM_FORMAT_UINT && nfmt != V_008F0C_BUF_NUM_FORMAT_SINT;
|
||||
nir_alu_type_get_base_type(nir_intrinsic_dest_type(intrin)) == nir_type_float;
|
||||
|
||||
unsigned mask = nir_ssa_def_components_read(&intrin->dest.ssa) << component;
|
||||
unsigned num_channels = MIN2(util_last_bit(mask), vtx_info->num_channels);
|
||||
unsigned num_channels = MIN2(util_last_bit(mask), desc->num_channels);
|
||||
|
||||
static const unsigned swizzle_normal[4] = {0, 1, 2, 3};
|
||||
static const unsigned swizzle_post_shuffle[4] = {2, 1, 0, 3};
|
||||
bool post_shuffle = G_008F0C_DST_SEL_X(desc->dst_sel) == V_008F0C_SQ_SEL_Z;
|
||||
const unsigned *swizzle = post_shuffle ? swizzle_post_shuffle : swizzle_normal;
|
||||
|
||||
b.cursor = nir_after_instr(instr);
|
||||
@@ -3871,9 +3860,9 @@ radv_lower_vs_input(nir_shader *nir, const struct radv_pipeline_key *pipeline_ke
|
||||
}
|
||||
}
|
||||
|
||||
if (alpha_adjust != AC_ALPHA_ADJUST_NONE && component + num_components == 4) {
|
||||
if (desc->alpha_adjust != AC_ALPHA_ADJUST_NONE && component + num_components == 4) {
|
||||
unsigned idx = num_components - 1;
|
||||
channels[idx] = radv_adjust_vertex_fetch_alpha(&b, alpha_adjust, channels[idx]);
|
||||
channels[idx] = radv_adjust_vertex_fetch_alpha(&b, desc->alpha_adjust, channels[idx]);
|
||||
}
|
||||
|
||||
nir_ssa_def *new_dest = nir_vec(&b, channels, num_components);
|
||||
@@ -4579,7 +4568,8 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout
|
||||
}
|
||||
|
||||
if (stages[MESA_SHADER_VERTEX].nir) {
|
||||
NIR_PASS(_, stages[MESA_SHADER_VERTEX].nir, radv_lower_vs_input, pipeline_key);
|
||||
NIR_PASS(_, stages[MESA_SHADER_VERTEX].nir, radv_lower_vs_input, device->physical_device,
|
||||
pipeline_key);
|
||||
}
|
||||
|
||||
if (stages[MESA_SHADER_FRAGMENT].nir && !radv_use_llvm_for_stage(device, MESA_SHADER_FRAGMENT)) {
|
||||
|
@@ -71,8 +71,6 @@ struct radv_pipeline_key {
|
||||
uint32_t vertex_attribute_offsets[MAX_VERTEX_ATTRIBS];
|
||||
uint32_t vertex_attribute_strides[MAX_VERTEX_ATTRIBS];
|
||||
uint8_t vertex_binding_align[MAX_VBS];
|
||||
enum ac_vs_input_alpha_adjust vertex_alpha_adjust[MAX_VERTEX_ATTRIBS];
|
||||
uint32_t vertex_post_shuffle;
|
||||
uint32_t provoking_vtx_last : 1;
|
||||
uint32_t dynamic_input_state : 1;
|
||||
uint8_t topology;
|
||||
|
Reference in New Issue
Block a user