ac/nir: sort xfb info to facilitate vectorization of xfb stores
xfb stores are not vectorized properly, leading to generating random soup of b32, b64, b96, and b128 stores. Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32686>
This commit is contained in:
@@ -1153,6 +1153,36 @@ ac_nir_lower_indirect_derefs(nir_shader *shader,
|
||||
return progress;
|
||||
}
|
||||
|
||||
static int
|
||||
sort_xfb(const void *_a, const void *_b)
|
||||
{
|
||||
const nir_xfb_output_info *a = (const nir_xfb_output_info *)_a;
|
||||
const nir_xfb_output_info *b = (const nir_xfb_output_info *)_b;
|
||||
|
||||
if (a->buffer != b->buffer)
|
||||
return a->buffer > b->buffer ? 1 : -1;
|
||||
|
||||
assert(a->offset != b->offset);
|
||||
return a->offset > b->offset ? 1 : -1;
|
||||
}
|
||||
|
||||
/* Return XFB info sorted by buffer and offset, so that we can generate vec4
|
||||
* stores by iterating over outputs only once.
|
||||
*/
|
||||
nir_xfb_info *
|
||||
ac_nir_get_sorted_xfb_info(const nir_shader *nir)
|
||||
{
|
||||
if (!nir->xfb_info)
|
||||
return NULL;
|
||||
|
||||
unsigned xfb_info_size = nir_xfb_info_size(nir->xfb_info->output_count);
|
||||
nir_xfb_info *info = rzalloc_size(nir, xfb_info_size);
|
||||
|
||||
memcpy(info, nir->xfb_info, xfb_info_size);
|
||||
qsort(info->outputs, info->output_count, sizeof(info->outputs[0]), sort_xfb);
|
||||
return info;
|
||||
}
|
||||
|
||||
static nir_def **
|
||||
get_output_and_type(ac_nir_prerast_out *out, unsigned slot, bool high_16bits,
|
||||
nir_alu_type **types)
|
||||
@@ -1270,7 +1300,7 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
|
||||
|
||||
nir_def *gsvs_ring = nir_load_ring_gsvs_amd(&b);
|
||||
|
||||
nir_xfb_info *info = gs_nir->xfb_info;
|
||||
nir_xfb_info *info = ac_nir_get_sorted_xfb_info(gs_nir);
|
||||
nir_def *stream_id = NULL;
|
||||
if (!disable_streamout && info)
|
||||
stream_id = nir_ubfe_imm(&b, nir_load_streamout_config_amd(&b), 24, 2);
|
||||
@@ -1439,7 +1469,7 @@ ac_nir_lower_legacy_vs(nir_shader *nir,
|
||||
}
|
||||
|
||||
if (!disable_streamout && nir->xfb_info) {
|
||||
emit_streamout(&b, 0, nir->xfb_info, &out);
|
||||
emit_streamout(&b, 0, ac_nir_get_sorted_xfb_info(nir), &out);
|
||||
preserved = nir_metadata_none;
|
||||
}
|
||||
|
||||
|
@@ -54,6 +54,9 @@ typedef unsigned (*ac_nir_map_io_driver_location)(unsigned semantic);
|
||||
struct nir_builder;
|
||||
typedef struct nir_builder nir_builder;
|
||||
|
||||
struct nir_xfb_info;
|
||||
typedef struct nir_xfb_info nir_xfb_info;
|
||||
|
||||
/* Executed by ac_nir_cull when the current primitive is accepted. */
|
||||
typedef void (*ac_nir_cull_accepted)(nir_builder *b, void *state);
|
||||
|
||||
@@ -82,6 +85,8 @@ bool ac_nir_lower_intrinsics_to_args(nir_shader *shader, const enum amd_gfx_leve
|
||||
unsigned wave_size, unsigned workgroup_size,
|
||||
const struct ac_shader_args *ac_args);
|
||||
|
||||
nir_xfb_info *ac_nir_get_sorted_xfb_info(const nir_shader *nir);
|
||||
|
||||
bool ac_nir_optimize_outputs(nir_shader *nir, bool sprite_tex_disallowed,
|
||||
int8_t slot_remap[NUM_TOTAL_VARYING_SLOTS],
|
||||
uint8_t param_export_index[NUM_TOTAL_VARYING_SLOTS]);
|
||||
|
@@ -1841,7 +1841,7 @@ ngg_nogs_store_edgeflag_to_lds(nir_builder *b, lower_ngg_nogs_state *s)
|
||||
static void
|
||||
ngg_nogs_store_xfb_outputs_to_lds(nir_builder *b, lower_ngg_nogs_state *s)
|
||||
{
|
||||
nir_xfb_info *info = b->shader->xfb_info;
|
||||
nir_xfb_info *info = ac_nir_get_sorted_xfb_info(b->shader);
|
||||
|
||||
uint64_t xfb_outputs = 0;
|
||||
unsigned xfb_outputs_16bit = 0;
|
||||
@@ -2368,7 +2368,7 @@ ngg_build_streamout_vertex(nir_builder *b, nir_xfb_info *info,
|
||||
static void
|
||||
ngg_nogs_build_streamout(nir_builder *b, lower_ngg_nogs_state *s)
|
||||
{
|
||||
nir_xfb_info *info = b->shader->xfb_info;
|
||||
nir_xfb_info *info = ac_nir_get_sorted_xfb_info(b->shader);
|
||||
|
||||
nir_def *lds_scratch_base = nir_load_lds_ngg_scratch_base_amd(b);
|
||||
|
||||
@@ -3421,7 +3421,7 @@ ngg_gs_cull_primitive(nir_builder *b, nir_def *tid_in_tg, nir_def *max_vtxcnt,
|
||||
static void
|
||||
ngg_gs_build_streamout(nir_builder *b, lower_ngg_gs_state *s)
|
||||
{
|
||||
nir_xfb_info *info = b->shader->xfb_info;
|
||||
nir_xfb_info *info = ac_nir_get_sorted_xfb_info(b->shader);
|
||||
|
||||
nir_def *tid_in_tg = nir_load_local_invocation_index(b);
|
||||
nir_def *max_vtxcnt = nir_load_workgroup_num_input_vertices_amd(b);
|
||||
|
Reference in New Issue
Block a user