ac/nir,radv,radeonsi: gs copy shader use ac_nir_export_(position|parameter)

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20691>
This commit is contained in:
Qiang Yu
2022-12-23 20:58:59 +08:00
committed by Marge Bot
parent 7308637bb4
commit 7c41cdb81f
4 changed files with 54 additions and 63 deletions

View File

@@ -465,7 +465,13 @@ emit_streamout(nir_builder *b, unsigned stream, nir_xfb_info *info,
nir_shader *
ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
enum amd_gfx_level gfx_level,
uint32_t clip_cull_mask,
const uint8_t *param_offsets,
bool has_param_exports,
bool disable_streamout,
bool kill_pointsize,
bool force_vrs,
ac_nir_gs_output_info *output_info)
{
nir_builder b = nir_builder_init_simple_shader(
@@ -474,6 +480,9 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
nir_foreach_shader_out_variable(var, gs_nir)
nir_shader_add_variable(b.shader, nir_variable_clone(var, b.shader));
b.shader->info.outputs_written = gs_nir->info.outputs_written;
b.shader->info.outputs_written_16bit = gs_nir->info.outputs_written_16bit;
nir_ssa_def *gsvs_ring = nir_load_ring_gsvs_amd(&b);
nir_xfb_info *info = gs_nir->xfb_info;
@@ -507,6 +516,14 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
.base = offset,
.access = ACCESS_COHERENT | ACCESS_STREAM_CACHE_POLICY);
/* clamp legacy color output */
if (i == VARYING_SLOT_COL0 || i == VARYING_SLOT_COL1 ||
i == VARYING_SLOT_BFC0 || i == VARYING_SLOT_BFC0) {
nir_ssa_def *color = outputs.data[i][j];
nir_ssa_def *clamp = nir_load_clamp_vertex_color_amd(&b);
outputs.data[i][j] = nir_bcsel(&b, clamp, nir_fsat(&b, color), color);
}
offset += gs_nir->info.gs.vertices_out * 16 * 4;
}
}
@@ -539,49 +556,21 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
emit_streamout(&b, stream, info, &outputs);
if (stream == 0) {
u_foreach_bit64 (i, gs_nir->info.outputs_written) {
unsigned location = output_info->slot_to_location ?
output_info->slot_to_location[i] : i;
uint64_t export_outputs = b.shader->info.outputs_written;
if (kill_pointsize)
export_outputs &= ~VARYING_BIT_PSIZ;
for (unsigned j = 0; j < 4; j++) {
if (outputs.data[i][j]) {
nir_store_output(&b, outputs.data[i][j], zero,
.base = location,
.component = j,
.write_mask = 1,
.io_semantics = {.location = i, .num_slots = 1});
}
}
ac_nir_export_position(&b, gfx_level, clip_cull_mask, !has_param_exports,
force_vrs, export_outputs, outputs.data);
if (has_param_exports) {
ac_nir_export_parameter(&b, param_offsets,
b.shader->info.outputs_written,
b.shader->info.outputs_written_16bit,
outputs.data,
outputs.data_16bit_lo,
outputs.data_16bit_hi);
}
u_foreach_bit (i, gs_nir->info.outputs_written_16bit) {
unsigned location = output_info->slot_to_location_16bit ?
output_info->slot_to_location_16bit[i] : VARYING_SLOT_VAR0_16BIT + i;
for (unsigned j = 0; j < 4; j++) {
if (outputs.data_16bit_lo[i][j]) {
nir_store_output(&b, outputs.data_16bit_lo[i][j], zero,
.base = location,
.component = j,
.write_mask = 1,
.io_semantics = {.location = i, .num_slots = 1});
}
if (outputs.data_16bit_hi[i][j]) {
nir_store_output(&b, outputs.data_16bit_hi[i][j], zero,
.base = location,
.component = j,
.write_mask = 1,
.io_semantics = {
.location = i,
.high_16bits = true,
.num_slots = 1
});
}
}
}
nir_export_vertex_amd(&b);
}
if (stream_id)

View File

@@ -226,15 +226,17 @@ typedef struct ac_nir_gs_output_info {
/* type for each 16bit slot component */
nir_alu_type (*types_16bit_lo)[4];
nir_alu_type (*types_16bit_hi)[4];
/* map varying slot to driver location */
const uint8_t *slot_to_location;
const uint8_t *slot_to_location_16bit;
} ac_nir_gs_output_info;
nir_shader *
ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
enum amd_gfx_level gfx_level,
uint32_t clip_cull_mask,
const uint8_t *param_offsets,
bool has_param_exports,
bool disable_streamout,
bool kill_pointsize,
bool force_vrs,
ac_nir_gs_output_info *output_info);
void

View File

@@ -3042,7 +3042,14 @@ radv_pipeline_create_gs_copy_shader(struct radv_pipeline *pipeline,
.usage_mask = gs_info->gs.output_usage_mask,
};
nir_shader *nir =
ac_nir_create_gs_copy_shader(stages[MESA_SHADER_GEOMETRY].nir, false, &output_info);
ac_nir_create_gs_copy_shader(stages[MESA_SHADER_GEOMETRY].nir,
device->physical_device->rad_info.gfx_level,
gs_info->outinfo.clip_dist_mask | gs_info->outinfo.cull_dist_mask,
gs_info->outinfo.vs_output_param_offset,
gs_info->outinfo.param_exports,
false, false, false,
&output_info);
nir_validate_shader(nir, "after ac_nir_create_gs_copy_shader");
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
@@ -3051,15 +3058,7 @@ radv_pipeline_create_gs_copy_shader(struct radv_pipeline *pipeline,
info.wave_size = 64; /* Wave32 not supported. */
info.workgroup_size = 64; /* HW VS: separate waves, no workgroups */
info.so = gs_info->so;
if (gs_info->outinfo.export_clip_dists) {
if (stages[MESA_SHADER_GEOMETRY].nir->info.outputs_written & VARYING_BIT_CLIP_DIST0)
info.outinfo.vs_output_param_offset[VARYING_SLOT_CLIP_DIST0] = info.outinfo.param_exports++;
if (stages[MESA_SHADER_GEOMETRY].nir->info.outputs_written & VARYING_BIT_CLIP_DIST1)
info.outinfo.vs_output_param_offset[VARYING_SLOT_CLIP_DIST1] = info.outinfo.param_exports++;
info.outinfo.export_clip_dists = true;
}
info.outinfo = gs_info->outinfo;
struct radv_shader_args gs_copy_args = {0};
gs_copy_args.is_gs_copy_shader = true;

View File

@@ -2051,9 +2051,18 @@ si_nir_generate_gs_copy_shader(struct si_screen *sscreen,
shader->info.nr_pos_exports = si_get_nr_pos_exports(gs_selector, gskey);
unsigned clip_cull_mask =
(gsinfo->clipdist_mask & ~gskey->ge.opt.kill_clip_distances) | gsinfo->culldist_mask;
nir_shader *nir =
ac_nir_create_gs_copy_shader(gs_nir,
sscreen->info.gfx_level,
clip_cull_mask,
shader->info.vs_output_param_offset,
shader->info.nr_param_exports,
gskey->ge.opt.remove_streamout,
gskey->ge.opt.kill_pointsize,
sscreen->options.vrs2x2,
output_info);
struct si_shader_args args;
@@ -2095,9 +2104,6 @@ struct si_gs_output_info {
uint8_t usage_mask_16bit_lo[16];
uint8_t usage_mask_16bit_hi[16];
uint8_t slot_to_location[64];
uint8_t slot_to_location_16bit[16];
ac_nir_gs_output_info info;
};
@@ -2109,7 +2115,6 @@ si_init_gs_output_info(struct si_shader_info *info, struct si_gs_output_info *ou
if (slot < VARYING_SLOT_VAR0_16BIT) {
out_info->streams[slot] = info->output_streams[i];
out_info->usage_mask[slot] = info->output_usagemask[i];
out_info->slot_to_location[slot] = i;
} else {
unsigned index = slot - VARYING_SLOT_VAR0_16BIT;
/* TODO: 16bit need separated fields for lo/hi part. */
@@ -2117,7 +2122,6 @@ si_init_gs_output_info(struct si_shader_info *info, struct si_gs_output_info *ou
out_info->streams_16bit_hi[index] = info->output_streams[i];
out_info->usage_mask_16bit_lo[index] = info->output_usagemask[i];
out_info->usage_mask_16bit_hi[index] = info->output_usagemask[i];
out_info->slot_to_location_16bit[index] = i;
}
}
@@ -2133,9 +2137,6 @@ si_init_gs_output_info(struct si_shader_info *info, struct si_gs_output_info *ou
/* TODO: construct 16bit slot per component store type. */
ac_info->types_16bit_lo = ac_info->types_16bit_hi = NULL;
ac_info->slot_to_location = out_info->slot_to_location;
ac_info->slot_to_location_16bit = out_info->slot_to_location_16bit;
}
bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,