From 7c41cdb81f564ef52404e5f313ccfe2d723896e6 Mon Sep 17 00:00:00 2001 From: Qiang Yu Date: Fri, 23 Dec 2022 20:58:59 +0800 Subject: [PATCH] ac/nir,radv,radeonsi: gs copy shader use ac_nir_export_(position|parameter) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Timur Kristóf Reviewed-by: Marek Olšák Signed-off-by: Qiang Yu Part-of: --- src/amd/common/ac_nir.c | 71 ++++++++++-------------- src/amd/common/ac_nir.h | 10 ++-- src/amd/vulkan/radv_pipeline.c | 19 +++---- src/gallium/drivers/radeonsi/si_shader.c | 17 +++--- 4 files changed, 54 insertions(+), 63 deletions(-) diff --git a/src/amd/common/ac_nir.c b/src/amd/common/ac_nir.c index fbe1f3d1bfb..4fb4088cc27 100644 --- a/src/amd/common/ac_nir.c +++ b/src/amd/common/ac_nir.c @@ -465,7 +465,13 @@ emit_streamout(nir_builder *b, unsigned stream, nir_xfb_info *info, nir_shader * ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, + enum amd_gfx_level gfx_level, + uint32_t clip_cull_mask, + const uint8_t *param_offsets, + bool has_param_exports, bool disable_streamout, + bool kill_pointsize, + bool force_vrs, ac_nir_gs_output_info *output_info) { nir_builder b = nir_builder_init_simple_shader( @@ -474,6 +480,9 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, nir_foreach_shader_out_variable(var, gs_nir) nir_shader_add_variable(b.shader, nir_variable_clone(var, b.shader)); + b.shader->info.outputs_written = gs_nir->info.outputs_written; + b.shader->info.outputs_written_16bit = gs_nir->info.outputs_written_16bit; + nir_ssa_def *gsvs_ring = nir_load_ring_gsvs_amd(&b); nir_xfb_info *info = gs_nir->xfb_info; @@ -507,6 +516,14 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, .base = offset, .access = ACCESS_COHERENT | ACCESS_STREAM_CACHE_POLICY); + /* clamp legacy color output */ + if (i == VARYING_SLOT_COL0 || i == VARYING_SLOT_COL1 || + i == VARYING_SLOT_BFC0 || i == VARYING_SLOT_BFC0) { + nir_ssa_def *color = outputs.data[i][j]; + nir_ssa_def *clamp = nir_load_clamp_vertex_color_amd(&b); + outputs.data[i][j] = nir_bcsel(&b, clamp, nir_fsat(&b, color), color); + } + offset += gs_nir->info.gs.vertices_out * 16 * 4; } } @@ -539,49 +556,21 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, emit_streamout(&b, stream, info, &outputs); if (stream == 0) { - u_foreach_bit64 (i, gs_nir->info.outputs_written) { - unsigned location = output_info->slot_to_location ? - output_info->slot_to_location[i] : i; + uint64_t export_outputs = b.shader->info.outputs_written; + if (kill_pointsize) + export_outputs &= ~VARYING_BIT_PSIZ; - for (unsigned j = 0; j < 4; j++) { - if (outputs.data[i][j]) { - nir_store_output(&b, outputs.data[i][j], zero, - .base = location, - .component = j, - .write_mask = 1, - .io_semantics = {.location = i, .num_slots = 1}); - } - } + ac_nir_export_position(&b, gfx_level, clip_cull_mask, !has_param_exports, + force_vrs, export_outputs, outputs.data); + + if (has_param_exports) { + ac_nir_export_parameter(&b, param_offsets, + b.shader->info.outputs_written, + b.shader->info.outputs_written_16bit, + outputs.data, + outputs.data_16bit_lo, + outputs.data_16bit_hi); } - - u_foreach_bit (i, gs_nir->info.outputs_written_16bit) { - unsigned location = output_info->slot_to_location_16bit ? - output_info->slot_to_location_16bit[i] : VARYING_SLOT_VAR0_16BIT + i; - - for (unsigned j = 0; j < 4; j++) { - if (outputs.data_16bit_lo[i][j]) { - nir_store_output(&b, outputs.data_16bit_lo[i][j], zero, - .base = location, - .component = j, - .write_mask = 1, - .io_semantics = {.location = i, .num_slots = 1}); - } - - if (outputs.data_16bit_hi[i][j]) { - nir_store_output(&b, outputs.data_16bit_hi[i][j], zero, - .base = location, - .component = j, - .write_mask = 1, - .io_semantics = { - .location = i, - .high_16bits = true, - .num_slots = 1 - }); - } - } - } - - nir_export_vertex_amd(&b); } if (stream_id) diff --git a/src/amd/common/ac_nir.h b/src/amd/common/ac_nir.h index ed99a8a025c..81b9b02454f 100644 --- a/src/amd/common/ac_nir.h +++ b/src/amd/common/ac_nir.h @@ -226,15 +226,17 @@ typedef struct ac_nir_gs_output_info { /* type for each 16bit slot component */ nir_alu_type (*types_16bit_lo)[4]; nir_alu_type (*types_16bit_hi)[4]; - - /* map varying slot to driver location */ - const uint8_t *slot_to_location; - const uint8_t *slot_to_location_16bit; } ac_nir_gs_output_info; nir_shader * ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, + enum amd_gfx_level gfx_level, + uint32_t clip_cull_mask, + const uint8_t *param_offsets, + bool has_param_exports, bool disable_streamout, + bool kill_pointsize, + bool force_vrs, ac_nir_gs_output_info *output_info); void diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 8dbe0b1e1f1..404a3eb0048 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -3042,7 +3042,14 @@ radv_pipeline_create_gs_copy_shader(struct radv_pipeline *pipeline, .usage_mask = gs_info->gs.output_usage_mask, }; nir_shader *nir = - ac_nir_create_gs_copy_shader(stages[MESA_SHADER_GEOMETRY].nir, false, &output_info); + ac_nir_create_gs_copy_shader(stages[MESA_SHADER_GEOMETRY].nir, + device->physical_device->rad_info.gfx_level, + gs_info->outinfo.clip_dist_mask | gs_info->outinfo.cull_dist_mask, + gs_info->outinfo.vs_output_param_offset, + gs_info->outinfo.param_exports, + false, false, false, + &output_info); + nir_validate_shader(nir, "after ac_nir_create_gs_copy_shader"); nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); @@ -3051,15 +3058,7 @@ radv_pipeline_create_gs_copy_shader(struct radv_pipeline *pipeline, info.wave_size = 64; /* Wave32 not supported. */ info.workgroup_size = 64; /* HW VS: separate waves, no workgroups */ info.so = gs_info->so; - - if (gs_info->outinfo.export_clip_dists) { - if (stages[MESA_SHADER_GEOMETRY].nir->info.outputs_written & VARYING_BIT_CLIP_DIST0) - info.outinfo.vs_output_param_offset[VARYING_SLOT_CLIP_DIST0] = info.outinfo.param_exports++; - if (stages[MESA_SHADER_GEOMETRY].nir->info.outputs_written & VARYING_BIT_CLIP_DIST1) - info.outinfo.vs_output_param_offset[VARYING_SLOT_CLIP_DIST1] = info.outinfo.param_exports++; - - info.outinfo.export_clip_dists = true; - } + info.outinfo = gs_info->outinfo; struct radv_shader_args gs_copy_args = {0}; gs_copy_args.is_gs_copy_shader = true; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index f69ad20afd3..0c6e6154ebe 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2051,9 +2051,18 @@ si_nir_generate_gs_copy_shader(struct si_screen *sscreen, shader->info.nr_pos_exports = si_get_nr_pos_exports(gs_selector, gskey); + unsigned clip_cull_mask = + (gsinfo->clipdist_mask & ~gskey->ge.opt.kill_clip_distances) | gsinfo->culldist_mask; + nir_shader *nir = ac_nir_create_gs_copy_shader(gs_nir, + sscreen->info.gfx_level, + clip_cull_mask, + shader->info.vs_output_param_offset, + shader->info.nr_param_exports, gskey->ge.opt.remove_streamout, + gskey->ge.opt.kill_pointsize, + sscreen->options.vrs2x2, output_info); struct si_shader_args args; @@ -2095,9 +2104,6 @@ struct si_gs_output_info { uint8_t usage_mask_16bit_lo[16]; uint8_t usage_mask_16bit_hi[16]; - uint8_t slot_to_location[64]; - uint8_t slot_to_location_16bit[16]; - ac_nir_gs_output_info info; }; @@ -2109,7 +2115,6 @@ si_init_gs_output_info(struct si_shader_info *info, struct si_gs_output_info *ou if (slot < VARYING_SLOT_VAR0_16BIT) { out_info->streams[slot] = info->output_streams[i]; out_info->usage_mask[slot] = info->output_usagemask[i]; - out_info->slot_to_location[slot] = i; } else { unsigned index = slot - VARYING_SLOT_VAR0_16BIT; /* TODO: 16bit need separated fields for lo/hi part. */ @@ -2117,7 +2122,6 @@ si_init_gs_output_info(struct si_shader_info *info, struct si_gs_output_info *ou out_info->streams_16bit_hi[index] = info->output_streams[i]; out_info->usage_mask_16bit_lo[index] = info->output_usagemask[i]; out_info->usage_mask_16bit_hi[index] = info->output_usagemask[i]; - out_info->slot_to_location_16bit[index] = i; } } @@ -2133,9 +2137,6 @@ si_init_gs_output_info(struct si_shader_info *info, struct si_gs_output_info *ou /* TODO: construct 16bit slot per component store type. */ ac_info->types_16bit_lo = ac_info->types_16bit_hi = NULL; - - ac_info->slot_to_location = out_info->slot_to_location; - ac_info->slot_to_location_16bit = out_info->slot_to_location_16bit; } bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,