radv: Move I/O lowering functions into a new file.
Also ran clang-format on the affected code.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21971>
@@ -76,6 +76,7 @@ libradv_files = files(
   'nir/radv_nir_lower_abi.c',
   'nir/radv_nir_lower_fs_intrinsics.c',
   'nir/radv_nir_lower_intrinsics_early.c',
+  'nir/radv_nir_lower_io.c',
   'nir/radv_nir_lower_primitive_shading_rate.c',
   'nir/radv_nir_lower_ray_queries.c',
   'nir/radv_nir_lower_view_index.c',
@@ -70,6 +70,12 @@ bool radv_nir_lower_viewport_to_zero(nir_shader *nir);
 
 bool radv_nir_export_multiview(nir_shader *nir);
 
+void radv_nir_lower_io_to_scalar_early(nir_shader *nir, nir_variable_mode mask);
+
+void radv_nir_lower_io(struct radv_device *device, nir_shader *nir);
+
+bool radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_pipeline_stage *stage);
+
 #ifdef __cplusplus
 }
 #endif
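For orientation, here is a minimal sketch of the order in which these three new entry points are invoked, based on the call sites updated later in this commit; the wrapper radv_compile_stage_sketch is a hypothetical name used only for illustration:

/* Hypothetical wrapper (not a real RADV function); the call order below
 * mirrors the call sites touched by this commit. */
static void
radv_compile_stage_sketch(struct radv_device *device, struct radv_pipeline_stage *stage,
                          nir_shader *producer, nir_shader *consumer)
{
   /* While linking adjacent stages: scalarize I/O so varyings can be
    * optimized and compacted per component. */
   radv_nir_lower_io_to_scalar_early(producer, nir_var_shader_out);
   radv_nir_lower_io_to_scalar_early(consumer, nir_var_shader_in);

   /* After linking, once shader info has been re-gathered: lower I/O
    * variable derefs to load/store intrinsics with assigned locations. */
   radv_nir_lower_io(device, stage->nir);

   /* During post-processing: lower those intrinsics to LDS/memory accesses
    * for merged and ES/GS stages; returns whether anything was lowered. */
   bool io_to_mem = radv_nir_lower_io_to_mem(device, stage);
   (void)io_to_mem;
}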
src/amd/vulkan/nir/radv_nir_lower_io.c (new file, 157 lines)
@@ -0,0 +1,157 @@
+/*
+ * Copyright © 2016 Red Hat.
+ * Copyright © 2016 Bas Nieuwenhuizen
+ * Copyright © 2023 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "ac_nir.h"
+#include "nir.h"
+#include "nir_builder.h"
+#include "radv_nir.h"
+#include "radv_private.h"
+#include "radv_shader.h"
+
+static int
+type_size_vec4(const struct glsl_type *type, bool bindless)
+{
+   return glsl_count_attribute_slots(type, false);
+}
+
+void
+radv_nir_lower_io_to_scalar_early(nir_shader *nir, nir_variable_mode mask)
+{
+   bool progress = false;
+
+   NIR_PASS(progress, nir, nir_lower_array_deref_of_vec, mask,
+            nir_lower_direct_array_deref_of_vec_load | nir_lower_indirect_array_deref_of_vec_load |
+               nir_lower_direct_array_deref_of_vec_store |
+               nir_lower_indirect_array_deref_of_vec_store);
+   NIR_PASS(progress, nir, nir_lower_io_to_scalar_early, mask);
+   if (progress) {
+      /* Optimize the new vector code and then remove dead vars */
+      NIR_PASS(_, nir, nir_copy_prop);
+      NIR_PASS(_, nir, nir_opt_shrink_vectors);
+
+      if (mask & nir_var_shader_out) {
+         /* Optimize swizzled movs of load_const for nir_link_opt_varyings's constant propagation. */
+         NIR_PASS(_, nir, nir_opt_constant_folding);
+
+         /* For nir_link_opt_varyings's duplicate input opt */
+         NIR_PASS(_, nir, nir_opt_cse);
+      }
+
+      /* Run copy-propagation to help remove dead output variables (some shaders have useless copies
+       * to/from an output), so compaction later will be more effective.
+       *
+       * This will have been done earlier but it might not have worked because the outputs were
+       * vector.
+       */
+      if (nir->info.stage == MESA_SHADER_TESS_CTRL)
+         NIR_PASS(_, nir, nir_opt_copy_prop_vars);
+
+      NIR_PASS(_, nir, nir_opt_dce);
+      NIR_PASS(_, nir, nir_remove_dead_variables,
+               nir_var_function_temp | nir_var_shader_in | nir_var_shader_out, NULL);
+   }
+}
+
+void
+radv_nir_lower_io(struct radv_device *device, nir_shader *nir)
+{
+   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+      nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, MESA_SHADER_FRAGMENT);
+   }
+
+   if (nir->info.stage == MESA_SHADER_VERTEX) {
+      NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in, type_size_vec4, 0);
+      NIR_PASS(_, nir, nir_lower_io, nir_var_shader_out, type_size_vec4,
+               nir_lower_io_lower_64bit_to_32);
+   } else {
+      NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out, type_size_vec4,
+               nir_lower_io_lower_64bit_to_32);
+   }
+
+   /* This pass needs actual constants */
+   NIR_PASS(_, nir, nir_opt_constant_folding);
+
+   NIR_PASS(_, nir, nir_io_add_const_offset_to_base, nir_var_shader_in | nir_var_shader_out);
+
+   if (device->physical_device->use_ngg_streamout && nir->xfb_info) {
+      NIR_PASS_V(nir, nir_io_add_intrinsic_xfb_info);
+
+      /* The total number of shader outputs is required for computing the pervertex LDS size for
+       * VS/TES when lowering NGG streamout.
+       */
+      nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, nir->info.stage);
+   }
+}
+
+bool
+radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_pipeline_stage *stage)
+{
+   const struct radv_shader_info *info = &stage->info;
+   nir_shader *nir = stage->nir;
+
+   if (nir->info.stage == MESA_SHADER_VERTEX) {
+      if (info->vs.as_ls) {
+         NIR_PASS_V(nir, ac_nir_lower_ls_outputs_to_mem, NULL, info->vs.tcs_in_out_eq,
+                    info->vs.tcs_temp_only_input_mask);
+         return true;
+      } else if (info->vs.as_es) {
+         NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, NULL,
+                    device->physical_device->rad_info.gfx_level, info->esgs_itemsize);
+         return true;
+      }
+   } else if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
+      NIR_PASS_V(nir, ac_nir_lower_hs_inputs_to_mem, NULL, info->vs.tcs_in_out_eq);
+      NIR_PASS_V(nir, ac_nir_lower_hs_outputs_to_mem, NULL,
+                 device->physical_device->rad_info.gfx_level, info->tcs.tes_reads_tess_factors,
+                 info->tcs.tes_inputs_read, info->tcs.tes_patch_inputs_read,
+                 info->tcs.num_linked_outputs, info->tcs.num_linked_patch_outputs, info->wave_size,
+                 false, false, true);
+
+      return true;
+   } else if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
+      NIR_PASS_V(nir, ac_nir_lower_tes_inputs_to_mem, NULL);
+
+      if (info->tes.as_es) {
+         NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, NULL,
+                    device->physical_device->rad_info.gfx_level, info->esgs_itemsize);
+      }
+
+      return true;
+   } else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
+      NIR_PASS_V(nir, ac_nir_lower_gs_inputs_to_mem, NULL,
+                 device->physical_device->rad_info.gfx_level, false);
+      return true;
+   } else if (nir->info.stage == MESA_SHADER_TASK) {
+      ac_nir_lower_task_outputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES,
+                                       device->physical_device->task_info.num_entries);
+      return true;
+   } else if (nir->info.stage == MESA_SHADER_MESH) {
+      ac_nir_lower_mesh_inputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES,
+                                      device->physical_device->task_info.num_entries);
+      return true;
+   }
+
+   return false;
+}
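As context for the progress-tracking idiom in radv_nir_lower_io_to_scalar_early above: NIR_PASS accumulates whether a pass changed the shader, while NIR_PASS(_, ...) discards that flag. A simplified model of the macro, for illustration only (Mesa's real macro also hooks in shader validation and debug output):

/* Simplified sketch of the NIR_PASS idiom; illustration only, not the
 * real Mesa macro. */
#define NIR_PASS_SKETCH(progress, shader, pass, ...)   \
   do {                                                \
      if (pass((shader), ##__VA_ARGS__))               \
         (progress) = true;                            \
   } while (0)

Accumulating progress this way lets the function run its cleanup passes only when one of the scalarization passes actually changed the shader.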
@@ -1585,44 +1585,6 @@ merge_tess_info(struct shader_info *tes_info, struct shader_info *tcs_info)
    tcs_info->tess.point_mode = tes_info->tess.point_mode;
 }
 
-static void
-radv_lower_io_to_scalar_early(nir_shader *nir, nir_variable_mode mask)
-{
-   bool progress = false;
-
-   NIR_PASS(progress, nir, nir_lower_array_deref_of_vec, mask,
-            nir_lower_direct_array_deref_of_vec_load | nir_lower_indirect_array_deref_of_vec_load |
-               nir_lower_direct_array_deref_of_vec_store |
-               nir_lower_indirect_array_deref_of_vec_store);
-   NIR_PASS(progress, nir, nir_lower_io_to_scalar_early, mask);
-   if (progress) {
-      /* Optimize the new vector code and then remove dead vars */
-      NIR_PASS(_, nir, nir_copy_prop);
-      NIR_PASS(_, nir, nir_opt_shrink_vectors);
-
-      if (mask & nir_var_shader_out) {
-         /* Optimize swizzled movs of load_const for nir_link_opt_varyings's constant propagation. */
-         NIR_PASS(_, nir, nir_opt_constant_folding);
-
-         /* For nir_link_opt_varyings's duplicate input opt */
-         NIR_PASS(_, nir, nir_opt_cse);
-      }
-
-      /* Run copy-propagation to help remove dead output variables (some shaders have useless copies
-       * to/from an output), so compaction later will be more effective.
-       *
-       * This will have been done earlier but it might not have worked because the outputs were
-       * vector.
-       */
-      if (nir->info.stage == MESA_SHADER_TESS_CTRL)
-         NIR_PASS(_, nir, nir_opt_copy_prop_vars);
-
-      NIR_PASS(_, nir, nir_opt_dce);
-      NIR_PASS(_, nir, nir_remove_dead_variables,
-               nir_var_function_temp | nir_var_shader_in | nir_var_shader_out, NULL);
-   }
-}
-
 static void
 radv_pipeline_link_shaders(const struct radv_device *device,
                            nir_shader *producer, nir_shader *consumer,
@@ -1654,8 +1616,8 @@ radv_pipeline_link_shaders(const struct radv_device *device,
    nir_validate_shader(producer, "after nir_lower_io_arrays_to_elements");
    nir_validate_shader(consumer, "after nir_lower_io_arrays_to_elements");
 
-   radv_lower_io_to_scalar_early(producer, nir_var_shader_out);
-   radv_lower_io_to_scalar_early(consumer, nir_var_shader_in);
+   radv_nir_lower_io_to_scalar_early(producer, nir_var_shader_out);
+   radv_nir_lower_io_to_scalar_early(consumer, nir_var_shader_in);
 
    /* Remove PSIZ from shaders when it's not needed.
     * This is typically produced by translation layers like Zink or D9VK.
@@ -2993,7 +2955,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_pipeline_layo
    }
 
    /* Lower I/O intrinsics to memory instructions. */
-   bool io_to_mem = radv_lower_io_to_mem(device, stage);
+   bool io_to_mem = radv_nir_lower_io_to_mem(device, stage);
    bool lowered_ngg = stage->info.is_ngg && stage->stage == last_vgt_api_stage;
    if (lowered_ngg)
       radv_lower_ngg(device, stage, pipeline_key);
@@ -3355,7 +3317,7 @@ radv_graphics_pipeline_compile(struct radv_graphics_pipeline *pipeline,
 
       /* Gather info again, information such as outputs_read can be out-of-date. */
       nir_shader_gather_info(stages[i].nir, nir_shader_get_entrypoint(stages[i].nir));
-      radv_lower_io(device, stages[i].nir);
+      radv_nir_lower_io(device, stages[i].nir);
 
       stages[i].feedback.duration += os_time_get_nano() - stage_start;
    }
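The feedback.duration bookkeeping in the hunk above appears to be RADV's pipeline-creation-feedback timing pattern. Condensed, and assuming stage_start is captured earlier in the same loop (as in the surrounding code, not shown in this hunk):

/* Condensed illustration of the per-stage timing pattern; assumes
 * stage_start is initialized earlier in the loop body. */
int64_t stage_start = os_time_get_nano();
nir_shader_gather_info(stages[i].nir, nir_shader_get_entrypoint(stages[i].nir));
radv_nir_lower_io(device, stages[i].nir);
stages[i].feedback.duration += os_time_get_nano() - stage_start;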
@@ -744,95 +744,6 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_pipeline_
    return nir;
 }
 
-static int
-type_size_vec4(const struct glsl_type *type, bool bindless)
-{
-   return glsl_count_attribute_slots(type, false);
-}
-
-void
-radv_lower_io(struct radv_device *device, nir_shader *nir)
-{
-   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
-      nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, MESA_SHADER_FRAGMENT);
-   }
-
-   if (nir->info.stage == MESA_SHADER_VERTEX) {
-      NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in, type_size_vec4, 0);
-      NIR_PASS(_, nir, nir_lower_io, nir_var_shader_out, type_size_vec4,
-               nir_lower_io_lower_64bit_to_32);
-   } else {
-      NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out, type_size_vec4,
-               nir_lower_io_lower_64bit_to_32);
-   }
-
-   /* This pass needs actual constants */
-   NIR_PASS(_, nir, nir_opt_constant_folding);
-
-   NIR_PASS(_, nir, nir_io_add_const_offset_to_base, nir_var_shader_in | nir_var_shader_out);
-
-   if (device->physical_device->use_ngg_streamout && nir->xfb_info) {
-      NIR_PASS_V(nir, nir_io_add_intrinsic_xfb_info);
-
-      /* The total number of shader outputs is required for computing the pervertex LDS size for
-       * VS/TES when lowering NGG streamout.
-       */
-      nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, nir->info.stage);
-   }
-}
-
-bool
-radv_lower_io_to_mem(struct radv_device *device, struct radv_pipeline_stage *stage)
-{
-   const struct radv_shader_info *info = &stage->info;
-   nir_shader *nir = stage->nir;
-
-   if (nir->info.stage == MESA_SHADER_VERTEX) {
-      if (info->vs.as_ls) {
-         NIR_PASS_V(nir, ac_nir_lower_ls_outputs_to_mem, NULL, info->vs.tcs_in_out_eq,
-                    info->vs.tcs_temp_only_input_mask);
-         return true;
-      } else if (info->vs.as_es) {
-         NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, NULL,
-                    device->physical_device->rad_info.gfx_level, info->esgs_itemsize);
-         return true;
-      }
-   } else if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
-      NIR_PASS_V(nir, ac_nir_lower_hs_inputs_to_mem, NULL, info->vs.tcs_in_out_eq);
-      NIR_PASS_V(nir, ac_nir_lower_hs_outputs_to_mem, NULL,
-                 device->physical_device->rad_info.gfx_level,
-                 info->tcs.tes_reads_tess_factors, info->tcs.tes_inputs_read,
-                 info->tcs.tes_patch_inputs_read, info->tcs.num_linked_outputs,
-                 info->tcs.num_linked_patch_outputs, info->wave_size,
-                 false, false, true);
-
-      return true;
-   } else if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
-      NIR_PASS_V(nir, ac_nir_lower_tes_inputs_to_mem, NULL);
-
-      if (info->tes.as_es) {
-         NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, NULL,
-                    device->physical_device->rad_info.gfx_level, info->esgs_itemsize);
-      }
-
-      return true;
-   } else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
-      NIR_PASS_V(nir, ac_nir_lower_gs_inputs_to_mem, NULL,
-                 device->physical_device->rad_info.gfx_level, false);
-      return true;
-   } else if (nir->info.stage == MESA_SHADER_TASK) {
-      ac_nir_lower_task_outputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES,
-                                       device->physical_device->task_info.num_entries);
-      return true;
-   } else if (nir->info.stage == MESA_SHADER_MESH) {
-      ac_nir_lower_mesh_inputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES,
-                                      device->physical_device->task_info.num_entries);
-      return true;
-   }
-
-   return false;
-}
-
 bool
 radv_consider_culling(const struct radv_physical_device *pdevice, struct nir_shader *nir, uint64_t ps_inputs_read,
                       unsigned num_vertices_per_primitive, const struct radv_shader_info *info)
@@ -744,10 +744,6 @@ get_tcs_num_patches(unsigned tcs_num_input_vertices, unsigned tcs_num_output_ver
    return num_patches;
 }
 
-void radv_lower_io(struct radv_device *device, nir_shader *nir);
-
-bool radv_lower_io_to_mem(struct radv_device *device, struct radv_pipeline_stage *stage);
-
 void radv_lower_ngg(struct radv_device *device, struct radv_pipeline_stage *ngg_stage,
                     const struct radv_pipeline_key *pl_key);