radv: Move VS input lowering to new file: radv_nir_lower_vs_inputs.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Acked-by: Konstantin Seurer <konstantin.seurer@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21358>
This commit is contained in:
Timur Kristóf
2023-02-10 08:29:53 +01:00
committed by Marge Bot
parent 450e173de0
commit 74f1b77046
5 changed files with 140 additions and 58 deletions

View File

@@ -93,6 +93,7 @@ libradv_files = files(
'radv_nir_apply_pipeline_layout.c',
'radv_nir_lower_abi.c',
'radv_nir_lower_ray_queries.c',
'radv_nir_lower_vs_inputs.c',
'radv_perfcounter.c',
'radv_pipeline.c',
'radv_pipeline_cache.c',

View File

@@ -65,50 +65,6 @@ ngg_query_bool_setting(nir_builder *b, unsigned mask, lower_abi_state *s)
return nir_test_mask(b, settings, mask);
}
/*
 * Build the replacement value for a VS load_input intrinsic by reading the
 * shader argument(s) that hold the attribute.
 *
 * The I/O offset of the intrinsic must be constant. The attribute index is
 * base + offset, rebased so that VERT_ATTRIB_GENERIC0 maps to vs_inputs[0].
 *
 * Returns the SSA value with the bit size and component count of the
 * original destination.
 */
static nir_ssa_def *
lower_load_vs_input_from_prolog(nir_builder *b,
                                nir_intrinsic_instr *intrin,
                                lower_abi_state *s)
{
   nir_src *io_offset = nir_get_io_offset_src(intrin);
   assert(nir_src_is_const(*io_offset));

   const unsigned location =
      nir_intrinsic_base(intrin) + nir_src_as_uint(*io_offset) - VERT_ATTRIB_GENERIC0;
   const unsigned first_bit = nir_intrinsic_component(intrin) * 32;
   const unsigned dest_bits = intrin->dest.ssa.bit_size;
   const unsigned dest_comps = intrin->dest.ssa.num_components;

   /* 64-bit inputs: they occupy twice as many 32-bit components.
    * 16-bit inputs: they occupy a 32-bit component (not packed).
    */
   const unsigned slot_bits = MAX2(dest_bits, 32);

   nir_ssa_def *srcs[2] = {NULL, NULL};
   unsigned num_srcs = 0;
   srcs[num_srcs++] = ac_nir_load_arg(b, &s->args->ac, s->args->vs_inputs[location]);

   /* The requested range extends past bit 128 of the first argument, so
    * the remainder has to come from the following argument.
    */
   if (first_bit + slot_bits * dest_comps > 128) {
      assert(dest_bits == 64);
      srcs[num_srcs++] = ac_nir_load_arg(b, &s->args->ac, s->args->vs_inputs[location + 1]);
   }

   nir_ssa_def *value = nir_extract_bits(b, srcs, num_srcs, first_bit, dest_comps, slot_bits);

   /* Nothing left to do unless the destination is narrower than a slot. */
   if (dest_bits >= slot_bits)
      return value;

   /* Only 16-bit destinations are narrower than their 32-bit slot. */
   assert(dest_bits == 16);
   const bool is_float =
      nir_alu_type_get_base_type(nir_intrinsic_dest_type(intrin)) == nir_type_float;
   return is_float ? nir_f2f16(b, value) : nir_u2u16(b, value);
}
static bool
lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
{
@@ -474,20 +430,6 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
case nir_intrinsic_load_ordered_id_amd:
replacement = nir_ubfe_imm(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info), 0, 12);
break;
case nir_intrinsic_load_input: {
/* Only VS inputs need to be lowered at this point. */
if (stage != MESA_SHADER_VERTEX)
return false;
if (s->info->vs.dynamic_inputs) {
replacement = lower_load_vs_input_from_prolog(b, intrin, s);
} else {
/* TODO: Lower non-dynamic inputs too. */
return false;
}
break;
}
case nir_intrinsic_load_force_vrs_rates_amd:
replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.force_vrs_rates);
break;

View File

@@ -0,0 +1,128 @@
/*
* Copyright © 2023 Valve Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "ac_nir.h"
#include "nir.h"
#include "nir_builder.h"
#include "radv_constants.h"
#include "radv_private.h"
#include "radv_shader.h"
#include "radv_shader_args.h"
/* State shared by the callbacks of the VS input lowering pass. */
typedef struct {
/* Shader arguments; the vs_inputs[] entries are loaded from here. */
const struct radv_shader_args *args;
/* Shader info; vs.dynamic_inputs selects which lowering path is taken. */
const struct radv_shader_info *info;
/* Pipeline key passed in by the caller; not read by the code in this file yet. */
const struct radv_pipeline_key *pl_key;
/* Value passed in by the caller; not read by the code in this file yet. */
uint32_t address32_hi;
} lower_vs_inputs_state;
/*
 * Produce the value of a VS load_input intrinsic from the shader arguments
 * listed in args->vs_inputs[].
 *
 * Requires a constant I/O offset. The attribute slot is computed as
 * base + offset - VERT_ATTRIB_GENERIC0.
 *
 * The returned SSA value has the same bit size and number of components as
 * the destination of the intrinsic.
 */
static nir_ssa_def *
lower_load_vs_input_from_prolog(nir_builder *b, nir_intrinsic_instr *intrin,
                                lower_vs_inputs_state *s)
{
   nir_src *const_offset = nir_get_io_offset_src(intrin);
   assert(nir_src_is_const(*const_offset));

   const unsigned attrib_base = nir_intrinsic_base(intrin);
   const unsigned attrib_offset = nir_src_as_uint(*const_offset);
   const unsigned slot = attrib_base + attrib_offset - VERT_ATTRIB_GENERIC0;

   const unsigned start_bit = nir_intrinsic_component(intrin) * 32;
   const unsigned result_bits = intrin->dest.ssa.bit_size;
   const unsigned result_comps = intrin->dest.ssa.num_components;

   /* 64-bit inputs: they occupy twice as many 32-bit components.
    * 16-bit inputs: they occupy a 32-bit component (not packed).
    */
   const unsigned elem_bits = MAX2(result_bits, 32);

   /* A second argument is only needed when the requested bits run past
    * bit 128 of the first one.
    */
   const bool spills_over = start_bit + elem_bits * result_comps > 128;

   nir_ssa_def *parts[2] = {NULL, NULL};
   parts[0] = ac_nir_load_arg(b, &s->args->ac, s->args->vs_inputs[slot]);

   unsigned num_parts = 1;
   if (spills_over) {
      assert(result_bits == 64);
      parts[1] = ac_nir_load_arg(b, &s->args->ac, s->args->vs_inputs[slot + 1]);
      num_parts = 2;
   }

   nir_ssa_def *wide =
      nir_extract_bits(b, parts, num_parts, start_bit, result_comps, elem_bits);

   if (result_bits < elem_bits) {
      /* Only 16-bit destinations are narrower than their 32-bit slot. */
      assert(result_bits == 16);

      switch (nir_alu_type_get_base_type(nir_intrinsic_dest_type(intrin))) {
      case nir_type_float:
         return nir_f2f16(b, wide);
      default:
         return nir_u2u16(b, wide);
      }
   }

   return wide;
}
/*
 * Per-instruction callback of the VS input lowering pass.
 *
 * Only load_input intrinsics are considered. When the shader uses dynamic
 * inputs, the intrinsic is replaced by a value built from the shader
 * arguments and then removed; otherwise the instruction is left untouched.
 *
 * Returns true if the instruction was replaced.
 */
static bool
lower_vs_input_instr(nir_builder *b, nir_instr *instr, void *state)
{
   lower_vs_inputs_state *s = (lower_vs_inputs_state *)state;

   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
   if (load->intrinsic != nir_intrinsic_load_input)
      return false;

   /* New instructions are emitted right before the load being replaced. */
   b->cursor = nir_before_instr(instr);

   /* TODO: lower non-dynamic inputs */
   if (!s->info->vs.dynamic_inputs)
      return false;

   nir_ssa_def *replacement = lower_load_vs_input_from_prolog(b, load, s);

   /* Redirect every user to the new value, then drop the old load. */
   nir_ssa_def_rewrite_uses(&load->dest.ssa, replacement);
   nir_instr_remove(instr);
   nir_instr_free(instr);

   return true;
}
/*
 * Entry point of the VS input lowering pass.
 *
 * Must only be called on a vertex shader. Runs lower_vs_input_instr on
 * every instruction of the shader and returns the result of
 * nir_shader_instructions_pass.
 *
 * pl_key and address32_hi are stored in the pass state for the callbacks.
 */
bool
radv_nir_lower_vs_inputs(nir_shader *shader, const struct radv_pipeline_stage *vs_stage,
                         const struct radv_pipeline_key *pl_key, uint32_t address32_hi)
{
   assert(shader->info.stage == MESA_SHADER_VERTEX);

   lower_vs_inputs_state pass_state = {
      .args = &vs_stage->args,
      .info = &vs_stage->info,
      .pl_key = pl_key,
      .address32_hi = address32_hi,
   };

   const bool progress = nir_shader_instructions_pass(
      shader, lower_vs_input_instr, nir_metadata_dominance | nir_metadata_block_index,
      &pass_state);

   return progress;
}

View File

@@ -3208,6 +3208,14 @@ radv_postprocess_nir(struct radv_pipeline *pipeline,
nir_move_load_input | nir_move_const_undef | nir_move_copies);
}
/* Lower VS inputs. We need to do this after nir_opt_sink, because
* load_input can be reordered, but buffer loads can't.
*/
if (stage->stage == MESA_SHADER_VERTEX) {
NIR_PASS(_, stage->nir, radv_nir_lower_vs_inputs, stage, pipeline_key,
device->physical_device->rad_info.address32_hi);
}
/* Lower I/O intrinsics to memory instructions. */
bool io_to_mem = radv_lower_io_to_mem(device, stage);
bool lowered_ngg = stage->info.is_ngg && stage->stage == last_vgt_api_stage;

View File

@@ -551,6 +551,9 @@ void radv_nir_lower_abi(nir_shader *shader, enum amd_gfx_level gfx_level,
const struct radv_shader_info *info, const struct radv_shader_args *args,
const struct radv_pipeline_key *pl_key, uint32_t address32_hi);
/* Lowers load_input intrinsics of a vertex shader. Currently only the case
 * where the shader info has vs.dynamic_inputs set is handled; other loads
 * are left untouched. Returns the result of the instruction pass.
 */
bool radv_nir_lower_vs_inputs(nir_shader *shader, const struct radv_pipeline_stage *vs_stage,
const struct radv_pipeline_key *key, uint32_t address32_hi);
void radv_init_shader_arenas(struct radv_device *device);
void radv_destroy_shader_arenas(struct radv_device *device);