radv: pre-calc "simple" dynamic vertex input values

When the shader pipeline is known not to require any of the more complex
calculations, those calculations can be excluded from the dynamic update
code.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13320>
This commit is contained in:
Mike Blumenkrantz
2021-07-01 15:20:36 -04:00
committed by Marge Bot
parent c335a4d70e
commit eda5634e25
4 changed files with 35 additions and 23 deletions

View File

@@ -2727,11 +2727,11 @@ lookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, struct radv_shader_variant
STATIC_ASSERT(sizeof(union vs_prolog_key_header) == 4);
assert(vs_shader->info.vs.dynamic_inputs);
struct radv_vs_input_state *state = &cmd_buffer->state.dynamic_vs_input;
struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
const struct radv_vs_input_state *state = &cmd_buffer->state.dynamic_vs_input;
const struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
struct radv_device *device = cmd_buffer->device;
unsigned num_attributes = util_last_bit(vs_shader->info.vs.vb_desc_usage_mask);
unsigned num_attributes = pipeline->last_vertex_attrib_bit;
uint32_t attribute_mask = BITFIELD_MASK(num_attributes);
uint32_t instance_rate_inputs = state->instance_rate_inputs & attribute_mask;
@@ -2739,26 +2739,11 @@ lookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, struct radv_shader_variant
enum chip_class chip = device->physical_device->rad_info.chip_class;
const uint32_t misaligned_mask = chip == GFX6 || chip >= GFX10 ? cmd_buffer->state.vbo_misaligned_mask : 0;
struct radv_vs_prolog_key key;
key.state = state;
key.num_attributes = num_attributes;
key.misaligned_mask = misaligned_mask;
/* The instance ID input VGPR is placed differently when as_ls=true. */
key.as_ls = vs_shader->info.vs.as_ls && instance_rate_inputs;
key.is_ngg = vs_shader->info.is_ngg;
key.wave32 = vs_shader->info.wave_size == 32;
key.next_stage = MESA_SHADER_VERTEX;
if (pipeline->shaders[MESA_SHADER_TESS_CTRL] == vs_shader)
key.next_stage = MESA_SHADER_TESS_CTRL;
else if (pipeline->shaders[MESA_SHADER_GEOMETRY] == vs_shader)
key.next_stage = MESA_SHADER_GEOMETRY;
/* try to use a pre-compiled prolog first */
struct radv_shader_prolog *prolog = NULL;
if (!key.as_ls && key.next_stage == MESA_SHADER_VERTEX &&
key.is_ngg == device->physical_device->use_ngg && !misaligned_mask &&
!state->alpha_adjust_lo && !state->alpha_adjust_hi &&
vs_shader->info.wave_size == device->physical_device->ge_wave_size) {
if (pipeline->can_use_simple_input &&
(!vs_shader->info.vs.as_ls || !instance_rate_inputs) &&
!misaligned_mask && !state->alpha_adjust_lo && !state->alpha_adjust_hi) {
if (!instance_rate_inputs) {
prolog = device->simple_vs_prologs[num_attributes - 1];
} else if (num_attributes <= 16 && !*nontrivial_divisors &&
@@ -2775,6 +2760,16 @@ lookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, struct radv_shader_variant
uint32_t key_words[16];
unsigned key_size = 1;
struct radv_vs_prolog_key key;
key.state = state;
key.num_attributes = num_attributes;
key.misaligned_mask = misaligned_mask;
/* The instance ID input VGPR is placed differently when as_ls=true. */
key.as_ls = vs_shader->info.vs.as_ls && instance_rate_inputs;
key.is_ngg = vs_shader->info.is_ngg;
key.wave32 = vs_shader->info.wave_size == 32;
key.next_stage = pipeline->next_vertex_stage;
union vs_prolog_key_header header;
header.v = 0;
header.num_attributes = num_attributes;

View File

@@ -5400,8 +5400,22 @@ radv_pipeline_init_vertex_input_state(struct radv_pipeline *pipeline,
}
pipeline->use_per_attribute_vb_descs = info->vs.use_per_attribute_vb_descs;
pipeline->last_vertex_attrib_bit = util_last_bit(info->vs.vb_desc_usage_mask);
if (pipeline->shaders[MESA_SHADER_VERTEX])
pipeline->next_vertex_stage = MESA_SHADER_VERTEX;
else if (pipeline->shaders[MESA_SHADER_TESS_CTRL])
pipeline->next_vertex_stage = MESA_SHADER_TESS_CTRL;
else
pipeline->next_vertex_stage = MESA_SHADER_GEOMETRY;
if (pipeline->next_vertex_stage == MESA_SHADER_VERTEX) {
const struct radv_shader_variant *vs_shader = pipeline->shaders[MESA_SHADER_VERTEX];
pipeline->can_use_simple_input = vs_shader->info.is_ngg == pipeline->device->physical_device->use_ngg &&
vs_shader->info.wave_size == pipeline->device->physical_device->ge_wave_size;
} else {
pipeline->can_use_simple_input = false;
}
if (info->vs.dynamic_inputs)
pipeline->vb_desc_usage_mask = BITFIELD_MASK(util_last_bit(info->vs.vb_desc_usage_mask));
pipeline->vb_desc_usage_mask = BITFIELD_MASK(pipeline->last_vertex_attrib_bit);
else
pipeline->vb_desc_usage_mask = info->vs.vb_desc_usage_mask;
pipeline->vb_desc_alloc_size = util_bitcount(pipeline->vb_desc_usage_mask) * 16;

View File

@@ -1782,6 +1782,9 @@ struct radv_pipeline {
uint32_t attrib_index_offset[MAX_VERTEX_ATTRIBS];
bool use_per_attribute_vb_descs;
bool can_use_simple_input;
uint8_t last_vertex_attrib_bit;
uint8_t next_vertex_stage : 8;
uint32_t vb_desc_usage_mask;
uint32_t vb_desc_alloc_size;

View File

@@ -381,7 +381,7 @@ struct radv_vs_input_state {
};
struct radv_vs_prolog_key {
struct radv_vs_input_state *state;
const struct radv_vs_input_state *state;
unsigned num_attributes;
uint32_t misaligned_mask;
bool as_ls;