From eda5634e25735b26b56f3dda8cec2c4c4302ae59 Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Thu, 1 Jul 2021 15:20:36 -0400 Subject: [PATCH] radv: pre-calc "simple" dynamic vertex input values when the shader pipeline is known to not require any of the more complex calculations, those calculations can be excluded from the dynamic update code Reviewed-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/radv_cmd_buffer.c | 37 ++++++++++++++------------------ src/amd/vulkan/radv_pipeline.c | 16 +++++++++++++- src/amd/vulkan/radv_private.h | 3 +++ src/amd/vulkan/radv_shader.h | 2 +- 4 files changed, 35 insertions(+), 23 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 1587bdc6a57..b6d6c301d0d 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -2727,11 +2727,11 @@ lookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, struct radv_shader_variant STATIC_ASSERT(sizeof(union vs_prolog_key_header) == 4); assert(vs_shader->info.vs.dynamic_inputs); - struct radv_vs_input_state *state = &cmd_buffer->state.dynamic_vs_input; - struct radv_pipeline *pipeline = cmd_buffer->state.pipeline; + const struct radv_vs_input_state *state = &cmd_buffer->state.dynamic_vs_input; + const struct radv_pipeline *pipeline = cmd_buffer->state.pipeline; struct radv_device *device = cmd_buffer->device; - unsigned num_attributes = util_last_bit(vs_shader->info.vs.vb_desc_usage_mask); + unsigned num_attributes = pipeline->last_vertex_attrib_bit; uint32_t attribute_mask = BITFIELD_MASK(num_attributes); uint32_t instance_rate_inputs = state->instance_rate_inputs & attribute_mask; @@ -2739,26 +2739,11 @@ lookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, struct radv_shader_variant enum chip_class chip = device->physical_device->rad_info.chip_class; const uint32_t misaligned_mask = chip == GFX6 || chip >= GFX10 ? cmd_buffer->state.vbo_misaligned_mask : 0; - struct radv_vs_prolog_key key; - key.state = state; - key.num_attributes = num_attributes; - key.misaligned_mask = misaligned_mask; - /* The instance ID input VGPR is placed differently when as_ls=true. */ - key.as_ls = vs_shader->info.vs.as_ls && instance_rate_inputs; - key.is_ngg = vs_shader->info.is_ngg; - key.wave32 = vs_shader->info.wave_size == 32; - key.next_stage = MESA_SHADER_VERTEX; - if (pipeline->shaders[MESA_SHADER_TESS_CTRL] == vs_shader) - key.next_stage = MESA_SHADER_TESS_CTRL; - else if (pipeline->shaders[MESA_SHADER_GEOMETRY] == vs_shader) - key.next_stage = MESA_SHADER_GEOMETRY; - /* try to use a pre-compiled prolog first */ struct radv_shader_prolog *prolog = NULL; - if (!key.as_ls && key.next_stage == MESA_SHADER_VERTEX && - key.is_ngg == device->physical_device->use_ngg && !misaligned_mask && - !state->alpha_adjust_lo && !state->alpha_adjust_hi && - vs_shader->info.wave_size == device->physical_device->ge_wave_size) { + if (pipeline->can_use_simple_input && + (!vs_shader->info.vs.as_ls || !instance_rate_inputs) && + !misaligned_mask && !state->alpha_adjust_lo && !state->alpha_adjust_hi) { if (!instance_rate_inputs) { prolog = device->simple_vs_prologs[num_attributes - 1]; } else if (num_attributes <= 16 && !*nontrivial_divisors && @@ -2775,6 +2760,16 @@ lookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, struct radv_shader_variant uint32_t key_words[16]; unsigned key_size = 1; + struct radv_vs_prolog_key key; + key.state = state; + key.num_attributes = num_attributes; + key.misaligned_mask = misaligned_mask; + /* The instance ID input VGPR is placed differently when as_ls=true. */ + key.as_ls = vs_shader->info.vs.as_ls && instance_rate_inputs; + key.is_ngg = vs_shader->info.is_ngg; + key.wave32 = vs_shader->info.wave_size == 32; + key.next_stage = pipeline->next_vertex_stage; + union vs_prolog_key_header header; header.v = 0; header.num_attributes = num_attributes; diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index e7cbeb7e892..5cf991dd93d 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -5400,8 +5400,22 @@ radv_pipeline_init_vertex_input_state(struct radv_pipeline *pipeline, } pipeline->use_per_attribute_vb_descs = info->vs.use_per_attribute_vb_descs; + pipeline->last_vertex_attrib_bit = util_last_bit(info->vs.vb_desc_usage_mask); + if (pipeline->shaders[MESA_SHADER_VERTEX]) + pipeline->next_vertex_stage = MESA_SHADER_VERTEX; + else if (pipeline->shaders[MESA_SHADER_TESS_CTRL]) + pipeline->next_vertex_stage = MESA_SHADER_TESS_CTRL; + else + pipeline->next_vertex_stage = MESA_SHADER_GEOMETRY; + if (pipeline->next_vertex_stage == MESA_SHADER_VERTEX) { + const struct radv_shader_variant *vs_shader = pipeline->shaders[MESA_SHADER_VERTEX]; + pipeline->can_use_simple_input = vs_shader->info.is_ngg == pipeline->device->physical_device->use_ngg && + vs_shader->info.wave_size == pipeline->device->physical_device->ge_wave_size; + } else { + pipeline->can_use_simple_input = false; + } if (info->vs.dynamic_inputs) - pipeline->vb_desc_usage_mask = BITFIELD_MASK(util_last_bit(info->vs.vb_desc_usage_mask)); + pipeline->vb_desc_usage_mask = BITFIELD_MASK(pipeline->last_vertex_attrib_bit); else pipeline->vb_desc_usage_mask = info->vs.vb_desc_usage_mask; pipeline->vb_desc_alloc_size = util_bitcount(pipeline->vb_desc_usage_mask) * 16; diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 161e340530a..754881aa436 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -1782,6 +1782,9 @@ struct radv_pipeline { uint32_t attrib_index_offset[MAX_VERTEX_ATTRIBS]; bool use_per_attribute_vb_descs; + bool can_use_simple_input; + uint8_t last_vertex_attrib_bit; + uint8_t next_vertex_stage : 8; uint32_t vb_desc_usage_mask; uint32_t vb_desc_alloc_size; diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index 0d03538deda..4204e5a5b30 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -381,7 +381,7 @@ struct radv_vs_input_state { }; struct radv_vs_prolog_key { - struct radv_vs_input_state *state; + const struct radv_vs_input_state *state; unsigned num_attributes; uint32_t misaligned_mask; bool as_ls;