nvk: Be much more conservative about rebinding cbufs

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29591>
Faith Ekstrand
2024-05-15 15:32:21 -05:00
committed by Marge Bot
parent 8b5835af31
commit 091a945b57
4 changed files with 108 additions and 24 deletions
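For orientation, here is a minimal, self-contained sketch of the dirty-tracking pattern the diff below introduces (toy types and names, not NVK's actual API): each graphics cbuf bind group keeps its cached cbuf layout plus a small dirty bitmask; binding descriptor sets only sets the bits for the affected slots, and the draw-time flush re-binds just the dirty slots the current shader actually uses, then clears exactly those bits.

#include <stdint.h>
#include <stdio.h>

#define TOY_MAX_CBUFS 16

struct toy_cbuf_group {
   uint16_t dirty;                    /* one bit per cbuf slot */
   int      desc_set[TOY_MAX_CBUFS];  /* descriptor set backing each slot */
};

/* Descriptor sets [sets_start, sets_end) were just (re)bound: mark only the
 * cbuf slots that mirror those sets as dirty. */
static void
toy_dirty_for_sets(struct toy_cbuf_group *g, int sets_start, int sets_end)
{
   for (unsigned i = 0; i < TOY_MAX_CBUFS; i++) {
      if (g->desc_set[i] >= sets_start && g->desc_set[i] < sets_end)
         g->dirty |= (uint16_t)(1u << i);
   }
}

/* Draw-time flush: re-bind only the dirty slots that are in use, then clear
 * exactly those bits (compare group->dirty &= ~rebind in the patch). */
static void
toy_flush(struct toy_cbuf_group *g, unsigned cbuf_count)
{
   const uint32_t rebind = g->dirty & ((1u << cbuf_count) - 1);
   for (uint32_t bits = rebind; bits; bits &= bits - 1)
      printf("re-bind cbuf slot %d\n", __builtin_ctz(bits));
   g->dirty &= (uint16_t)~rebind;
}

int main(void)
{
   struct toy_cbuf_group g = { .desc_set = { 0, 1, 2 } };
   toy_dirty_for_sets(&g, 1, 3);  /* rebinding sets 1..2 dirties slots 1 and 2 */
   toy_flush(&g, 3);              /* touches slots 1 and 2, leaves slot 0 alone */
   toy_flush(&g, 3);              /* nothing dirty -> no re-binds at all */
   return 0;
}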

View File

@@ -595,6 +595,53 @@ nvk_cmd_bind_shaders(struct vk_command_buffer *vk_cmd,
    }
 }
 
+#define NVK_VK_GRAPHICS_STAGE_BITS VK_SHADER_STAGE_ALL_GRAPHICS
+
+void
+nvk_cmd_dirty_cbufs_for_descriptors(struct nvk_cmd_buffer *cmd,
+                                    VkShaderStageFlags stages,
+                                    uint32_t sets_start, uint32_t sets_end,
+                                    uint32_t dyn_start, uint32_t dyn_end)
+{
+   if (!(stages & VK_SHADER_STAGE_ALL_GRAPHICS))
+      return;
+
+   uint32_t groups = 0;
+   u_foreach_bit(i, stages & VK_SHADER_STAGE_ALL_GRAPHICS) {
+      gl_shader_stage stage = vk_to_mesa_shader_stage(1 << i);
+      uint32_t g = nvk_cbuf_binding_for_stage(stage);
+      groups |= BITFIELD_BIT(g);
+   }
+
+   u_foreach_bit(g, groups) {
+      struct nvk_cbuf_group *group = &cmd->state.gfx.cbuf_groups[g];
+
+      for (uint32_t i = 0; i < ARRAY_SIZE(group->cbufs); i++) {
+         const struct nvk_cbuf *cbuf = &group->cbufs[i];
+         switch (cbuf->type) {
+         case NVK_CBUF_TYPE_INVALID:
+         case NVK_CBUF_TYPE_ROOT_DESC:
+         case NVK_CBUF_TYPE_SHADER_DATA:
+            break;
+
+         case NVK_CBUF_TYPE_DESC_SET:
+         case NVK_CBUF_TYPE_UBO_DESC:
+            if (cbuf->desc_set >= sets_start && cbuf->desc_set < sets_end)
+               group->dirty |= BITFIELD_BIT(i);
+            break;
+
+         case NVK_CBUF_TYPE_DYNAMIC_UBO:
+            if (cbuf->dynamic_idx >= dyn_start && cbuf->dynamic_idx < dyn_end)
+               group->dirty |= BITFIELD_BIT(i);
+            break;
+
+         default:
+            unreachable("Invalid cbuf type");
+         }
+      }
+   }
+}
+
 static void
 nvk_bind_descriptor_sets(struct nvk_cmd_buffer *cmd,
                          struct nvk_descriptor_state *desc,
@@ -621,8 +668,9 @@ nvk_bind_descriptor_sets(struct nvk_cmd_buffer *cmd,
     * range and it's only our responsibility to adjust all
     * set_dynamic_buffer_start[p] for p > s as needed.
     */
-   uint8_t dyn_buffer_start =
+   const uint8_t dyn_buffer_start =
       desc->root.set_dynamic_buffer_start[info->firstSet];
+   uint8_t dyn_buffer_end = dyn_buffer_start;
 
    uint32_t next_dyn_offset = 0;
    for (uint32_t i = 0; i < info->descriptorSetCount; ++i) {
@@ -638,7 +686,7 @@ nvk_bind_descriptor_sets(struct nvk_cmd_buffer *cmd,
          desc->sets[s] = set;
       }
 
-      desc->root.set_dynamic_buffer_start[s] = dyn_buffer_start;
+      desc->root.set_dynamic_buffer_start[s] = dyn_buffer_end;
 
       if (pipeline_layout->set_layouts[s] != NULL) {
         const struct nvk_descriptor_set_layout *set_layout =
@@ -655,22 +703,26 @@ nvk_bind_descriptor_sets(struct nvk_cmd_buffer *cmd,
                } else {
                   db.addr.base_addr += offset;
                }
-               desc->root.dynamic_buffers[dyn_buffer_start + j] = db;
+               desc->root.dynamic_buffers[dyn_buffer_end + j] = db;
             }
             next_dyn_offset += set->layout->dynamic_buffer_count;
          }
 
-         dyn_buffer_start += set_layout->dynamic_buffer_count;
+         dyn_buffer_end += set_layout->dynamic_buffer_count;
       } else {
          assert(set == NULL);
       }
    }
 
-   assert(dyn_buffer_start <= NVK_MAX_DYNAMIC_BUFFERS);
+   assert(dyn_buffer_end <= NVK_MAX_DYNAMIC_BUFFERS);
    assert(next_dyn_offset <= info->dynamicOffsetCount);
 
    for (uint32_t s = info->firstSet + info->descriptorSetCount;
        s < NVK_MAX_SETS; s++)
-      desc->root.set_dynamic_buffer_start[s] = dyn_buffer_start;
+      desc->root.set_dynamic_buffer_start[s] = dyn_buffer_end;
+
+   nvk_cmd_dirty_cbufs_for_descriptors(cmd, info->stageFlags, info->firstSet,
+                                       info->firstSet + info->descriptorSetCount,
+                                       dyn_buffer_start, dyn_buffer_end);
 }
 
 VKAPI_ATTR void VKAPI_CALL
@@ -679,7 +731,7 @@ nvk_CmdBindDescriptorSets2KHR(VkCommandBuffer commandBuffer,
 {
    VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
 
-   if (pBindDescriptorSetsInfo->stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS) {
+   if (pBindDescriptorSetsInfo->stageFlags & NVK_VK_GRAPHICS_STAGE_BITS) {
       nvk_bind_descriptor_sets(cmd, &cmd->state.gfx.descriptors,
                                pBindDescriptorSetsInfo);
    }
@@ -705,7 +757,7 @@ nvk_CmdPushConstants2KHR(VkCommandBuffer commandBuffer,
 {
    VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
 
-   if (pPushConstantsInfo->stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS)
+   if (pPushConstantsInfo->stageFlags & NVK_VK_GRAPHICS_STAGE_BITS)
       nvk_push_constants(cmd, &cmd->state.gfx.descriptors, pPushConstantsInfo);
 
    if (pPushConstantsInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT)
@@ -754,6 +806,9 @@ nvk_push_descriptor_set(struct nvk_cmd_buffer *cmd,
    nvk_push_descriptor_set_update(dev, push_set, set_layout,
                                   info->descriptorWriteCount,
                                   info->pDescriptorWrites);
+
+   nvk_cmd_dirty_cbufs_for_descriptors(cmd, info->stageFlags,
+                                       info->set, info->set + 1, 0, 0);
 }
 
 VKAPI_ATTR void VKAPI_CALL
@@ -762,7 +817,7 @@ nvk_CmdPushDescriptorSet2KHR(VkCommandBuffer commandBuffer,
 {
    VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
 
-   if (pPushDescriptorSetInfo->stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS) {
+   if (pPushDescriptorSetInfo->stageFlags & NVK_VK_GRAPHICS_STAGE_BITS) {
       nvk_push_descriptor_set(cmd, &cmd->state.gfx.descriptors,
                               pPushDescriptorSetInfo);
    }
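A note on the start/end split above: it is what feeds the last two arguments of nvk_cmd_dirty_cbufs_for_descriptors. dyn_buffer_start stays pinned to the first dynamic-buffer index of info->firstSet while dyn_buffer_end advances past each bound set's dynamic buffers, so [dyn_buffer_start, dyn_buffer_end) is exactly the range of dynamic-buffer indices this bind may have touched. As a worked example (numbers are illustrative): if firstSet's dynamic buffers begin at index 4 and the two sets being bound declare 2 and 3 dynamic buffers, dyn_buffer_end lands at 9 and only NVK_CBUF_TYPE_DYNAMIC_UBO cbufs with dynamic_idx in [4, 9) get their dirty bits set.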

View File

@@ -11,6 +11,7 @@
 #include "nvk_cmd_pool.h"
 #include "nvk_descriptor_set.h"
 #include "nvk_image.h"
+#include "nvk_shader.h"
 
 #include "util/u_dynarray.h"
@@ -115,6 +116,11 @@ struct nvk_graphics_state {
    uint32_t shaders_dirty;
    struct nvk_shader *shaders[MESA_SHADER_MESH + 1];
 
+   struct nvk_cbuf_group {
+      uint16_t dirty;
+      struct nvk_cbuf cbufs[16];
+   } cbuf_groups[5];
+
    /* Used for meta save/restore */
    struct nvk_addr_range vb0;
@@ -231,6 +237,10 @@ void nvk_cmd_bind_graphics_shader(struct nvk_cmd_buffer *cmd,
 void nvk_cmd_bind_compute_shader(struct nvk_cmd_buffer *cmd,
                                  struct nvk_shader *shader);
 
+void nvk_cmd_dirty_cbufs_for_descriptors(struct nvk_cmd_buffer *cmd,
+                                         VkShaderStageFlags stages,
+                                         uint32_t sets_start, uint32_t sets_end,
+                                         uint32_t dyn_start, uint32_t dyn_end);
+
 void nvk_cmd_bind_vertex_buffer(struct nvk_cmd_buffer *cmd, uint32_t vb_idx,
                                 struct nvk_addr_range addr_range);
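One property the new struct quietly relies on is that the uint16_t dirty mask has at least one bit per entry of cbufs[16]. A compile-time check one could write for that invariant (purely illustrative, using a stand-in struct rather than the real nvk_cbuf_group; not part of the patch):

#include <assert.h>
#include <stdint.h>

/* Stand-in with the same shape as the nvk_cbuf_group added above. */
struct cbuf_group_shape {
   uint16_t dirty;
   int      cbufs[16];   /* placeholder for struct nvk_cbuf cbufs[16] */
};

static_assert(8 * sizeof(((struct cbuf_group_shape *)0)->dirty) >=
              sizeof(((struct cbuf_group_shape *)0)->cbufs) /
              sizeof(((struct cbuf_group_shape *)0)->cbufs[0]),
              "dirty mask needs one bit per cbuf slot");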

View File

@@ -1176,10 +1176,22 @@ nvk_flush_shaders(struct nvk_cmd_buffer *cmd)
       /* Only copy non-NULL shaders because mesh/task alias with vertex and
        * tessellation stages.
        */
-      if (cmd->state.gfx.shaders[stage] != NULL) {
+      struct nvk_shader *shader = cmd->state.gfx.shaders[stage];
+      if (shader != NULL) {
          assert(type < ARRAY_SIZE(type_shader));
          assert(type_shader[type] == NULL);
-         type_shader[type] = cmd->state.gfx.shaders[stage];
+         type_shader[type] = shader;
+
+         const struct nvk_cbuf_map *cbuf_map = &shader->cbuf_map;
+         struct nvk_cbuf_group *cbuf_group =
+            &cmd->state.gfx.cbuf_groups[nvk_cbuf_binding_for_stage(stage)];
+
+         for (uint32_t i = 0; i < cbuf_map->cbuf_count; i++) {
+            if (memcmp(&cbuf_group->cbufs[i], &cbuf_map->cbufs[i],
+                       sizeof(cbuf_group->cbufs[i])) != 0) {
+               cbuf_group->cbufs[i] = cbuf_map->cbufs[i];
+               cbuf_group->dirty |= BITFIELD_BIT(i);
+            }
+         }
       }
    }
@@ -2369,22 +2381,26 @@ nvk_flush_descriptors(struct nvk_cmd_buffer *cmd)
       cbuf_shaders[group] = shader;
    }
 
-   uint32_t root_cbuf_count = 0;
-   for (uint32_t group = 0; group < ARRAY_SIZE(cbuf_shaders); group++) {
-      if (cbuf_shaders[group] == NULL)
+   for (uint32_t g = 0; g < ARRAY_SIZE(cbuf_shaders); g++) {
+      if (cbuf_shaders[g] == NULL)
          continue;
 
-      const struct nvk_shader *shader = cbuf_shaders[group];
+      const struct nvk_shader *shader = cbuf_shaders[g];
       const struct nvk_cbuf_map *cbuf_map = &shader->cbuf_map;
+      struct nvk_cbuf_group *group = &cmd->state.gfx.cbuf_groups[g];
 
-      for (uint32_t c = 0; c < cbuf_map->cbuf_count; c++) {
-         const struct nvk_cbuf *cbuf = &cbuf_map->cbufs[c];
+      /* We only bother to re-bind cbufs that are in use */
+      const uint32_t rebind =
+         group->dirty & BITFIELD_MASK(cbuf_map->cbuf_count);
+      if (!rebind)
+         continue;
+
+      u_foreach_bit(c, rebind) {
+         const struct nvk_cbuf *cbuf = &group->cbufs[c];
 
          /* We bind these at the very end */
-         if (cbuf->type == NVK_CBUF_TYPE_ROOT_DESC) {
-            root_cbuf_count++;
+         if (cbuf->type == NVK_CBUF_TYPE_ROOT_DESC)
            continue;
-         }
 
         struct nvk_buffer_address ba;
         if (nvk_cmd_buffer_get_cbuf_addr(cmd, desc, shader, cbuf, &ba)) {
@@ -2401,7 +2417,7 @@ nvk_flush_descriptors(struct nvk_cmd_buffer *cmd)
               P_NV9097_SET_CONSTANT_BUFFER_SELECTOR_C(p, ba.base_addr);
            }
 
-           P_IMMD(p, NV9097, BIND_GROUP_CONSTANT_BUFFER(group), {
+           P_IMMD(p, NV9097, BIND_GROUP_CONSTANT_BUFFER(g), {
               .valid = ba.size > 0,
               .shader_slot = c,
            });
@@ -2413,20 +2429,22 @@ nvk_flush_descriptors(struct nvk_cmd_buffer *cmd)
               struct nv_push *p = nvk_cmd_buffer_push(cmd, 4);
 
               P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_BIND_CBUF_DESC));
-              P_INLINE_DATA(p, group | (c << 4));
+              P_INLINE_DATA(p, g | (c << 4));
               P_INLINE_DATA(p, desc_addr >> 32);
               P_INLINE_DATA(p, desc_addr);
            } else {
               struct nv_push *p = nvk_cmd_buffer_push(cmd, 2);
 
               P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_BIND_CBUF_DESC));
-              P_INLINE_DATA(p, group | (c << 4));
+              P_INLINE_DATA(p, g | (c << 4));
 
               nv_push_update_count(p, 3);
               nvk_cmd_buffer_push_indirect(cmd, desc_addr, 3);
            }
         }
      }
+
+      group->dirty &= ~rebind;
   }
 
   /* We bind all root descriptors last so that CONSTANT_BUFFER_SELECTOR is
@@ -2434,7 +2452,7 @@ nvk_flush_descriptors(struct nvk_cmd_buffer *cmd)
    * parameters and similar MME root table updates always hit the root
    * descriptor table and not some random UBO.
    */
-   struct nv_push *p = nvk_cmd_buffer_push(cmd, 4 + 2 * root_cbuf_count);
+   struct nv_push *p = nvk_cmd_buffer_push(cmd, 14);
 
    P_MTHD(p, NV9097, SET_CONSTANT_BUFFER_SELECTOR_A);
    P_NV9097_SET_CONSTANT_BUFFER_SELECTOR_A(p, sizeof(desc->root));
    P_NV9097_SET_CONSTANT_BUFFER_SELECTOR_B(p, root_desc_addr >> 32);
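A plausible reading of the new constant push size: with every cbuf group's root descriptor now re-bound unconditionally at the end of the flush, the old 4 + 2 * root_cbuf_count worst-cases into a fixed budget of 4 dwords for the SET_CONSTANT_BUFFER_SELECTOR_A..C setup plus 2 dwords of BIND_GROUP_CONSTANT_BUFFER for each of the 5 cbuf groups, i.e. 4 + 2 × 5 = 14.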

View File

@@ -147,6 +147,7 @@ nvk_meta_end(struct nvk_cmd_buffer *cmd,
       *desc->push[0] = save->push_desc0;
       desc->push_dirty |= BITFIELD_BIT(0);
    }
+   nvk_cmd_dirty_cbufs_for_descriptors(cmd, ~0, 0, 1, 0, 0);
 
    /* Restore set_dynamic_buffer_start because meta binding set 0 can disturb
    * all dynamic buffer starts for all sets.