panvk: Add support for KHR_push_descriptor

Most of the panvk_descriptor_set logic can be re-used; we just need
to provide intermediate helpers shared by the push-set and regular-set
population paths (which implies passing our own storage for the SW
descriptor UBO instead of assuming it's always backed by a BO).

Once this is done, we add temporary storage to the panvk_cmd_buffer
object and populate the push sets at draw/dispatch time.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28178>
Boris Brezillon
2024-01-10 15:49:40 +01:00
committed by Marge Bot
parent 160dd5bf2b
commit 21775a459f
7 changed files with 384 additions and 116 deletions
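
For context, here is a minimal, hypothetical application-side sketch of the path this commit enables (not part of the diff): a set layout created with the push-descriptor flag, then a descriptor pushed straight into the command buffer with vkCmdPushDescriptorSetKHR. The device, ubo, pipeline_layout and cmd handles are placeholders assumed to exist; error handling is omitted.

/* Set layout flagged for push descriptors: panvk backs it with the new
 * panvk_push_descriptor_set scratch storage instead of a pool allocation. */
const VkDescriptorSetLayoutBinding binding = {
   .binding = 0,
   .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
   .descriptorCount = 1,
   .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
};
const VkDescriptorSetLayoutCreateInfo layout_info = {
   .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
   .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
   .bindingCount = 1,
   .pBindings = &binding,
};
VkDescriptorSetLayout set_layout;
vkCreateDescriptorSetLayout(device, &layout_info, NULL, &set_layout);

/* At record time there is no vkAllocateDescriptorSets()/vkUpdateDescriptorSets():
 * the write lands in the command buffer's temporary push set and the SW
 * descriptor UBO is uploaded when the next draw/dispatch is emitted. */
const VkDescriptorBufferInfo buf_info = {
   .buffer = ubo,
   .offset = 0,
   .range = VK_WHOLE_SIZE,
};
const VkWriteDescriptorSet write = {
   .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
   .dstBinding = 0,
   .descriptorCount = 1,
   .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
   .pBufferInfo = &buf_info,
};
vkCmdPushDescriptorSetKHR(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
                          pipeline_layout, /* set */ 0, 1, &write);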

View File

@@ -91,6 +91,7 @@ enum panvk_dynamic_state_bits {
struct panvk_descriptor_state {
uint32_t dirty;
const struct panvk_descriptor_set *sets[MAX_SETS];
struct panvk_push_descriptor_set *push_sets[MAX_SETS];
struct panvk_sysvals sysvals;
struct {
struct panvk_buffer_desc ubos[MAX_DYNAMIC_UNIFORM_BUFFERS];

View File

@@ -14,6 +14,13 @@
#include "vk_object.h"
#include "panvk_macros.h"
#define PANVK_MAX_PUSH_DESCS 32
#define PANVK_MAX_DESC_SIZE 32
#define PANVK_MAX_DESC_UBO_STRIDE 8
struct panvk_cmd_buffer;
struct panvk_descriptor_pool;
struct panvk_descriptor_set_layout;
struct panvk_priv_bo;
@@ -80,10 +87,44 @@ struct panvk_descriptor_set {
void *img_attrib_bufs;
uint32_t *img_fmts;
struct panvk_priv_bo *desc_bo;
struct {
struct panvk_priv_bo *bo;
struct {
uint64_t dev;
void *host;
} addr;
} desc_ubo;
};
VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_descriptor_set, base, VkDescriptorSet,
VK_OBJECT_TYPE_DESCRIPTOR_SET)
struct panvk_push_descriptor_set {
struct {
uint8_t descs[PANVK_MAX_PUSH_DESCS * PANVK_MAX_DESC_SIZE];
uint8_t desc_ubo[PANVK_MAX_PUSH_DESCS * PANVK_MAX_DESC_UBO_STRIDE];
uint32_t img_fmts[PANVK_MAX_PUSH_DESCS];
} storage;
struct panvk_descriptor_set set;
};
#ifdef PAN_ARCH
void
panvk_per_arch(push_descriptor_set_assign_layout)(
struct panvk_push_descriptor_set *push_set,
const struct panvk_descriptor_set_layout *layout);
void
panvk_per_arch(push_descriptor_set)(
struct panvk_push_descriptor_set *push_set,
const struct panvk_descriptor_set_layout *layout,
uint32_t write_count, const VkWriteDescriptorSet *writes);
void
panvk_per_arch(push_descriptor_set_with_template)(
struct panvk_push_descriptor_set *push_set,
const struct panvk_descriptor_set_layout *layout,
VkDescriptorUpdateTemplate templ, const void *data);
#endif
#endif
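
As a quick sanity check on the inline storage sized above (a sketch under the assumption that the constants keep the values from this hunk), the per-set scratch footprint works out to roughly 1.4 KiB, which is why one panvk_push_descriptor_set per set slot can be vk_zalloc'ed lazily from the command-buffer allocator:

#include <stdint.h>

#define PANVK_MAX_PUSH_DESCS      32
#define PANVK_MAX_DESC_SIZE       32
#define PANVK_MAX_DESC_UBO_STRIDE  8

/* descs:    32 descriptors * 32 B = 1024 B
 * desc_ubo: 32 descriptors *  8 B =  256 B
 * img_fmts: 32 descriptors *  4 B =  128 B
 * total inline scratch: 1408 B (~1.4 KiB) per push descriptor set. */
_Static_assert(PANVK_MAX_PUSH_DESCS * PANVK_MAX_DESC_SIZE == 1024, "descs");
_Static_assert(PANVK_MAX_PUSH_DESCS * PANVK_MAX_DESC_UBO_STRIDE == 256, "desc_ubo");
_Static_assert(PANVK_MAX_PUSH_DESCS * sizeof(uint32_t) == 128, "img_fmts");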

View File

@@ -48,6 +48,7 @@ struct panvk_descriptor_set_binding_layout {
struct panvk_descriptor_set_layout {
struct vk_descriptor_set_layout vk;
VkDescriptorSetLayoutCreateFlags flags;
/* Shader stages affected by this descriptor set */
uint16_t shader_stages;

View File

@@ -71,6 +71,7 @@ get_device_extensions(const struct panvk_physical_device *device,
.KHR_shader_expect_assume = true,
.KHR_storage_buffer_storage_class = true,
.KHR_descriptor_update_template = true,
.KHR_push_descriptor = true,
#ifdef PANVK_USE_WSI_PLATFORM
.KHR_swapchain = true,
#endif
@@ -665,7 +666,8 @@ panvk_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
(VkPhysicalDevicePushDescriptorPropertiesKHR *)ext;
properties->maxPushDescriptors = 0;
/* Software limit. */
properties->maxPushDescriptors = 32;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {

View File

@@ -50,6 +50,8 @@
#include "util/rounding.h"
#include "util/u_pack_color.h"
#include "vk_descriptor_update_template.h"
#include "vk_format.h"
struct panvk_draw_info {
@@ -358,6 +360,59 @@ panvk_cmd_prepare_push_constants(
desc_state->push_constants = push_constants.gpu;
}
static void
panvk_cmd_prepare_push_sets(struct panvk_cmd_buffer *cmdbuf,
struct panvk_cmd_bind_point_state *bind_point_state)
{
struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
const struct panvk_pipeline_layout *playout = pipeline->layout;
for (unsigned i = 0; i < playout->vk.set_count; i++) {
const struct panvk_descriptor_set_layout *slayout =
vk_to_panvk_descriptor_set_layout(playout->vk.set_layouts[i]);
bool is_push_set =
slayout->flags &
VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR;
if (desc_state->sets[i] || !is_push_set || !desc_state->push_sets[i])
continue;
struct panvk_descriptor_set *set = &desc_state->push_sets[i]->set;
panvk_per_arch(push_descriptor_set_assign_layout)(desc_state->push_sets[i],
slayout);
if (slayout->desc_ubo_size) {
struct panfrost_ptr desc_ubo = pan_pool_alloc_aligned(
&cmdbuf->desc_pool.base, slayout->desc_ubo_size, 16);
struct mali_uniform_buffer_packed *ubos = set->ubos;
memcpy(desc_ubo.cpu, set->desc_ubo.addr.host, slayout->desc_ubo_size);
set->desc_ubo.addr.dev = desc_ubo.gpu;
set->desc_ubo.addr.host = desc_ubo.cpu;
pan_pack(&ubos[slayout->desc_ubo_index], UNIFORM_BUFFER, cfg) {
cfg.pointer = set->desc_ubo.addr.dev;
cfg.entries = DIV_ROUND_UP(slayout->desc_ubo_size, 16);
}
}
desc_state->sets[i] = &desc_state->push_sets[i]->set;
}
}
static void
panvk_cmd_unprepare_push_sets(struct panvk_cmd_buffer *cmdbuf,
struct panvk_cmd_bind_point_state *bind_point_state)
{
struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sets); i++) {
if (desc_state->push_sets[i] && &desc_state->push_sets[i]->set == desc_state->sets[i])
desc_state->sets[i] = NULL;
}
}
static void
panvk_cmd_prepare_ubos(struct panvk_cmd_buffer *cmdbuf,
struct panvk_cmd_bind_point_state *bind_point_state)
@@ -1270,6 +1325,7 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
panvk_cmd_prepare_draw_sysvals(cmdbuf, bind_point_state, draw);
panvk_cmd_prepare_push_sets(cmdbuf, bind_point_state);
panvk_cmd_prepare_ubos(cmdbuf, bind_point_state);
panvk_cmd_prepare_textures(cmdbuf, bind_point_state);
panvk_cmd_prepare_samplers(cmdbuf, bind_point_state);
@@ -1309,6 +1365,7 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
/* Clear the dirty flags all at once */
desc_state->dirty = cmdbuf->state.dirty = 0;
panvk_cmd_unprepare_push_sets(cmdbuf, bind_point_state);
}
VKAPI_ATTR void VKAPI_CALL
@@ -1587,6 +1644,14 @@ panvk_destroy_cmdbuf(struct vk_command_buffer *vk_cmdbuf)
container_of(vk_cmdbuf, struct panvk_cmd_buffer, vk);
struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
for (unsigned i = 0; i < MAX_BIND_POINTS; i++) {
for (unsigned j = 0; j < MAX_SETS; j++) {
if (cmdbuf->bind_points[i].desc_state.push_sets[j])
vk_free(&cmdbuf->vk.pool->alloc,
cmdbuf->bind_points[i].desc_state.push_sets[j]);
}
}
list_for_each_entry_safe(struct panvk_batch, batch, &cmdbuf->batches, node) {
list_del(&batch->node);
util_dynarray_fini(&batch->jobs);
@@ -1692,6 +1757,7 @@ panvk_per_arch(CmdDispatch)(VkCommandBuffer commandBuffer, uint32_t x,
panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);
dispatch.tsd = batch->tls.gpu;
panvk_cmd_prepare_push_sets(cmdbuf, bind_point_state);
panvk_prepare_non_vs_attribs(cmdbuf, bind_point_state);
dispatch.attributes = desc_state->non_vs_attribs;
dispatch.attribute_bufs = desc_state->non_vs_attrib_bufs;
@@ -1744,6 +1810,7 @@ panvk_per_arch(CmdDispatch)(VkCommandBuffer commandBuffer, uint32_t x,
panvk_per_arch(cmd_close_batch)(cmdbuf);
desc_state->dirty = 0;
panvk_cmd_unprepare_push_sets(cmdbuf, bind_point_state);
}
static void
@@ -2260,3 +2327,76 @@ panvk_per_arch(CmdDispatchIndirect)(VkCommandBuffer commandBuffer,
{
panvk_stub();
}
static struct panvk_push_descriptor_set *
panvk_cmd_push_descriptors(struct panvk_cmd_buffer *cmdbuf,
VkPipelineBindPoint bind_point,
uint32_t set)
{
struct panvk_cmd_bind_point_state *bind_point_state =
&cmdbuf->bind_points[bind_point];
struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
assert(set < MAX_SETS);
if (unlikely(desc_state->push_sets[set] == NULL)) {
desc_state->push_sets[set] =
vk_zalloc(&cmdbuf->vk.pool->alloc, sizeof(*desc_state->push_sets[0]),
8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (unlikely(desc_state->push_sets[set] == NULL)) {
vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
return NULL;
}
}
/* Pushing descriptors replaces whatever sets are bound */
desc_state->sets[set] = NULL;
/* Reset all descs to force emission of new tables on the next draw/dispatch.
* TODO: Be smarter and only reset those when required.
*/
desc_state->ubos = 0;
desc_state->textures = 0;
desc_state->samplers = 0;
desc_state->vs_attrib_bufs = desc_state->non_vs_attrib_bufs = 0;
desc_state->vs_attribs = desc_state->non_vs_attribs = 0;
return desc_state->push_sets[set];
}
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdPushDescriptorSetKHR)(
VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
VkPipelineLayout layout, uint32_t set, uint32_t descriptorWriteCount,
const VkWriteDescriptorSet *pDescriptorWrites)
{
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
VK_FROM_HANDLE(panvk_pipeline_layout, playout, layout);
const struct panvk_descriptor_set_layout *set_layout =
vk_to_panvk_descriptor_set_layout(playout->vk.set_layouts[set]);
struct panvk_push_descriptor_set *push_set =
panvk_cmd_push_descriptors(cmdbuf, pipelineBindPoint, set);
if (!push_set)
return;
panvk_per_arch(push_descriptor_set)(push_set, set_layout,
descriptorWriteCount, pDescriptorWrites);
}
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdPushDescriptorSetWithTemplateKHR)(
VkCommandBuffer commandBuffer,
VkDescriptorUpdateTemplate descriptorUpdateTemplate, VkPipelineLayout layout,
uint32_t set, const void *pData)
{
VK_FROM_HANDLE(vk_descriptor_update_template, template, descriptorUpdateTemplate);
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
VK_FROM_HANDLE(panvk_pipeline_layout, playout, layout);
const struct panvk_descriptor_set_layout *set_layout =
vk_to_panvk_descriptor_set_layout(playout->vk.set_layouts[set]);
struct panvk_push_descriptor_set *push_set =
panvk_cmd_push_descriptors(cmdbuf, template->bind_point, set);
if (!push_set)
return;
panvk_per_arch(push_descriptor_set_with_template)(
push_set, set_layout, descriptorUpdateTemplate, pData);
}
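
To illustrate the template entry point just above, here is a hypothetical application-side sketch (not part of the diff): a push-descriptor update template is created against the flagged set layout and replayed with vkCmdPushDescriptorSetWithTemplateKHR. The device, set_layout, pipeline_layout, cmd and buf_info names are placeholders assumed to exist.

/* One entry mapping binding 0 to a VkDescriptorBufferInfo in the client data. */
const VkDescriptorUpdateTemplateEntry entry = {
   .dstBinding = 0,
   .dstArrayElement = 0,
   .descriptorCount = 1,
   .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
   .offset = 0,
   .stride = sizeof(VkDescriptorBufferInfo),
};
const VkDescriptorUpdateTemplateCreateInfo template_info = {
   .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO,
   .descriptorUpdateEntryCount = 1,
   .pDescriptorUpdateEntries = &entry,
   .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR,
   .descriptorSetLayout = set_layout,
   .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
   .pipelineLayout = pipeline_layout,
   .set = 0,
};
VkDescriptorUpdateTemplate template;
vkCreateDescriptorUpdateTemplate(device, &template_info, NULL, &template);

/* pData is interpreted through the template entries; the driver routes it to
 * panvk_per_arch(push_descriptor_set_with_template)() above. */
vkCmdPushDescriptorSetWithTemplateKHR(cmd, template, pipeline_layout,
                                      /* set */ 0, &buf_info);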

View File

@@ -140,8 +140,8 @@ panvk_descriptor_set_destroy(struct panvk_device *device,
struct panvk_descriptor_pool *pool,
struct panvk_descriptor_set *set)
{
if (set->desc_bo)
panvk_priv_bo_destroy(set->desc_bo, NULL);
if (set->desc_ubo.bo)
panvk_priv_bo_destroy(set->desc_ubo.bo, NULL);
vk_object_free(&device->vk, NULL, set);
}
@@ -256,16 +256,18 @@ panvk_per_arch(descriptor_set_create)(
vk_object_base_init(&device->vk, &set->base, VK_OBJECT_TYPE_DESCRIPTOR_SET);
if (layout->desc_ubo_size) {
set->desc_bo =
set->desc_ubo.bo =
panvk_priv_bo_create(device, layout->desc_ubo_size, 0, NULL,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!set->desc_bo)
if (!set->desc_ubo.bo)
goto err_free_set;
struct mali_uniform_buffer_packed *ubos = set->ubos;
set->desc_ubo.addr.dev = set->desc_ubo.bo->addr.dev;
set->desc_ubo.addr.host = set->desc_ubo.bo->addr.host;
pan_pack(&ubos[layout->desc_ubo_index], UNIFORM_BUFFER, cfg) {
cfg.pointer = set->desc_bo->addr.dev;
cfg.pointer = set->desc_ubo.addr.dev;
cfg.entries = DIV_ROUND_UP(layout->desc_ubo_size, 16);
}
}
@@ -285,8 +287,8 @@ panvk_per_arch(descriptor_set_create)(
return VK_SUCCESS;
err_free_set:
if (set->desc_bo)
panvk_priv_bo_destroy(set->desc_bo, NULL);
if (set->desc_ubo.bo)
panvk_priv_bo_destroy(set->desc_ubo.bo, NULL);
vk_object_free(&device->vk, NULL, set);
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}
@@ -332,7 +334,7 @@ panvk_desc_ubo_data(struct panvk_descriptor_set *set, uint32_t binding,
const struct panvk_descriptor_set_binding_layout *binding_layout =
&set->layout->bindings[binding];
return (char *)set->desc_bo->addr.host + binding_layout->desc_ubo_offset +
return (char *)set->desc_ubo.addr.host + binding_layout->desc_ubo_offset +
elem * binding_layout->desc_ubo_stride;
}
@@ -363,11 +365,17 @@ panvk_write_sampler_desc(struct panvk_descriptor_set *set, uint32_t binding,
{
const struct panvk_descriptor_set_binding_layout *binding_layout =
&set->layout->bindings[binding];
bool push_set = set->layout->flags &
VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR;
if (binding_layout->immutable_samplers)
if (binding_layout->immutable_samplers && !push_set)
return;
VK_FROM_HANDLE(panvk_sampler, sampler, pImageInfo->sampler);
struct panvk_sampler *sampler =
binding_layout->immutable_samplers
? binding_layout->immutable_samplers[elem]
: panvk_sampler_from_handle(pImageInfo->sampler);
panvk_write_sampler_desc_raw(set, binding, elem, sampler);
}
@@ -627,16 +635,10 @@ panvk_copy_dyn_ssbo_desc(struct panvk_descriptor_set *dst_set,
*panvk_dyn_ssbo_desc(src_set, src_binding, src_elem);
}
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(UpdateDescriptorSets)(
VkDevice _device, uint32_t descriptorWriteCount,
const VkWriteDescriptorSet *pDescriptorWrites, uint32_t descriptorCopyCount,
const VkCopyDescriptorSet *pDescriptorCopies)
static void
panvk_descriptor_set_write(struct panvk_descriptor_set *set,
const VkWriteDescriptorSet *write)
{
for (unsigned i = 0; i < descriptorWriteCount; i++) {
const VkWriteDescriptorSet *write = &pDescriptorWrites[i];
VK_FROM_HANDLE(panvk_descriptor_set, set, write->dstSet);
switch (write->descriptorType) {
case VK_DESCRIPTOR_TYPE_SAMPLER:
for (uint32_t j = 0; j < write->descriptorCount; j++) {
@@ -725,6 +727,19 @@ panvk_per_arch(UpdateDescriptorSets)(
default:
unreachable("Unsupported descriptor type");
}
}
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(UpdateDescriptorSets)(
VkDevice _device, uint32_t descriptorWriteCount,
const VkWriteDescriptorSet *pDescriptorWrites, uint32_t descriptorCopyCount,
const VkCopyDescriptorSet *pDescriptorCopies)
{
for (unsigned i = 0; i < descriptorWriteCount; i++) {
const VkWriteDescriptorSet *write = &pDescriptorWrites[i];
VK_FROM_HANDLE(panvk_descriptor_set, set, write->dstSet);
panvk_descriptor_set_write(set, write);
}
for (unsigned i = 0; i < descriptorCopyCount; i++) {
@@ -828,21 +843,15 @@ panvk_per_arch(UpdateDescriptorSets)(
}
}
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(UpdateDescriptorSetWithTemplate)(
VkDevice _device, VkDescriptorSet descriptorSet,
VkDescriptorUpdateTemplate descriptorUpdateTemplate, const void *data)
static void
panvk_descriptor_set_update_with_template(struct panvk_descriptor_set *set,
VkDescriptorUpdateTemplate templ,
const void *data)
{
VK_FROM_HANDLE(panvk_descriptor_set, set, descriptorSet);
VK_FROM_HANDLE(vk_descriptor_update_template, template,
descriptorUpdateTemplate);
const struct panvk_descriptor_set_layout *layout = set->layout;
VK_FROM_HANDLE(vk_descriptor_update_template, template, templ);
for (uint32_t i = 0; i < template->entry_count; i++) {
const struct vk_descriptor_template_entry *entry = &template->entries[i];
const struct panvk_descriptor_set_binding_layout *binding_layout =
&layout->bindings[entry->binding];
switch (entry->type) {
case VK_DESCRIPTOR_TYPE_SAMPLER:
@@ -853,10 +862,8 @@ panvk_per_arch(UpdateDescriptorSetWithTemplate)(
const VkDescriptorImageInfo *info =
data + entry->offset + j * entry->stride;
if ((entry->type == VK_DESCRIPTOR_TYPE_SAMPLER ||
entry->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) &&
!binding_layout->immutable_samplers) {
if (entry->type == VK_DESCRIPTOR_TYPE_SAMPLER ||
entry->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
panvk_write_sampler_desc(set, entry->binding,
entry->array_element + j, info);
}
@@ -864,7 +871,6 @@ panvk_per_arch(UpdateDescriptorSetWithTemplate)(
if (entry->type == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE ||
entry->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ||
entry->type == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) {
panvk_write_tex_desc(set, entry->binding,
entry->array_element + j, info);
}
@@ -943,3 +949,78 @@ panvk_per_arch(UpdateDescriptorSetWithTemplate)(
}
}
}
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(UpdateDescriptorSetWithTemplate)(
VkDevice _device, VkDescriptorSet descriptorSet,
VkDescriptorUpdateTemplate descriptorUpdateTemplate, const void *data)
{
VK_FROM_HANDLE(panvk_descriptor_set, set, descriptorSet);
panvk_descriptor_set_update_with_template(set, descriptorUpdateTemplate, data);
}
void
panvk_per_arch(push_descriptor_set_assign_layout)(
struct panvk_push_descriptor_set *push_set,
const struct panvk_descriptor_set_layout *layout)
{
ASSERTED unsigned num_descs = layout->num_samplers + layout->num_textures +
layout->num_ubos + layout->num_imgs;
struct panvk_descriptor_set *set = &push_set->set;
unsigned desc_offset = 0;
set->layout = layout;
assert(layout->num_dyn_ubos == 0);
assert(layout->num_dyn_ssbos == 0);
assert(num_descs <= PANVK_MAX_PUSH_DESCS);
assert(layout->desc_ubo_size <= sizeof(push_set->storage.desc_ubo));
if (layout->num_ubos) {
set->ubos = (void *)(push_set->storage.descs + desc_offset);
desc_offset += PANVK_MAX_DESC_SIZE * layout->num_ubos;
}
if (layout->num_samplers) {
set->samplers = (void *)(push_set->storage.descs + desc_offset);
desc_offset += PANVK_MAX_DESC_SIZE * layout->num_samplers;
}
if (layout->num_textures) {
set->textures = (void *)(push_set->storage.descs + desc_offset);
desc_offset += PANVK_MAX_DESC_SIZE * layout->num_textures;
}
if (layout->num_imgs) {
set->img_attrib_bufs = (void *)(push_set->storage.descs + desc_offset);
desc_offset += PANVK_MAX_DESC_SIZE * layout->num_imgs;
set->img_fmts = push_set->storage.img_fmts;
}
if (layout->desc_ubo_size)
set->desc_ubo.addr.host = push_set->storage.desc_ubo;
}
void
panvk_per_arch(push_descriptor_set)(
struct panvk_push_descriptor_set *push_set,
const struct panvk_descriptor_set_layout *layout,
uint32_t write_count, const VkWriteDescriptorSet *writes)
{
panvk_per_arch(push_descriptor_set_assign_layout)(push_set, layout);
for (unsigned i = 0; i < write_count; i++) {
const VkWriteDescriptorSet *write = &writes[i];
panvk_descriptor_set_write(&push_set->set, write);
}
}
void
panvk_per_arch(push_descriptor_set_with_template)(
struct panvk_push_descriptor_set *push_set,
const struct panvk_descriptor_set_layout *layout,
VkDescriptorUpdateTemplate templ, const void *data)
{
panvk_per_arch(push_descriptor_set_assign_layout)(push_set, layout);
panvk_descriptor_set_update_with_template(&push_set->set, templ, data);
}

View File

@@ -126,6 +126,8 @@ panvk_per_arch(CreateDescriptorSetLayout)(
goto err_free_bindings;
}
set_layout->flags = pCreateInfo->flags;
struct panvk_sampler **immutable_samplers =
(struct panvk_sampler **)((uint8_t *)set_layout + sizeof(*set_layout) +
(sizeof(