diff --git a/src/panfrost/vulkan/panvk_private.h b/src/panfrost/vulkan/panvk_private.h index 8f0864459ed..74686d0d2b0 100644 --- a/src/panfrost/vulkan/panvk_private.h +++ b/src/panfrost/vulkan/panvk_private.h @@ -580,6 +580,7 @@ struct panvk_attrib_buf_info { struct { unsigned stride; bool per_instance; + uint32_t instance_divisor; }; unsigned special_id; }; diff --git a/src/panfrost/vulkan/panvk_vX_cmd_buffer.c b/src/panfrost/vulkan/panvk_vX_cmd_buffer.c index f75030c5edc..a5a14cb2d28 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_buffer.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_buffer.c @@ -781,7 +781,7 @@ panvk_draw_prepare_vs_attribs(struct panvk_cmd_buffer *cmdbuf, cmdbuf->state.vb.bufs, cmdbuf->state.vb.count, draw, bufs.cpu); - panvk_per_arch(emit_attribs)(cmdbuf->device, &pipeline->attribs, + panvk_per_arch(emit_attribs)(cmdbuf->device, draw, &pipeline->attribs, cmdbuf->state.vb.bufs, cmdbuf->state.vb.count, attribs.cpu); diff --git a/src/panfrost/vulkan/panvk_vX_cs.c b/src/panfrost/vulkan/panvk_vX_cs.c index d537510cdeb..3bc37ac9a1a 100644 --- a/src/panfrost/vulkan/panvk_vX_cs.c +++ b/src/panfrost/vulkan/panvk_vX_cs.c @@ -232,23 +232,63 @@ panvk_emit_attrib_buf(const struct panvk_attribs_info *info, assert(idx < buf_count); const struct panvk_attrib_buf *buf = &bufs[idx]; - unsigned divisor = buf_info->per_instance ? - draw->padded_vertex_count : 0; - unsigned stride = divisor && draw->instance_count == 1 ? - 0 : buf_info->stride; mali_ptr addr = buf->address & ~63ULL; unsigned size = buf->size + (buf->address & 63); + unsigned divisor = + draw->padded_vertex_count * buf_info->instance_divisor; /* TODO: support instanced arrays */ - pan_pack(desc, ATTRIBUTE_BUFFER, cfg) { - if (draw->instance_count > 1 && divisor) { + if (draw->instance_count <= 1) { + pan_pack(desc, ATTRIBUTE_BUFFER, cfg) { + cfg.type = MALI_ATTRIBUTE_TYPE_1D; + cfg.stride = buf_info->per_instance ? 
0 : buf_info->stride; + cfg.pointer = addr; + cfg.size = size; + } + } else if (!buf_info->per_instance) { + pan_pack(desc, ATTRIBUTE_BUFFER, cfg) { cfg.type = MALI_ATTRIBUTE_TYPE_1D_MODULUS; - cfg.divisor = divisor; + cfg.divisor = draw->padded_vertex_count; + cfg.stride = buf_info->stride; + cfg.pointer = addr; + cfg.size = size; + } + } else if (!divisor) { + /* instance_divisor == 0 means all instances share the same value. + * Make it a 1D array with a zero stride. + */ + pan_pack(desc, ATTRIBUTE_BUFFER, cfg) { + cfg.type = MALI_ATTRIBUTE_TYPE_1D; + cfg.stride = 0; + cfg.pointer = addr; + cfg.size = size; + } + } else if (util_is_power_of_two_or_zero(divisor)) { + pan_pack(desc, ATTRIBUTE_BUFFER, cfg) { + cfg.type = MALI_ATTRIBUTE_TYPE_1D_POT_DIVISOR; + cfg.stride = buf_info->stride; + cfg.pointer = addr; + cfg.size = size; + cfg.divisor_r = __builtin_ctz(divisor); + } + } else { + unsigned divisor_r = 0, divisor_e = 0; + unsigned divisor_num = + panfrost_compute_magic_divisor(divisor, &divisor_r, &divisor_e); + pan_pack(desc, ATTRIBUTE_BUFFER, cfg) { + cfg.type = MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR; + cfg.stride = buf_info->stride; + cfg.pointer = addr; + cfg.size = size; + cfg.divisor_r = divisor_r; + cfg.divisor_e = divisor_e; } - cfg.pointer = addr; - cfg.stride = stride; - cfg.size = size; + desc += pan_size(ATTRIBUTE_BUFFER); + pan_pack(desc, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, cfg) { + cfg.divisor_numerator = divisor_num; + cfg.divisor = buf_info->instance_divisor; + } } } @@ -261,8 +301,10 @@ panvk_per_arch(emit_attrib_bufs)(const struct panvk_attribs_info *info, { struct mali_attribute_buffer_packed *buf = descs; - for (unsigned i = 0; i < info->buf_count; i++) - panvk_emit_attrib_buf(info, draw, bufs, buf_count, i, buf++); + for (unsigned i = 0; i < info->buf_count; i++) { + panvk_emit_attrib_buf(info, draw, bufs, buf_count, i, buf); + buf += 2; + } } void @@ -295,23 +337,31 @@ panvk_per_arch(emit_sampler)(const VkSamplerCreateInfo *pCreateInfo, 
static void panvk_emit_attrib(const struct panvk_device *dev, + const struct panvk_draw_info *draw, const struct panvk_attribs_info *attribs, const struct panvk_attrib_buf *bufs, unsigned buf_count, unsigned idx, void *attrib) { const struct panfrost_device *pdev = &dev->physical_device->pdev; + unsigned buf_idx = attribs->attrib[idx].buf; + const struct panvk_attrib_buf_info *buf_info = &attribs->buf[buf_idx]; pan_pack(attrib, ATTRIBUTE, cfg) { - cfg.buffer_index = attribs->attrib[idx].buf; + cfg.buffer_index = buf_idx * 2; cfg.offset = attribs->attrib[idx].offset + - (bufs[cfg.buffer_index].address & 63); + (bufs[buf_idx].address & 63); + + if (buf_info->per_instance) + cfg.offset += draw->first_instance * buf_info->stride; + cfg.format = pdev->formats[attribs->attrib[idx].format].hw; } } void panvk_per_arch(emit_attribs)(const struct panvk_device *dev, + const struct panvk_draw_info *draw, const struct panvk_attribs_info *attribs, const struct panvk_attrib_buf *bufs, unsigned buf_count, @@ -320,7 +370,7 @@ panvk_per_arch(emit_attribs)(const struct panvk_device *dev, struct mali_attribute_packed *attrib = descs; for (unsigned i = 0; i < attribs->attrib_count; i++) - panvk_emit_attrib(dev, attribs, bufs, buf_count, i, attrib++); + panvk_emit_attrib(dev, draw, attribs, bufs, buf_count, i, attrib++); } void diff --git a/src/panfrost/vulkan/panvk_vX_cs.h b/src/panfrost/vulkan/panvk_vX_cs.h index f551410782b..14b34497044 100644 --- a/src/panfrost/vulkan/panvk_vX_cs.h +++ b/src/panfrost/vulkan/panvk_vX_cs.h @@ -57,6 +57,7 @@ panvk_per_arch(emit_attrib_bufs)(const struct panvk_attribs_info *info, void panvk_per_arch(emit_attribs)(const struct panvk_device *dev, + const struct panvk_draw_info *draw, const struct panvk_attribs_info *attribs, const struct panvk_attrib_buf *bufs, unsigned buf_count, diff --git a/src/panfrost/vulkan/panvk_vX_pipeline.c b/src/panfrost/vulkan/panvk_vX_pipeline.c index 3c8cd0fe9d6..70daa5d5a8f 100644 --- a/src/panfrost/vulkan/panvk_vX_pipeline.c +++ 
b/src/panfrost/vulkan/panvk_vX_pipeline.c @@ -877,6 +877,9 @@ panvk_pipeline_builder_parse_vertex_input(struct panvk_pipeline_builder *builder &info->pVertexBindingDescriptions[i]; attribs->buf_count = MAX2(desc->binding + 1, attribs->buf_count); attribs->buf[desc->binding].stride = desc->stride; + attribs->buf[desc->binding].per_instance = + desc->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE; + attribs->buf[desc->binding].instance_divisor = 1; attribs->buf[desc->binding].special = false; }