panvk/csf: Fix instance attribute offseting
Letting the shader offset instanceID by baseInstance works only if
the divisor is one. If the divisor is greater than one, the firstInstance
parameter shouldn't be applied this divisor, but it currently is. Zero
divisors are also problematic, in that they will force use of the
instance zero attribute all the time.
The only way to fix that is to tweak the offsets of the per-instance
attributes instead, like is done in the JM backend.
Fixes: 1570f0172e
("panvk: Fix base_{instance,vertex} handling")
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Olivia Lee <benjamin.lee@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34642>
(cherry picked from commit b2a8e3838d6f70d94d785622ea89fd65acb0d322)
This commit is contained in:

committed by
Eric Engestrom

parent
41a2ccc117
commit
f127f9ca88
@@ -224,7 +224,7 @@
|
||||
"description": "panvk/csf: Fix instance attribute offseting",
|
||||
"nominated": true,
|
||||
"nomination_type": 2,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": "1570f0172e02aa660bb0b2619d67c1cff52b3914",
|
||||
"notes": null
|
||||
|
@@ -60,6 +60,12 @@ emit_vs_attrib(struct panvk_cmd_buffer *cmdbuf,
|
||||
|
||||
pan_pack(desc, ATTRIBUTE, cfg) {
|
||||
cfg.offset = attrib_info->offset;
|
||||
|
||||
if (per_instance) {
|
||||
cfg.offset +=
|
||||
cmdbuf->state.gfx.sysvals.vs.base_instance * buf_info->stride;
|
||||
}
|
||||
|
||||
cfg.format = GENX(panfrost_format_from_pipe_format)(f)->hw;
|
||||
cfg.table = 0;
|
||||
cfg.buffer_index = buf_idx;
|
||||
@@ -115,8 +121,19 @@ prepare_vs_driver_set(struct panvk_cmd_buffer *cmdbuf)
|
||||
cmdbuf->vk.dynamic_graphics_state.vi;
|
||||
uint32_t vb_count = 0;
|
||||
|
||||
u_foreach_bit(i, vi->attributes_valid)
|
||||
cmdbuf->state.gfx.vi.attribs_changing_on_base_instance = 0;
|
||||
u_foreach_bit(i, vi->attributes_valid) {
|
||||
const struct vk_vertex_binding_state *binding =
|
||||
&vi->bindings[vi->attributes[i].binding];
|
||||
|
||||
if (binding->input_rate == VK_VERTEX_INPUT_RATE_INSTANCE &&
|
||||
binding->stride != 0) {
|
||||
cmdbuf->state.gfx.vi.attribs_changing_on_base_instance |=
|
||||
BITFIELD_BIT(i);
|
||||
}
|
||||
|
||||
vb_count = MAX2(vi->attributes[i].binding + 1, vb_count);
|
||||
}
|
||||
|
||||
uint32_t vb_offset = vs->desc_info.dyn_bufs.count + MAX_VS_ATTRIBS + 1;
|
||||
uint32_t desc_count = vb_offset + vb_count;
|
||||
@@ -1900,6 +1917,14 @@ prepare_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
/* Changes to base_instance will modify the offset of per-instance
|
||||
* attributes, so we manually invalidate the VI state to trigger a
|
||||
* new attribute table generation in that case. */
|
||||
if ((draw->instance.base != cmdbuf->state.gfx.sysvals.vs.base_instance ||
|
||||
draw->indirect.buffer_dev_addr) &&
|
||||
cmdbuf->state.gfx.vi.attribs_changing_on_base_instance != 0)
|
||||
BITSET_SET(cmdbuf->vk.dynamic_graphics_state.dirty, MESA_VK_DYNAMIC_VI);
|
||||
|
||||
panvk_per_arch(cmd_prepare_draw_sysvals)(cmdbuf, draw);
|
||||
|
||||
result = prepare_push_uniforms(cmdbuf);
|
||||
@@ -2164,7 +2189,13 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
|
||||
if (result != VK_SUCCESS)
|
||||
return;
|
||||
|
||||
uint32_t patch_attribs =
|
||||
cmdbuf->state.gfx.vi.attribs_changing_on_base_instance;
|
||||
struct cs_index draw_params_addr = cs_scratch_reg64(b, 0);
|
||||
struct cs_index vs_drv_set = cs_scratch_reg64(b, 2);
|
||||
struct cs_index attrib_offset = cs_scratch_reg32(b, 4);
|
||||
struct cs_index multiplicand = cs_scratch_reg32(b, 5);
|
||||
|
||||
cs_move64_to(b, draw_params_addr, draw->indirect.buffer_dev_addr);
|
||||
|
||||
cs_update_vt_ctx(b) {
|
||||
@@ -2200,6 +2231,60 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
|
||||
cs_wait_slot(b, SB_ID(LS), false);
|
||||
}
|
||||
|
||||
if (patch_attribs != 0) {
|
||||
struct panvk_shader_desc_state *vs_desc_state =
|
||||
&cmdbuf->state.gfx.vs.desc;
|
||||
const struct vk_vertex_input_state *vi =
|
||||
cmdbuf->vk.dynamic_graphics_state.vi;
|
||||
|
||||
cs_move64_to(b, vs_drv_set, vs_desc_state->driver_set.dev_addr);
|
||||
|
||||
/* If firstInstance=0, skip the offset adjustment. */
|
||||
cs_if(b, MALI_CS_CONDITION_NEQUAL,
|
||||
cs_sr_reg32(b, IDVS, INSTANCE_OFFSET)) {
|
||||
u_foreach_bit(i, patch_attribs) {
|
||||
const struct vk_vertex_attribute_state *attrib_info =
|
||||
&vi->attributes[i];
|
||||
const struct vk_vertex_binding_state *binding =
|
||||
&vi->bindings[attrib_info->binding];
|
||||
|
||||
cs_load32_to(b, attrib_offset, vs_drv_set,
|
||||
pan_size(ATTRIBUTE) * i + (2 * sizeof(uint32_t)));
|
||||
cs_wait_slot(b, SB_ID(LS), false);
|
||||
|
||||
/* Emulated immediate multiply: we walk the bits in
|
||||
* base_instance, and accumulate (stride << bit_pos) if the bit
|
||||
* is present. This is sub-optimal, but it's simple :-). */
|
||||
cs_add32(b, multiplicand, cs_sr_reg32(b, IDVS, INSTANCE_OFFSET), 0);
|
||||
for (uint32_t i = 31; i > 0; i--) {
|
||||
uint32_t add = binding->stride << i;
|
||||
|
||||
/* bit31 is the sign bit, so we don't need to subtract to
|
||||
* check the presence of the bit. */
|
||||
if (i < 31)
|
||||
cs_add32(b, multiplicand, multiplicand, -(1 << i));
|
||||
|
||||
if (add) {
|
||||
cs_if(b, MALI_CS_CONDITION_LESS, multiplicand)
|
||||
cs_add32(b, multiplicand, multiplicand, 1 << i);
|
||||
cs_else(b)
|
||||
cs_add32(b, attrib_offset, attrib_offset, add);
|
||||
} else {
|
||||
cs_if(b, MALI_CS_CONDITION_LESS, multiplicand)
|
||||
cs_add32(b, multiplicand, multiplicand, 1 << i);
|
||||
}
|
||||
}
|
||||
|
||||
cs_if(b, MALI_CS_CONDITION_NEQUAL, multiplicand)
|
||||
cs_add32(b, attrib_offset, attrib_offset, binding->stride);
|
||||
|
||||
cs_store32(b, attrib_offset, vs_drv_set,
|
||||
pan_size(ATTRIBUTE) * i + (2 * sizeof(uint32_t)));
|
||||
cs_wait_slot(b, SB_ID(LS), false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* NIR expects zero-based instance ID, but even if it did have an intrinsic to
|
||||
* load the absolute instance ID, we'd want to keep it zero-based to work around
|
||||
* Mali's limitation on non-zero firstInstance when a instance divisor is used.
|
||||
|
@@ -146,6 +146,12 @@ struct panvk_cmd_graphics_state {
|
||||
unsigned count;
|
||||
} vb;
|
||||
|
||||
#if PAN_ARCH >= 10
|
||||
struct {
|
||||
uint32_t attribs_changing_on_base_instance;
|
||||
} vi;
|
||||
#endif
|
||||
|
||||
/* Index buffer */
|
||||
struct {
|
||||
struct panvk_buffer *buffer;
|
||||
|
@@ -203,9 +203,7 @@ panvk_lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin,
|
||||
PAN_ARCH <= 7 ?
|
||||
nir_load_raw_vertex_id_pan(b) :
|
||||
nir_load_vertex_id(b),
|
||||
PAN_ARCH >= 9 ?
|
||||
nir_iadd(b, nir_load_instance_id(b), nir_load_base_instance(b)) :
|
||||
nir_load_instance_id(b),
|
||||
nir_load_instance_id(b),
|
||||
nir_get_io_offset_src(intrin)->ssa,
|
||||
.base = nir_intrinsic_base(intrin),
|
||||
.component = nir_intrinsic_component(intrin),
|
||||
|
Reference in New Issue
Block a user