panvk: Fix base_{instance,vertex} handling
At the moment, we're not initializing the base_{instance,vertex} sysvals which we'll need to do if we want to support shader draw parameters. It turns out even without shader draw parameters enabled, some shaders need a valid base_instance value, so this alone should fix a few tests. On CSF hardware, we have a way to pass a non-zero base instance that's propagated to the instance ID, but this messes with instance divisors, so instead of using the native base instance feature, we force it to zero, pass the base instance through an FAU sysval, and let panvk_lower_load_vs_input() do the lowering for vertex attribute loads. Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com> Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32039>
This commit is contained in:

committed by
Marge Bot

parent
c7c04e13e3
commit
1570f0172e
@@ -223,7 +223,8 @@ prepare_fs_driver_set(struct panvk_cmd_buffer *cmdbuf)
|
||||
#define MIN_DEPTH_CLIP_RANGE 37.7E-06f
|
||||
|
||||
static void
|
||||
prepare_sysvals(struct panvk_cmd_buffer *cmdbuf)
|
||||
prepare_sysvals(struct panvk_cmd_buffer *cmdbuf,
|
||||
const struct panvk_draw_info *draw)
|
||||
{
|
||||
struct panvk_graphics_sysvals *sysvals = &cmdbuf->state.gfx.sysvals;
|
||||
struct vk_color_blend_state *cb = &cmdbuf->vk.dynamic_graphics_state.cb;
|
||||
@@ -304,6 +305,15 @@ prepare_sysvals(struct panvk_cmd_buffer *cmdbuf)
|
||||
}
|
||||
gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS);
|
||||
}
|
||||
|
||||
if (draw->vertex.base != sysvals->vs.first_vertex ||
|
||||
draw->vertex.base != sysvals->vs.base_vertex ||
|
||||
draw->instance.base != sysvals->vs.base_instance) {
|
||||
sysvals->vs.first_vertex = draw->vertex.base;
|
||||
sysvals->vs.base_vertex = draw->vertex.base;
|
||||
sysvals->vs.base_instance = draw->instance.base;
|
||||
gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS);
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
@@ -1654,7 +1664,7 @@ prepare_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
|
||||
return result;
|
||||
}
|
||||
|
||||
prepare_sysvals(cmdbuf);
|
||||
prepare_sysvals(cmdbuf, draw);
|
||||
|
||||
result = prepare_push_uniforms(cmdbuf);
|
||||
if (result != VK_SUCCESS)
|
||||
@@ -1755,7 +1765,11 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
|
||||
cs_move32_to(b, cs_sr_reg32(b, 34), draw->instance.count);
|
||||
cs_move32_to(b, cs_sr_reg32(b, 35), draw->index.offset);
|
||||
cs_move32_to(b, cs_sr_reg32(b, 36), draw->vertex.base);
|
||||
cs_move32_to(b, cs_sr_reg32(b, 37), draw->instance.base);
|
||||
/* NIR expects zero-based instance ID, but even if it did have an intrinsic to
|
||||
* load the absolute instance ID, we'd want to keep it zero-based to work around
|
||||
* Mali's limitation on non-zero firstInstance when a instance divisor is used.
|
||||
*/
|
||||
cs_move32_to(b, cs_sr_reg32(b, 37), 0);
|
||||
}
|
||||
|
||||
struct mali_primitive_flags_packed flags_override =
|
||||
@@ -1891,6 +1905,9 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
|
||||
/* MultiDrawIndirect (.maxDrawIndirectCount) needs additional changes. */
|
||||
assert(draw->indirect.draw_count == 1);
|
||||
|
||||
/* Force a new push uniform block to be allocated */
|
||||
gfx_state_set_dirty(cmdbuf, PUSH_UNIFORMS);
|
||||
|
||||
result = prepare_draw(cmdbuf, draw);
|
||||
if (result != VK_SUCCESS)
|
||||
return;
|
||||
@@ -1907,12 +1924,31 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
|
||||
cs_load_to(b, cs_sr_reg_tuple(b, 33, 5), draw_params_addr, reg_mask, 0);
|
||||
}
|
||||
|
||||
struct mali_primitive_flags_packed flags_override =
|
||||
get_tiler_flags_override(draw);
|
||||
|
||||
/* Wait for the SR33-37 indirect buffer load. */
|
||||
cs_wait_slot(b, SB_ID(LS), false);
|
||||
|
||||
struct cs_index fau_block_addr = cs_scratch_reg64(b, 2);
|
||||
cs_move64_to(b, fau_block_addr, cmdbuf->state.gfx.push_uniforms);
|
||||
cs_store32(b, cs_sr_reg32(b, 36), fau_block_addr,
|
||||
256 + offsetof(struct panvk_graphics_sysvals, vs.first_vertex));
|
||||
cs_store32(b, cs_sr_reg32(b, 36), fau_block_addr,
|
||||
256 + offsetof(struct panvk_graphics_sysvals, vs.base_vertex));
|
||||
cs_store32(b, cs_sr_reg32(b, 37), fau_block_addr,
|
||||
256 + offsetof(struct panvk_graphics_sysvals, vs.base_instance));
|
||||
|
||||
/* Wait for the store using SR-37 as src to finish, so we can overwrite it. */
|
||||
cs_wait_slot(b, SB_ID(LS), false);
|
||||
|
||||
/* NIR expects zero-based instance ID, but even if it did have an intrinsic to
|
||||
* load the absolute instance ID, we'd want to keep it zero-based to work around
|
||||
* Mali's limitation on non-zero firstInstance when a instance divisor is used.
|
||||
*/
|
||||
cs_update_vt_ctx(b)
|
||||
cs_move32_to(b, cs_sr_reg32(b, 37), 0);
|
||||
|
||||
struct mali_primitive_flags_packed flags_override =
|
||||
get_tiler_flags_override(draw);
|
||||
|
||||
cs_req_res(b, CS_IDVS_RES);
|
||||
cs_trace_run_idvs(b, tracing_ctx, cs_scratch_reg_tuple(b, 0, 4),
|
||||
flags_override.opaque[0], false, true,
|
||||
|
@@ -139,6 +139,31 @@ panvk_lower_sysvals(nir_builder *b, nir_instr *instr, void *data)
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
panvk_lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin,
|
||||
UNUSED void *data)
|
||||
{
|
||||
if (intrin->intrinsic != nir_intrinsic_load_input)
|
||||
return false;
|
||||
|
||||
b->cursor = nir_before_instr(&intrin->instr);
|
||||
nir_def *ld_attr = nir_load_attribute_pan(
|
||||
b, intrin->def.num_components, intrin->def.bit_size,
|
||||
b->shader->options->vertex_id_zero_based ?
|
||||
nir_load_vertex_id_zero_base(b) :
|
||||
nir_load_vertex_id(b),
|
||||
PAN_ARCH >= 9 ?
|
||||
nir_iadd(b, nir_load_instance_id(b), nir_load_base_instance(b)) :
|
||||
nir_load_instance_id(b),
|
||||
nir_get_io_offset_src(intrin)->ssa,
|
||||
.base = nir_intrinsic_base(intrin),
|
||||
.component = nir_intrinsic_component(intrin),
|
||||
.dest_type = nir_intrinsic_dest_type(intrin));
|
||||
nir_def_replace(&intrin->def, ld_attr);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#if PAN_ARCH <= 7
|
||||
static bool
|
||||
lower_gl_pos_layer_writes(nir_builder *b, nir_instr *instr, void *data)
|
||||
@@ -555,6 +580,10 @@ panvk_lower_nir(struct panvk_device *dev, nir_shader *nir,
|
||||
|
||||
pan_shader_preprocess(nir, compile_input->gpu_id);
|
||||
|
||||
if (stage == MESA_SHADER_VERTEX)
|
||||
NIR_PASS(_, nir, nir_shader_intrinsics_pass, panvk_lower_load_vs_input,
|
||||
nir_metadata_control_flow, NULL);
|
||||
|
||||
/* since valhall, panvk_per_arch(nir_lower_descriptors) separates the
|
||||
* driver set and the user sets, and does not need pan_lower_image_index
|
||||
*/
|
||||
|
Reference in New Issue
Block a user