panvk: implement multiview support

In Valhall multiview, position/varying shaders are invoked once per
draw. Each invocation writes separate outputs for all views. Fragment
processing is handled by the existing multilayer support.

Note that because the hardware only supports up to 8 views, we don't
have to handle the case where there are too many layers to fit in one
tiler descriptor when multiview is enabled.

Signed-off-by: Benjamin Lee <benjamin.lee@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31704>
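The compiler-side changes below boil down to computing a per-view offset from the base output index preloaded in r59: 4 units per view for varyings, and 4 or 8 per view for positions depending on whether the extended position FIFO format (layer/psiz written) is in use. A minimal standalone sketch of that rule, assuming those strides; the helper name and parameters are illustrative, not driver API:

   /* Illustrative sketch of the index_offset computation the patch adds
    * to bi_emit_store_vary; not actual driver code. */
   static unsigned
   per_view_index_offset(bool varying, bool has_psiz_or_layer,
                         bool extended_position_fifo, unsigned view_index)
   {
      unsigned offset = 0;

      /* layer/psiz outputs displace the position record by one slot */
      if (has_psiz_or_layer)
         offset += 4;

      if (varying)
         offset += view_index * 4;
      else
         offset += view_index * (extended_position_fifo ? 8 : 4);

      return offset;
   }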
@@ -1047,7 +1047,8 @@ bifrost_nir_specialize_idvs(nir_builder *b, nir_instr *instr, void *data)
 
    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
 
-   if (intr->intrinsic != nir_intrinsic_store_output)
+   if (intr->intrinsic != nir_intrinsic_store_output &&
+       intr->intrinsic != nir_intrinsic_store_per_view_output)
       return false;
 
    if (bi_should_remove_store(intr, *idvs)) {
@@ -1127,11 +1128,12 @@ bi_emit_store_vary(bi_builder *b, nir_intrinsic_instr *instr)
                   bi_imm_u32(format), regfmt, nr - 1);
    } else if (b->shader->arch >= 9 && b->shader->idvs != BI_IDVS_NONE) {
       bi_index index = bi_preload(b, 59);
+      unsigned index_offset = 0;
       unsigned pos_attr_offset = 0;
       unsigned src_bit_sz = nir_src_bit_size(instr->src[0]);
 
       if (psiz || layer)
-         index = bi_iadd_imm_i32(b, index, 4);
+         index_offset += 4;
 
       if (layer) {
          assert(nr == 1 && src_bit_sz == 32);
@@ -1143,11 +1145,29 @@ bi_emit_store_vary(bi_builder *b, nir_intrinsic_instr *instr)
       if (psiz)
          assert(T_size == 16 && "should've been lowered");
 
+      bool varying = (b->shader->idvs == BI_IDVS_VARYING);
+
+      if (instr->intrinsic == nir_intrinsic_store_per_view_output) {
+         unsigned view_index = nir_src_as_uint(instr->src[1]);
+
+         if (varying) {
+            index_offset += view_index * 4;
+         } else {
+            /* We don't patch these offsets in the no_psiz variant, so if
+             * multiview is enabled we can't switch to the basic format by
+             * using no_psiz */
+            bool extended_position_fifo = b->shader->nir->info.outputs_written &
+                                          (VARYING_BIT_LAYER | VARYING_BIT_PSIZ);
+            unsigned position_fifo_stride = extended_position_fifo ? 8 : 4;
+            index_offset += view_index * position_fifo_stride;
+         }
+      }
+
+      if (index_offset != 0)
+         index = bi_iadd_imm_i32(b, index, index_offset);
       bi_index address = bi_lea_buf_imm(b, index);
       bi_emit_split_i32(b, a, address, 2);
 
-      bool varying = (b->shader->idvs == BI_IDVS_VARYING);
-
       bi_store(b, nr * src_bit_sz, data, a[0], a[1],
                varying ? BI_SEG_VARY : BI_SEG_POS,
                varying ? bi_varying_offset(b->shader, instr) : pos_attr_offset);
@@ -1739,6 +1759,7 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
       break;
 
    case nir_intrinsic_store_output:
+   case nir_intrinsic_store_per_view_output:
       if (stage == MESA_SHADER_FRAGMENT)
          bi_emit_fragment_out(b, instr);
       else if (stage == MESA_SHADER_VERTEX)
@@ -1978,6 +1999,7 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
       bi_emit_derivative(b, dst, instr, 2, true);
       break;
 
+   case nir_intrinsic_load_view_index:
    case nir_intrinsic_load_layer_id:
       assert(b->shader->arch >= 9);
       bi_mov_i32_to(b, dst, bi_u8_to_u32(b, bi_byte(bi_preload(b, 62), 0)));
@@ -105,6 +105,7 @@ struct panfrost_compile_inputs {
    } blend;
    bool no_idvs;
    bool no_ubo_to_push;
+   uint32_t view_mask;
 
    /* Used on Valhall.
    *
@@ -36,7 +36,8 @@
 static bool
 lower_store_component(nir_builder *b, nir_intrinsic_instr *intr, void *data)
 {
-   if (intr->intrinsic != nir_intrinsic_store_output)
+   if (intr->intrinsic != nir_intrinsic_store_output &&
+       intr->intrinsic != nir_intrinsic_store_per_view_output)
       return false;
 
    struct hash_table_u64 *slots = data;
@@ -44,6 +45,11 @@ lower_store_component(nir_builder *b, nir_intrinsic_instr *intr, void *data)
    nir_src *slot_src = nir_get_io_offset_src(intr);
    uint64_t slot = nir_src_as_uint(*slot_src) + nir_intrinsic_base(intr);
 
+   if (intr->intrinsic == nir_intrinsic_store_per_view_output) {
+      uint64_t view_index = nir_src_as_uint(intr->src[1]);
+      slot |= view_index << 32;
+   }
+
    nir_intrinsic_instr *prev = _mesa_hash_table_u64_search(slots, slot);
    unsigned mask = (prev ? nir_intrinsic_write_mask(prev) : 0);
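Folding the view index into the upper 32 bits of the slot key, as the hunk above does, keeps writes to the same location from different views distinct in the deduplication table. A sketch of the keying rule, with an illustrative helper name:

   /* Low 32 bits: I/O slot (base + constant offset); high 32 bits: view
    * index for store_per_view_output, zero otherwise. Per-view writes to
    * the same slot therefore never merge. */
   static uint64_t
   store_component_key(uint64_t slot, uint64_t view_index)
   {
      return slot | (view_index << 32);
   }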
@@ -1571,6 +1571,7 @@ set_tiler_idvs_flags(struct cs_builder *b, struct panvk_cmd_buffer *cmdbuf,
 
       cfg.secondary_shader = vs->info.vs.secondary_enable && fs != NULL;
       cfg.primitive_restart = ia->primitive_restart_enable;
+      cfg.view_mask = cmdbuf->state.gfx.render.view_mask;
    }
 
    cs_move32_to(b, cs_sr_reg32(b, 56), tiler_idvs_flags.opaque[0]);
@@ -1857,8 +1858,12 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
     * we decide to support layared+indirect, we'll need to pass the
     * layer_count info through the tiler descriptor, for instance by
     * re-using one of the word that's flagged 'ignored' in the descriptor
-    * (word 14:23). */
-   assert(cmdbuf->state.gfx.render.layer_count <= 1);
+    * (word 14:23).
+    *
+    * Multiview is limited to 8 layers, and so will always fit in one TD.
+    * Therefore layered rendering is allowed with multiview. */
+   assert(cmdbuf->state.gfx.render.layer_count <= 1 ||
+          cmdbuf->state.gfx.render.view_mask);
 
    /* MultiDrawIndirect (.maxDrawIndirectCount) needs additional changes. */
    assert(draw->indirect.draw_count == 1);
@@ -1978,7 +1983,10 @@ panvk_per_arch(cmd_inherit_render_state)(
           sizeof(cmdbuf->state.gfx.render.s_attachment));
    cmdbuf->state.gfx.render.bound_attachments = 0;
 
-   cmdbuf->state.gfx.render.layer_count = 0;
+   cmdbuf->state.gfx.render.view_mask = inheritance_info->viewMask;
+   cmdbuf->state.gfx.render.layer_count = inheritance_info->viewMask ?
+                                          util_last_bit(inheritance_info->viewMask) :
+                                          0;
    *fbinfo = (struct pan_fb_info){
       .tile_buf_budget = panfrost_query_optimal_tib_size(phys_dev->model),
       .nr_samples = inheritance_info->rasterizationSamples,
@@ -40,6 +40,7 @@ struct panvk_resolve_attachment {
 struct panvk_rendering_state {
    VkRenderingFlags flags;
    uint32_t layer_count;
+   uint32_t view_mask;
 
    enum vk_rp_attachment_flags bound_attachments;
    struct {
@@ -227,7 +227,10 @@ panvk_per_arch(cmd_init_render_state)(struct panvk_cmd_buffer *cmdbuf,
    memset(&state->render.s_attachment, 0, sizeof(state->render.s_attachment));
    state->render.bound_attachments = 0;
 
-   state->render.layer_count = pRenderingInfo->layerCount;
+   cmdbuf->state.gfx.render.layer_count = pRenderingInfo->viewMask ?
+                                          util_last_bit(pRenderingInfo->viewMask) :
+                                          pRenderingInfo->layerCount;
+   cmdbuf->state.gfx.render.view_mask = pRenderingInfo->viewMask;
    *fbinfo = (struct pan_fb_info){
       .tile_buf_budget = panfrost_query_optimal_tib_size(phys_dev->model),
       .nr_samples = 1,
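Both render-state hunks above apply the same rule: with multiview enabled, the layer count is derived from the highest view index rather than taken from the API layer count. A sketch of that rule, using Mesa's util_last_bit helper (which returns one past the index of the most significant set bit); the function name is illustrative:

   /* e.g. view_mask = 0b0101 -> util_last_bit() = 3, so three layers are
    * needed even though only views 0 and 2 are rendered */
   static uint32_t
   effective_layer_count(uint32_t view_mask, uint32_t api_layer_count)
   {
      return view_mask ? util_last_bit(view_mask) : api_layer_count;
   }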
@@ -390,7 +393,7 @@ panvk_per_arch(cmd_resolve_attachments)(struct panvk_cmd_buffer *cmdbuf)
          .extent.height = fbinfo->extent.maxy - fbinfo->extent.miny + 1,
       },
       .layerCount = cmdbuf->state.gfx.render.layer_count,
-      .viewMask = 0,
+      .viewMask = cmdbuf->state.gfx.render.view_mask,
       .colorAttachmentCount = color_att_count,
       .pColorAttachments = color_atts,
       .pDepthAttachment = &z_att,
@@ -372,6 +372,9 @@ panvk_hash_graphics_state(struct vk_physical_device *device,
    _mesa_blake3_update(&blake3_ctx, &sample_shading_enable,
                        sizeof(sample_shading_enable));
 
+   _mesa_blake3_update(&blake3_ctx, &state->rp->view_mask,
+                       sizeof(state->rp->view_mask));
+
    _mesa_blake3_final(&blake3_ctx, blake3_out);
 }
 
@@ -458,6 +461,23 @@ panvk_lower_nir(struct panvk_device *dev, nir_shader *nir,
       to_panvk_instance(dev->vk.physical->instance);
    gl_shader_stage stage = nir->info.stage;
 
+#if PAN_ARCH >= 10
+   if (stage == MESA_SHADER_VERTEX && compile_input->view_mask) {
+      nir_lower_multiview_options options = {
+         .view_mask = compile_input->view_mask,
+         .allowed_per_view_outputs = ~0
+      };
+      /* The only case where this should fail is with memory/image writes,
+       * which we don't support in vertex shaders */
+      assert(nir_can_lower_multiview(nir, options));
+      NIR_PASS(_, nir, nir_lower_multiview, options);
+      /* Pull output writes out of the loop and give them constant offsets for
+       * pan_lower_store_components */
+      NIR_PASS(_, nir, nir_lower_io_to_temporaries,
+               nir_shader_get_entrypoint(nir), true, false);
+   }
+#endif
+
    NIR_PASS(_, nir, panvk_per_arch(nir_lower_descriptors), dev, rs,
             set_layout_count, set_layouts, shader);
 
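For intuition on the hunk above (a gloss, not text from the patch): nir_lower_multiview conceptually re-emits the vertex shader's per-view output writes once per set bit in view_mask, turning each store_output of a per-view output into a store_per_view_output tagged with its view index. Enumerating the views a mask selects looks like this sketch, using Mesa's u_foreach_bit helper:

   /* iterate the views enabled by a mask, e.g. 0b0101 -> views 0 and 2 */
   u_foreach_bit(view, view_mask) {
      /* emit outputs for view `view` ... */
   }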
@@ -835,6 +855,7 @@ panvk_compile_shader(struct panvk_device *dev,
    struct panfrost_compile_inputs inputs = {
       .gpu_id = phys_dev->kmod.props.gpu_prod_id,
      .no_ubo_to_push = true,
+      .view_mask = (state && state->rp) ? state->rp->view_mask : 0,
    };
 
    panvk_lower_nir(dev, nir, info->set_layout_count, info->set_layouts,