diff --git a/src/panfrost/compiler/bifrost_compile.c b/src/panfrost/compiler/bifrost_compile.c index 34c52a67fb8..7252daf63be 100644 --- a/src/panfrost/compiler/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost_compile.c @@ -1047,7 +1047,8 @@ bifrost_nir_specialize_idvs(nir_builder *b, nir_instr *instr, void *data) nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - if (intr->intrinsic != nir_intrinsic_store_output) + if (intr->intrinsic != nir_intrinsic_store_output && + intr->intrinsic != nir_intrinsic_store_per_view_output) return false; if (bi_should_remove_store(intr, *idvs)) { @@ -1127,11 +1128,12 @@ bi_emit_store_vary(bi_builder *b, nir_intrinsic_instr *instr) bi_imm_u32(format), regfmt, nr - 1); } else if (b->shader->arch >= 9 && b->shader->idvs != BI_IDVS_NONE) { bi_index index = bi_preload(b, 59); + unsigned index_offset = 0; unsigned pos_attr_offset = 0; unsigned src_bit_sz = nir_src_bit_size(instr->src[0]); if (psiz || layer) - index = bi_iadd_imm_i32(b, index, 4); + index_offset += 4; if (layer) { assert(nr == 1 && src_bit_sz == 32); @@ -1143,11 +1145,29 @@ bi_emit_store_vary(bi_builder *b, nir_intrinsic_instr *instr) if (psiz) assert(T_size == 16 && "should've been lowered"); + bool varying = (b->shader->idvs == BI_IDVS_VARYING); + + if (instr->intrinsic == nir_intrinsic_store_per_view_output) { + unsigned view_index = nir_src_as_uint(instr->src[1]); + + if (varying) { + index_offset += view_index * 4; + } else { + /* We don't patch these offsets in the no_psiz variant, so if + * multiview is enabled we can't switch to the basic format by + * using no_psiz */ + bool extended_position_fifo = b->shader->nir->info.outputs_written & + (VARYING_BIT_LAYER | VARYING_BIT_PSIZ); + unsigned position_fifo_stride = extended_position_fifo ? 8 : 4; + index_offset += view_index * position_fifo_stride; + } + } + + if (index_offset != 0) + index = bi_iadd_imm_i32(b, index, index_offset); bi_index address = bi_lea_buf_imm(b, index); bi_emit_split_i32(b, a, address, 2); - bool varying = (b->shader->idvs == BI_IDVS_VARYING); - bi_store(b, nr * src_bit_sz, data, a[0], a[1], varying ? BI_SEG_VARY : BI_SEG_POS, varying ? bi_varying_offset(b->shader, instr) : pos_attr_offset); @@ -1739,6 +1759,7 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr) break; case nir_intrinsic_store_output: + case nir_intrinsic_store_per_view_output: if (stage == MESA_SHADER_FRAGMENT) bi_emit_fragment_out(b, instr); else if (stage == MESA_SHADER_VERTEX) @@ -1978,6 +1999,7 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr) bi_emit_derivative(b, dst, instr, 2, true); break; + case nir_intrinsic_load_view_index: case nir_intrinsic_load_layer_id: assert(b->shader->arch >= 9); bi_mov_i32_to(b, dst, bi_u8_to_u32(b, bi_byte(bi_preload(b, 62), 0))); diff --git a/src/panfrost/util/pan_ir.h b/src/panfrost/util/pan_ir.h index 5b0d9a1cc0c..8168b550039 100644 --- a/src/panfrost/util/pan_ir.h +++ b/src/panfrost/util/pan_ir.h @@ -105,6 +105,7 @@ struct panfrost_compile_inputs { } blend; bool no_idvs; bool no_ubo_to_push; + uint32_t view_mask; /* Used on Valhall. * diff --git a/src/panfrost/util/pan_lower_store_component.c b/src/panfrost/util/pan_lower_store_component.c index 717250138a3..8ca139a3a8a 100644 --- a/src/panfrost/util/pan_lower_store_component.c +++ b/src/panfrost/util/pan_lower_store_component.c @@ -36,7 +36,8 @@ static bool lower_store_component(nir_builder *b, nir_intrinsic_instr *intr, void *data) { - if (intr->intrinsic != nir_intrinsic_store_output) + if (intr->intrinsic != nir_intrinsic_store_output && + intr->intrinsic != nir_intrinsic_store_per_view_output) return false; struct hash_table_u64 *slots = data; @@ -44,6 +45,11 @@ lower_store_component(nir_builder *b, nir_intrinsic_instr *intr, void *data) nir_src *slot_src = nir_get_io_offset_src(intr); uint64_t slot = nir_src_as_uint(*slot_src) + nir_intrinsic_base(intr); + if (intr->intrinsic == nir_intrinsic_store_per_view_output) { + uint64_t view_index = nir_src_as_uint(intr->src[1]); + slot |= view_index << 32; + } + nir_intrinsic_instr *prev = _mesa_hash_table_u64_search(slots, slot); unsigned mask = (prev ? nir_intrinsic_write_mask(prev) : 0); diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c index 953823568c1..8ad3bebf60f 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c @@ -1571,6 +1571,7 @@ set_tiler_idvs_flags(struct cs_builder *b, struct panvk_cmd_buffer *cmdbuf, cfg.secondary_shader = vs->info.vs.secondary_enable && fs != NULL; cfg.primitive_restart = ia->primitive_restart_enable; + cfg.view_mask = cmdbuf->state.gfx.render.view_mask; } cs_move32_to(b, cs_sr_reg32(b, 56), tiler_idvs_flags.opaque[0]); @@ -1857,8 +1858,12 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf, * we decide to support layared+indirect, we'll need to pass the * layer_count info through the tiler descriptor, for instance by * re-using one of the word that's flagged 'ignored' in the descriptor - * (word 14:23). */ - assert(cmdbuf->state.gfx.render.layer_count <= 1); + * (word 14:23). + * + * Multiview is limited to 8 layers, and so will always fit in one TD. + * Therefore layered rendering is allowed with multiview. */ + assert(cmdbuf->state.gfx.render.layer_count <= 1 || + cmdbuf->state.gfx.render.view_mask); /* MultiDrawIndirect (.maxDrawIndirectCount) needs additional changes. */ assert(draw->indirect.draw_count == 1); @@ -1978,7 +1983,10 @@ panvk_per_arch(cmd_inherit_render_state)( sizeof(cmdbuf->state.gfx.render.s_attachment)); cmdbuf->state.gfx.render.bound_attachments = 0; - cmdbuf->state.gfx.render.layer_count = 0; + cmdbuf->state.gfx.render.view_mask = inheritance_info->viewMask; + cmdbuf->state.gfx.render.layer_count = inheritance_info->viewMask ? + util_last_bit(inheritance_info->viewMask) : + 0; *fbinfo = (struct pan_fb_info){ .tile_buf_budget = panfrost_query_optimal_tib_size(phys_dev->model), .nr_samples = inheritance_info->rasterizationSamples, diff --git a/src/panfrost/vulkan/panvk_cmd_draw.h b/src/panfrost/vulkan/panvk_cmd_draw.h index 73f751f34da..2312f0e79e6 100644 --- a/src/panfrost/vulkan/panvk_cmd_draw.h +++ b/src/panfrost/vulkan/panvk_cmd_draw.h @@ -40,6 +40,7 @@ struct panvk_resolve_attachment { struct panvk_rendering_state { VkRenderingFlags flags; uint32_t layer_count; + uint32_t view_mask; enum vk_rp_attachment_flags bound_attachments; struct { diff --git a/src/panfrost/vulkan/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/panvk_vX_cmd_draw.c index f184f8c468b..769004d9687 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_draw.c @@ -227,7 +227,10 @@ panvk_per_arch(cmd_init_render_state)(struct panvk_cmd_buffer *cmdbuf, memset(&state->render.s_attachment, 0, sizeof(state->render.s_attachment)); state->render.bound_attachments = 0; - state->render.layer_count = pRenderingInfo->layerCount; + cmdbuf->state.gfx.render.layer_count = pRenderingInfo->viewMask ? + util_last_bit(pRenderingInfo->viewMask) : + pRenderingInfo->layerCount; + cmdbuf->state.gfx.render.view_mask = pRenderingInfo->viewMask; *fbinfo = (struct pan_fb_info){ .tile_buf_budget = panfrost_query_optimal_tib_size(phys_dev->model), .nr_samples = 1, @@ -390,7 +393,7 @@ panvk_per_arch(cmd_resolve_attachments)(struct panvk_cmd_buffer *cmdbuf) .extent.height = fbinfo->extent.maxy - fbinfo->extent.miny + 1, }, .layerCount = cmdbuf->state.gfx.render.layer_count, - .viewMask = 0, + .viewMask = cmdbuf->state.gfx.render.view_mask, .colorAttachmentCount = color_att_count, .pColorAttachments = color_atts, .pDepthAttachment = &z_att, diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c index ced0ea76802..fc2b43e6891 100644 --- a/src/panfrost/vulkan/panvk_vX_shader.c +++ b/src/panfrost/vulkan/panvk_vX_shader.c @@ -372,6 +372,9 @@ panvk_hash_graphics_state(struct vk_physical_device *device, _mesa_blake3_update(&blake3_ctx, &sample_shading_enable, sizeof(sample_shading_enable)); + _mesa_blake3_update(&blake3_ctx, &state->rp->view_mask, + sizeof(state->rp->view_mask)); + _mesa_blake3_final(&blake3_ctx, blake3_out); } @@ -458,6 +461,23 @@ panvk_lower_nir(struct panvk_device *dev, nir_shader *nir, to_panvk_instance(dev->vk.physical->instance); gl_shader_stage stage = nir->info.stage; +#if PAN_ARCH >= 10 + if (stage == MESA_SHADER_VERTEX && compile_input->view_mask) { + nir_lower_multiview_options options = { + .view_mask = compile_input->view_mask, + .allowed_per_view_outputs = ~0 + }; + /* The only case where this should fail is with memory/image writes, + * which we don't support in vertex shaders */ + assert(nir_can_lower_multiview(nir, options)); + NIR_PASS(_, nir, nir_lower_multiview, options); + /* Pull output writes out of the loop and give them constant offsets for + * pan_lower_store_components */ + NIR_PASS(_, nir, nir_lower_io_to_temporaries, + nir_shader_get_entrypoint(nir), true, false); + } +#endif + NIR_PASS(_, nir, panvk_per_arch(nir_lower_descriptors), dev, rs, set_layout_count, set_layouts, shader); @@ -835,6 +855,7 @@ panvk_compile_shader(struct panvk_device *dev, struct panfrost_compile_inputs inputs = { .gpu_id = phys_dev->kmod.props.gpu_prod_id, .no_ubo_to_push = true, + .view_mask = (state && state->rp) ? state->rp->view_mask : 0, }; panvk_lower_nir(dev, nir, info->set_layout_count, info->set_layouts,