panvk: Add support for layered rendering
This is needed if we want to use vk_meta_blit.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29450>

Committed by: Marge Bot
Parent: 743b41a284
Commit: 0e74b6eda9
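For orientation before the diff: the change gives each batch one framebuffer descriptor (FBD) and one tiler context per layer, and walks the layers when closing a batch and when emitting draws. The stand-alone sketch below only illustrates the per-layer addressing arithmetic the patch relies on; the struct, sizes and addresses are invented for illustration and are not panvk code.

```c
/* Hypothetical sketch of the per-layer descriptor addressing used by this
 * change; the struct and the sizes below are made up. */
#include <inttypes.h>
#include <stdio.h>

#define FAKE_FBD_STRIDE     256 /* stands in for the computed fbd_size */
#define FAKE_TILER_CTX_SIZE 32  /* stands in for pan_size(TILER_CONTEXT) */

struct fake_batch {
   uint64_t fbd_base;       /* first framebuffer descriptor (GPU address) */
   uint64_t tiler_ctx_base; /* first tiler context (GPU address) */
   uint32_t layer_count;
};

int main(void)
{
   struct fake_batch batch = {
      .fbd_base = 0x10000, .tiler_ctx_base = 0x20000, .layer_count = 4,
   };

   /* One FBD and one tiler context per layer: layer i uses base + i * stride,
    * mirroring cmd_close_batch()/cmd_prepare_tiler_context() below. */
   for (uint32_t i = 0; i < batch.layer_count; i++) {
      uint64_t fbd = batch.fbd_base + (uint64_t)FAKE_FBD_STRIDE * i;
      uint64_t tiler_ctx =
         batch.tiler_ctx_base + (uint64_t)FAKE_TILER_CTX_SIZE * i;

      printf("layer %u: fbd=0x%" PRIx64 " tiler_ctx=0x%" PRIx64 "\n",
             i, fbd, tiler_ctx);
   }
   return 0;
}
```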
@@ -37,23 +37,25 @@ struct panvk_batch {
   struct list_head node;
   struct util_dynarray jobs;
   struct util_dynarray event_ops;
-  struct pan_jc jc;
+  struct pan_jc vtc_jc;
+  struct pan_jc frag_jc;
   struct {
      struct panfrost_ptr desc;
+     uint32_t desc_stride;
      uint32_t bo_count;

      /* One slot per color, two more slots for the depth/stencil buffers. */
      struct pan_kmod_bo *bos[MAX_RTS + 2];
+     uint32_t layer_count;
   } fb;
   struct {
      struct pan_kmod_bo *src, *dst;
   } blit;
   struct panfrost_ptr tls;
-  mali_ptr fragment_job;
   struct {
      struct pan_tiler_context ctx;
      struct panfrost_ptr heap_desc;
-     struct panfrost_ptr ctx_desc;
+     struct panfrost_ptr ctx_descs;
      struct mali_tiler_heap_packed heap_templ;
      struct mali_tiler_context_packed ctx_templ;
   } tiler;

@@ -125,6 +127,7 @@ struct panvk_cmd_graphics_state {

   struct {
      VkRenderingFlags flags;
+     uint32_t layer_count;

      enum vk_rp_attachment_flags bound_attachments;
      struct {

@@ -206,7 +209,8 @@ void panvk_per_arch(cmd_alloc_fb_desc)(struct panvk_cmd_buffer *cmdbuf);
void panvk_per_arch(cmd_alloc_tls_desc)(struct panvk_cmd_buffer *cmdbuf,
                                        bool gfx);

-void panvk_per_arch(cmd_prepare_tiler_context)(struct panvk_cmd_buffer *cmdbuf);
+void panvk_per_arch(cmd_prepare_tiler_context)(struct panvk_cmd_buffer *cmdbuf,
+                                               uint32_t layer_idx);

void panvk_per_arch(cmd_preload_fb_after_batch_split)(
   struct panvk_cmd_buffer *cmdbuf);

@@ -62,22 +62,22 @@ panvk_debug_adjust_bo_flags(const struct panvk_device *device,
}

static void
-panvk_cmd_prepare_fragment_job(struct panvk_cmd_buffer *cmdbuf)
+panvk_cmd_prepare_fragment_job(struct panvk_cmd_buffer *cmdbuf, mali_ptr fbd)
{
   const struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
   struct panvk_batch *batch = cmdbuf->cur_batch;
   struct panfrost_ptr job_ptr =
      pan_pool_alloc_desc(&cmdbuf->desc_pool.base, FRAGMENT_JOB);

-  GENX(pan_emit_fragment_job_payload)
-  (fbinfo, batch->fb.desc.gpu, job_ptr.cpu);
+  GENX(pan_emit_fragment_job_payload)(fbinfo, fbd, job_ptr.cpu);

   pan_section_pack(job_ptr.cpu, FRAGMENT_JOB, HEADER, header) {
      header.type = MALI_JOB_TYPE_FRAGMENT;
      header.index = 1;
   }

-  batch->fragment_job = job_ptr.gpu;
+  pan_jc_add_job(&batch->frag_jc, MALI_JOB_TYPE_FRAGMENT, false, false, 0, 0,
+                 &job_ptr, false);
   util_dynarray_append(&batch->jobs, void *, job_ptr.cpu);
}

@@ -93,7 +93,7 @@ panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf)

   assert(batch);

-  if (!batch->fb.desc.gpu && !batch->jc.first_job) {
+  if (!batch->fb.desc.gpu && !batch->vtc_jc.first_job) {
      if (util_dynarray_num_elements(&batch->event_ops,
                                     struct panvk_cmd_event_op) == 0) {
         /* Content-less batch, let's drop it */

@@ -105,7 +105,7 @@ panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf)
      struct panfrost_ptr ptr =
         pan_pool_alloc_desc(&cmdbuf->desc_pool.base, JOB_HEADER);
      util_dynarray_append(&batch->jobs, void *, ptr.cpu);
-     pan_jc_add_job(&batch->jc, MALI_JOB_TYPE_NULL, false, false, 0, 0,
+     pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_NULL, false, false, 0, 0,
                     &ptr, false);
      list_addtail(&batch->node, &cmdbuf->batches);
   }

@@ -119,14 +119,6 @@ panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf)

   list_addtail(&batch->node, &cmdbuf->batches);

-  if (batch->jc.first_tiler) {
-     ASSERTED unsigned num_preload_jobs = GENX(pan_preload_fb)(
-        &dev->meta.blitter.cache, &cmdbuf->desc_pool.base,
-        &cmdbuf->state.gfx.render.fb.info, 0, batch->tls.gpu, NULL);
-
-     assert(num_preload_jobs == 0);
-  }
-
   if (batch->tlsinfo.tls.size) {
      unsigned thread_tls_alloc =
         panfrost_query_thread_tls_alloc(&phys_dev->kmod.props);

@@ -156,11 +148,29 @@ panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf)
         panfrost_sample_positions_offset(
            pan_sample_pattern(fbinfo->nr_samples));

-     batch->fb.desc.gpu |= GENX(pan_emit_fbd)(
-        &cmdbuf->state.gfx.render.fb.info, 0, &batch->tlsinfo,
-        &batch->tiler.ctx, batch->fb.desc.cpu);
+     for (uint32_t i = 0; i < batch->fb.layer_count; i++) {
+        mali_ptr fbd = batch->fb.desc.gpu + (batch->fb.desc_stride * i);
+        if (batch->vtc_jc.first_tiler) {
+           cmdbuf->state.gfx.render.fb.info.bifrost.pre_post.dcds.gpu = 0;

-     panvk_cmd_prepare_fragment_job(cmdbuf);
+           ASSERTED unsigned num_preload_jobs = GENX(pan_preload_fb)(
+              &dev->meta.blitter.cache, &cmdbuf->desc_pool.base,
+              &cmdbuf->state.gfx.render.fb.info, i, batch->tls.gpu, NULL);
+
+           /* Bifrost GPUs use pre frame DCDs to preload the FB content. We
+            * thus expect num_preload_jobs to be zero.
+            */
+           assert(!num_preload_jobs);
+        }
+
+        panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf, i);
+        fbd |= GENX(pan_emit_fbd)(
+           &cmdbuf->state.gfx.render.fb.info, i, &batch->tlsinfo,
+           &batch->tiler.ctx,
+           batch->fb.desc.cpu + (batch->fb.desc_stride * i));
+
+        panvk_cmd_prepare_fragment_job(cmdbuf, fbd);
+     }
   }

   cmdbuf->cur_batch = NULL;

@@ -176,14 +186,24 @@ panvk_per_arch(cmd_alloc_fb_desc)(struct panvk_cmd_buffer *cmdbuf)

   const struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
   bool has_zs_ext = fbinfo->zs.view.zs || fbinfo->zs.view.s;
+  batch->fb.layer_count = cmdbuf->state.gfx.render.layer_count;
+  unsigned fbd_size = pan_size(FRAMEBUFFER);
+
+  if (has_zs_ext)
+     fbd_size = ALIGN_POT(fbd_size, pan_alignment(ZS_CRC_EXTENSION)) +
+                pan_size(ZS_CRC_EXTENSION);
+
+  fbd_size = ALIGN_POT(fbd_size, pan_alignment(RENDER_TARGET)) +
+             (MAX2(fbinfo->rt_count, 1) * pan_size(RENDER_TARGET));
+
   batch->fb.bo_count = cmdbuf->state.gfx.render.fb.bo_count;
   memcpy(batch->fb.bos, cmdbuf->state.gfx.render.fb.bos,
          batch->fb.bo_count * sizeof(batch->fb.bos[0]));
-  batch->fb.desc = pan_pool_alloc_desc_aggregate(
-     &cmdbuf->desc_pool.base, PAN_DESC(FRAMEBUFFER),
-     PAN_DESC_ARRAY(has_zs_ext ? 1 : 0, ZS_CRC_EXTENSION),
-     PAN_DESC_ARRAY(MAX2(fbinfo->rt_count, 1), RENDER_TARGET));

+  batch->fb.desc = pan_pool_alloc_aligned(&cmdbuf->desc_pool.base,
+                                          fbd_size * batch->fb.layer_count,
+                                          pan_alignment(FRAMEBUFFER));
+  batch->fb.desc_stride = fbd_size;

   memset(&cmdbuf->state.gfx.render.fb.info.bifrost.pre_post.dcds, 0,
          sizeof(cmdbuf->state.gfx.render.fb.info.bifrost.pre_post.dcds));

@@ -201,19 +221,22 @@ panvk_per_arch(cmd_alloc_tls_desc)(struct panvk_cmd_buffer *cmdbuf, bool gfx)
}

void
-panvk_per_arch(cmd_prepare_tiler_context)(struct panvk_cmd_buffer *cmdbuf)
+panvk_per_arch(cmd_prepare_tiler_context)(struct panvk_cmd_buffer *cmdbuf,
+                                          uint32_t layer_idx)
{
   struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
-  struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
   struct panvk_batch *batch = cmdbuf->cur_batch;

-  if (batch->tiler.ctx_desc.cpu)
-     return;
+  if (batch->tiler.ctx_descs.cpu)
+     goto out_set_layer_ctx;

+  const struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
+  uint32_t layer_count = cmdbuf->state.gfx.render.layer_count;
   batch->tiler.heap_desc =
      pan_pool_alloc_desc(&cmdbuf->desc_pool.base, TILER_HEAP);
-  batch->tiler.ctx_desc =
-     pan_pool_alloc_desc(&cmdbuf->desc_pool.base, TILER_CONTEXT);
+
+  batch->tiler.ctx_descs = pan_pool_alloc_desc_array(
+     &cmdbuf->desc_pool.base, layer_count, TILER_CONTEXT);

   pan_pack(&batch->tiler.heap_templ, TILER_HEAP, cfg) {
      cfg.size = pan_kmod_bo_size(dev->tiler_heap->bo);

@@ -232,9 +255,20 @@ panvk_per_arch(cmd_prepare_tiler_context)(struct panvk_cmd_buffer *cmdbuf)

   memcpy(batch->tiler.heap_desc.cpu, &batch->tiler.heap_templ,
          sizeof(batch->tiler.heap_templ));
-  memcpy(batch->tiler.ctx_desc.cpu, &batch->tiler.ctx_templ,
-         sizeof(batch->tiler.ctx_templ));
-  batch->tiler.ctx.bifrost = batch->tiler.ctx_desc.gpu;
+
+  struct mali_tiler_context_packed *ctxs = batch->tiler.ctx_descs.cpu;
+
+  assert(layer_count > 0);
+  for (uint32_t i = 0; i < layer_count; i++) {
+     STATIC_ASSERT(
+        !(pan_size(TILER_CONTEXT) & (pan_alignment(TILER_CONTEXT) - 1)));
+
+     memcpy(&ctxs[i], &batch->tiler.ctx_templ, sizeof(*ctxs));
+  }
+
+out_set_layer_ctx:
+  batch->tiler.ctx.bifrost =
+     batch->tiler.ctx_descs.gpu + (pan_size(TILER_CONTEXT) * layer_idx);
}

struct panvk_batch *

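A stand-alone illustration of the fbd_size/desc_stride computation done in cmd_alloc_fb_desc() above: a framebuffer descriptor, an optional ZS/CRC extension, and one render-target descriptor per color attachment (at least one), with power-of-two alignment between the parts, multiplied by the layer count for the allocation. The sizes and alignments below are placeholders, not the real pan_size()/pan_alignment() values.

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define ALIGN_POT(x, pot_align) (((x) + (pot_align) - 1) & ~((pot_align) - 1))
#define MAX2(a, b) ((a) > (b) ? (a) : (b))

/* Placeholder descriptor sizes/alignments, standing in for pan_size() and
 * pan_alignment(). */
#define FBD_SIZE     64
#define ZS_CRC_SIZE  32
#define ZS_CRC_ALIGN 32
#define RT_SIZE      32
#define RT_ALIGN     32

static uint32_t
fbd_stride(bool has_zs_ext, uint32_t rt_count)
{
   uint32_t fbd_size = FBD_SIZE;

   if (has_zs_ext)
      fbd_size = ALIGN_POT(fbd_size, ZS_CRC_ALIGN) + ZS_CRC_SIZE;

   fbd_size = ALIGN_POT(fbd_size, RT_ALIGN) + MAX2(rt_count, 1) * RT_SIZE;
   return fbd_size;
}

int main(void)
{
   uint32_t layer_count = 6;
   uint32_t stride = fbd_stride(true, 2);

   /* cmd_alloc_fb_desc() allocates stride * layer_count bytes in one go. */
   printf("stride=%u total=%u\n", stride, stride * layer_count);
   return 0;
}
```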
@@ -136,11 +136,11 @@ panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer,

   unsigned copy_desc_dep =
      copy_desc_job.gpu
-        ? pan_jc_add_job(&batch->jc, MALI_JOB_TYPE_COMPUTE, false, false, 0, 0,
-                         &copy_desc_job, false)
+        ? pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_COMPUTE, false, false,
+                         0, 0, &copy_desc_job, false)
         : 0;

-  pan_jc_add_job(&batch->jc, MALI_JOB_TYPE_COMPUTE, false, false, 0,
+  pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_COMPUTE, false, false, 0,
                 copy_desc_dep, &job, false);

   batch->tlsinfo.tls.size = shader->info.tls_size;

@@ -46,6 +46,7 @@ struct panvk_draw_info {
   unsigned instance_count;
   int vertex_offset;
   unsigned offset_start;
+  uint32_t layer_id;
   struct mali_invocation_packed invocation;
   struct {
      mali_ptr varyings;

@@ -105,10 +106,12 @@ panvk_cmd_prepare_draw_sysvals(struct panvk_cmd_buffer *cmdbuf,
   unsigned base_vertex = draw->index_size ? draw->vertex_offset : 0;
   if (sysvals->vs.first_vertex != draw->offset_start ||
       sysvals->vs.base_vertex != base_vertex ||
-      sysvals->vs.base_instance != draw->first_instance) {
+      sysvals->vs.base_instance != draw->first_instance ||
+      sysvals->layer_id != draw->layer_id) {
      sysvals->vs.first_vertex = draw->offset_start;
      sysvals->vs.base_vertex = base_vertex;
      sysvals->vs.base_instance = draw->first_instance;
+     sysvals->layer_id = draw->layer_id;
      cmdbuf->state.gfx.push_uniforms = 0;
   }

@@ -468,7 +471,7 @@ panvk_draw_prepare_tiler_context(struct panvk_cmd_buffer *cmdbuf,
{
   struct panvk_batch *batch = cmdbuf->cur_batch;

-  panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf);
+  panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf, draw->layer_id);
   draw->tiler_ctx = &batch->tiler.ctx;
}

@@ -1134,6 +1137,7 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
   struct panvk_shader_desc_state *vs_desc_state = &cmdbuf->state.gfx.vs.desc;
   struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc;
   struct panvk_descriptor_state *desc_state = &cmdbuf->state.gfx.desc_state;
+  uint32_t layer_count = cmdbuf->state.gfx.render.layer_count;
   const struct vk_rasterization_state *rs =
      &cmdbuf->vk.dynamic_graphics_state.rs;
   bool idvs = vs->info.vs.idvs;

@@ -1146,7 +1150,7 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
    * pilot shader dealing with descriptor copies, and we need one
    * <vertex,tiler> pair per draw.
    */
-  if (batch->jc.job_index >= (UINT16_MAX - 4)) {
+  if (batch->vtc_jc.job_index + (4 * layer_count) >= UINT16_MAX) {
      panvk_per_arch(cmd_close_batch)(cmdbuf);
      panvk_per_arch(cmd_preload_fb_after_batch_split)(cmdbuf);
      batch = panvk_per_arch(cmd_open_batch)(cmdbuf);

@@ -1164,13 +1168,6 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)

   panvk_per_arch(cmd_prepare_push_descs)(&cmdbuf->desc_pool.base, desc_state,
                                          used_set_mask);
-  panvk_cmd_prepare_draw_sysvals(cmdbuf, draw);
-
-  if (!cmdbuf->state.gfx.push_uniforms) {
-     cmdbuf->state.gfx.push_uniforms = panvk_cmd_prepare_push_uniforms(
-        &cmdbuf->desc_pool.base, &cmdbuf->state.push_constants,
-        &cmdbuf->state.gfx.sysvals, sizeof(cmdbuf->state.gfx.sysvals));
-  }

   panvk_per_arch(cmd_prepare_shader_desc_tables)(&cmdbuf->desc_pool.base,
                                                  &cmdbuf->state.gfx.desc_state,

@@ -1179,8 +1176,8 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)

   unsigned copy_desc_job_id =
      draw->jobs.vertex_copy_desc.gpu
-        ? pan_jc_add_job(&batch->jc, MALI_JOB_TYPE_COMPUTE, false, false, 0, 0,
-                         &draw->jobs.vertex_copy_desc, false)
+        ? pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_COMPUTE, false, false,
+                         0, 0, &draw->jobs.vertex_copy_desc, false)
         : 0;

   bool vs_writes_pos =

@@ -1199,43 +1196,51 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
       * tiler job doesn't execute the fragment shader, the fragment job
       * will, and the tiler/fragment synchronization happens at the batch
       * level. */
-        pan_jc_add_job(&batch->jc, MALI_JOB_TYPE_COMPUTE, false, false, 0, 0,
-                       &draw->jobs.frag_copy_desc, false);
+        pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_COMPUTE, false, false, 0,
+                       0, &draw->jobs.frag_copy_desc, false);
      }
   }

   /* TODO: indexed draws */
   draw->tls = batch->tls.gpu;
   draw->fb = batch->fb.desc.gpu;
-  draw->push_uniforms = cmdbuf->state.gfx.push_uniforms;

   panfrost_pack_work_groups_compute(&draw->invocation, 1, draw->vertex_range,
                                     draw->instance_count, 1, 1, 1, true,
                                     false);

   panvk_draw_prepare_fs_rsd(cmdbuf, draw);
-  panvk_draw_prepare_varyings(cmdbuf, draw);
   panvk_draw_prepare_attributes(cmdbuf, draw);
   panvk_draw_prepare_viewport(cmdbuf, draw);
-  panvk_draw_prepare_tiler_context(cmdbuf, draw);
   batch->tlsinfo.tls.size = MAX3(vs->info.tls_size, fs ? fs->info.tls_size : 0,
                                  batch->tlsinfo.tls.size);

-  if (idvs) {
-     panvk_draw_prepare_idvs_job(cmdbuf, draw);
-     pan_jc_add_job(&batch->jc, MALI_JOB_TYPE_INDEXED_VERTEX, false, false, 0,
-                    copy_desc_job_id, &draw->jobs.idvs, false);
-  } else {
-     panvk_draw_prepare_vertex_job(cmdbuf, draw);
+  for (uint32_t i = 0; i < layer_count; i++) {
+     draw->layer_id = i;
+     panvk_draw_prepare_varyings(cmdbuf, draw);
+     panvk_cmd_prepare_draw_sysvals(cmdbuf, draw);
+     cmdbuf->state.gfx.push_uniforms = panvk_cmd_prepare_push_uniforms(
+        &cmdbuf->desc_pool.base, &cmdbuf->state.push_constants,
+        &cmdbuf->state.gfx.sysvals, sizeof(cmdbuf->state.gfx.sysvals));
+     draw->push_uniforms = cmdbuf->state.gfx.push_uniforms;
+     panvk_draw_prepare_tiler_context(cmdbuf, draw);

-     unsigned vjob_id =
-        pan_jc_add_job(&batch->jc, MALI_JOB_TYPE_VERTEX, false, false, 0,
-                       copy_desc_job_id, &draw->jobs.vertex, false);
+     if (idvs) {
+        panvk_draw_prepare_idvs_job(cmdbuf, draw);
+        pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_INDEXED_VERTEX, false,
+                       false, 0, copy_desc_job_id, &draw->jobs.idvs, false);
+     } else {
+        panvk_draw_prepare_vertex_job(cmdbuf, draw);

-     if (needs_tiling) {
-        panvk_draw_prepare_tiler_job(cmdbuf, draw);
-        pan_jc_add_job(&batch->jc, MALI_JOB_TYPE_TILER, false, false, vjob_id,
-                       0, &draw->jobs.tiler, false);
+        unsigned vjob_id =
+           pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_VERTEX, false, false,
+                          0, copy_desc_job_id, &draw->jobs.vertex, false);
+
+        if (needs_tiling) {
+           panvk_draw_prepare_tiler_job(cmdbuf, draw);
+           pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_TILER, false, false,
+                          vjob_id, 0, &draw->jobs.tiler, false);
+        }
      }
   }

@@ -1417,6 +1422,7 @@ panvk_cmd_begin_rendering_init_state(struct panvk_cmd_buffer *cmdbuf,
          sizeof(cmdbuf->state.gfx.render.color_attachments));
   cmdbuf->state.gfx.render.bound_attachments = 0;

+  cmdbuf->state.gfx.render.layer_count = pRenderingInfo->layerCount;
   *fbinfo = (struct pan_fb_info){
      .tile_buf_budget = panfrost_query_optimal_tib_size(phys_dev->model),
      .nr_samples = 1,

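One detail worth calling out from the draw path above: job indices in a job chain are 16-bit, and with layered rendering a draw is now emitted once per layer, so the check in panvk_cmd_draw() conservatively reserves four job slots per layer before deciding to split the batch. A minimal sketch of that guard; everything except the "4 per layer" figure is invented for illustration.

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Reserve up to 4 job-chain entries per layer for one draw, mirroring the
 * overflow check in panvk_cmd_draw() above. */
static bool
batch_would_overflow(uint16_t job_index, uint32_t layer_count)
{
   return (uint32_t)job_index + 4 * layer_count >= UINT16_MAX;
}

int main(void)
{
   printf("%d\n", batch_would_overflow(65500, 12)); /* 1: split the batch */
   printf("%d\n", batch_would_overflow(100, 4));    /* 0: keep appending   */
   return 0;
}
```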
@@ -63,7 +63,8 @@ panvk_add_wait_event_operation(struct panvk_cmd_buffer *cmdbuf,
      /* Let's close the current batch so any future commands wait on the
       * event signal operation.
       */
-     if (cmdbuf->cur_batch->fragment_job || cmdbuf->cur_batch->jc.first_job) {
+     if (cmdbuf->cur_batch->frag_jc.first_job ||
+         cmdbuf->cur_batch->vtc_jc.first_job) {
         panvk_per_arch(cmd_close_batch)(cmdbuf);
         panvk_per_arch(cmd_preload_fb_after_batch_split)(cmdbuf);
         panvk_per_arch(cmd_open_batch)(cmdbuf);

@@ -120,6 +120,7 @@ panvk_meta_blit(struct panvk_cmd_buffer *cmdbuf,
   }

   panvk_per_arch(cmd_close_batch)(cmdbuf);
+  cmdbuf->state.gfx.render.layer_count = 1;

   GENX(pan_blit_ctx_init)
   (&dev->meta.blitter.cache, blitinfo, &cmdbuf->desc_pool.base, &ctx);

@@ -136,13 +137,13 @@ panvk_meta_blit(struct panvk_cmd_buffer *cmdbuf,
      batch->blit.dst = dst_img->bo;
      panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
      panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
-     panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf);
+     panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf, 0);

      tsd = batch->tls.gpu;
-     tiler = batch->tiler.ctx_desc.gpu;
+     tiler = batch->tiler.ctx_descs.gpu;

-     struct panfrost_ptr job =
-        GENX(pan_blit)(&ctx, &cmdbuf->desc_pool.base, &batch->jc, tsd, tiler);
+     struct panfrost_ptr job = GENX(pan_blit)(&ctx, &cmdbuf->desc_pool.base,
+                                              &batch->vtc_jc, tsd, tiler);
      util_dynarray_append(&batch->jobs, void *, job.cpu);
      panvk_per_arch(cmd_close_batch)(cmdbuf);
   } while (pan_blit_next_surface(&ctx));

@@ -274,7 +274,8 @@ panvk_meta_clear_attachment(struct panvk_cmd_buffer *cmdbuf, unsigned rt,

   panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
-  panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf);
+  cmdbuf->state.gfx.render.layer_count = 1;
+  panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf, 0);

   mali_ptr vpd = panvk_per_arch(meta_emit_viewport)(&cmdbuf->desc_pool.base,
                                                     minx, miny, maxx, maxy);

@@ -288,7 +289,7 @@ panvk_meta_clear_attachment(struct panvk_cmd_buffer *cmdbuf, unsigned rt,

   enum glsl_base_type base_type = panvk_meta_get_format_type(pfmt);

-  mali_ptr tiler = batch->tiler.ctx_desc.gpu;
+  mali_ptr tiler = batch->tiler.ctx_descs.gpu;
   mali_ptr tsd = batch->tls.gpu;

   mali_ptr pushconsts = 0, rsd = 0;

@@ -311,8 +312,8 @@ panvk_meta_clear_attachment(struct panvk_cmd_buffer *cmdbuf, unsigned rt,
      struct panfrost_ptr job;

      job = panvk_meta_clear_attachment_emit_tiler_job(
-        &cmdbuf->desc_pool.base, &batch->jc, coordinates, pushconsts, vpd, rsd,
-        tsd, tiler);
+        &cmdbuf->desc_pool.base, &batch->vtc_jc, coordinates, pushconsts, vpd,
+        rsd, tsd, tiler);

      util_dynarray_append(&batch->jobs, void *, job.cpu);
   }

@@ -336,6 +337,7 @@ panvk_meta_clear_color_img(struct panvk_cmd_buffer *cmdbuf,
                   PIPE_SWIZZLE_W},
   };

+  cmdbuf->state.gfx.render.layer_count = 1;
   cmdbuf->state.gfx.render.fb.crc_valid[0] = false;
   *fbinfo = (struct pan_fb_info){
      .tile_buf_budget = panfrost_query_optimal_tib_size(phys_dev->model),

@@ -629,6 +629,7 @@ panvk_meta_copy_img2img(struct panvk_cmd_buffer *cmdbuf,
      u_minify(dst->pimage.layout.width, region->dstSubresource.mipLevel);
   unsigned height =
      u_minify(dst->pimage.layout.height, region->dstSubresource.mipLevel);
+  cmdbuf->state.gfx.render.layer_count = 1;
   cmdbuf->state.gfx.render.fb.crc_valid[0] = false;
   *fbinfo = (struct pan_fb_info){
      .tile_buf_budget = panfrost_query_optimal_tib_size(phys_dev->model),

@@ -679,18 +680,18 @@ panvk_meta_copy_img2img(struct panvk_cmd_buffer *cmdbuf,
   batch->blit.dst = dst->bo;
   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
   panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
-  panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf);
+  panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf, 0);

   mali_ptr tsd, tiler;

   tsd = batch->tls.gpu;
-  tiler = batch->tiler.ctx_desc.gpu;
+  tiler = batch->tiler.ctx_descs.gpu;

   struct panfrost_ptr job;

-  job = panvk_meta_copy_emit_tiler_job(&cmdbuf->desc_pool.base, &batch->jc,
-                                       src_coords, dst_coords, texture,
-                                       sampler, 0, vpd, rsd, tsd, tiler);
+  job = panvk_meta_copy_emit_tiler_job(
+     &cmdbuf->desc_pool.base, &batch->vtc_jc, src_coords, dst_coords,
+     texture, sampler, 0, vpd, rsd, tsd, tiler);

   util_dynarray_append(&batch->jobs, void *, job.cpu);
   panvk_per_arch(cmd_close_batch)(cmdbuf);

@@ -1062,6 +1063,7 @@ panvk_meta_copy_buf2img(struct panvk_cmd_buffer *cmdbuf,
   };

   /* TODO: don't force preloads of dst resources if unneeded */
+  cmdbuf->state.gfx.render.layer_count = 1;
   cmdbuf->state.gfx.render.fb.crc_valid[0] = false;
   *fbinfo = (struct pan_fb_info){
      .tile_buf_budget = panfrost_query_optimal_tib_size(phys_dev->model),

@@ -1119,18 +1121,18 @@ panvk_meta_copy_buf2img(struct panvk_cmd_buffer *cmdbuf,
   batch->blit.dst = img->bo;
   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
   panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
-  panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf);
+  panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf, 0);

   mali_ptr tsd, tiler;

   tsd = batch->tls.gpu;
-  tiler = batch->tiler.ctx_desc.gpu;
+  tiler = batch->tiler.ctx_descs.gpu;

   struct panfrost_ptr job;

-  job = panvk_meta_copy_emit_tiler_job(&cmdbuf->desc_pool.base, &batch->jc,
-                                       src_coords, dst_coords, 0, 0,
-                                       pushconsts, vpd, rsd, tsd, tiler);
+  job = panvk_meta_copy_emit_tiler_job(
+     &cmdbuf->desc_pool.base, &batch->vtc_jc, src_coords, dst_coords, 0, 0,
+     pushconsts, vpd, rsd, tsd, tiler);

   util_dynarray_append(&batch->jobs, void *, job.cpu);
   panvk_per_arch(cmd_close_batch)(cmdbuf);

@@ -1560,8 +1562,8 @@ panvk_meta_copy_img2buf(struct panvk_cmd_buffer *cmdbuf,
   };

   struct panfrost_ptr job = panvk_meta_copy_emit_compute_job(
-     &cmdbuf->desc_pool.base, &batch->jc, &num_wg, &wg_sz, texture, sampler,
-     pushconsts, rsd, tsd);
+     &cmdbuf->desc_pool.base, &batch->vtc_jc, &num_wg, &wg_sz, texture,
+     sampler, pushconsts, rsd, tsd);

   util_dynarray_append(&batch->jobs, void *, job.cpu);

@@ -1727,8 +1729,8 @@ panvk_meta_copy_buf2buf(struct panvk_cmd_buffer *cmdbuf,
   struct pan_compute_dim num_wg = {nblocks, 1, 1};
   struct pan_compute_dim wg_sz = {1, 1, 1};
   struct panfrost_ptr job = panvk_meta_copy_emit_compute_job(
-     &cmdbuf->desc_pool.base, &batch->jc, &num_wg, &wg_sz, 0, 0, pushconsts,
-     rsd, tsd);
+     &cmdbuf->desc_pool.base, &batch->vtc_jc, &num_wg, &wg_sz, 0, 0,
+     pushconsts, rsd, tsd);

   util_dynarray_append(&batch->jobs, void *, job.cpu);

@@ -1875,8 +1877,8 @@ panvk_meta_fill_buf(struct panvk_cmd_buffer *cmdbuf,
   struct pan_compute_dim num_wg = {nwords, 1, 1};
   struct pan_compute_dim wg_sz = {1, 1, 1};
   struct panfrost_ptr job = panvk_meta_copy_emit_compute_job(
-     &cmdbuf->desc_pool.base, &batch->jc, &num_wg, &wg_sz, 0, 0, pushconsts,
-     rsd, tsd);
+     &cmdbuf->desc_pool.base, &batch->vtc_jc, &num_wg, &wg_sz, 0, 0,
+     pushconsts, rsd, tsd);

   util_dynarray_append(&batch->jobs, void *, job.cpu);

@@ -1925,8 +1927,8 @@ panvk_meta_update_buf(struct panvk_cmd_buffer *cmdbuf,
   struct pan_compute_dim num_wg = {nblocks, 1, 1};
   struct pan_compute_dim wg_sz = {1, 1, 1};
   struct panfrost_ptr job = panvk_meta_copy_emit_compute_job(
-     &cmdbuf->desc_pool.base, &batch->jc, &num_wg, &wg_sz, 0, 0, pushconsts,
-     rsd, tsd);
+     &cmdbuf->desc_pool.base, &batch->vtc_jc, &num_wg, &wg_sz, 0, 0,
+     pushconsts, rsd, tsd);

   util_dynarray_append(&batch->jobs, void *, job.cpu);

@@ -48,22 +48,25 @@ panvk_queue_submit_batch(struct panvk_queue *queue, struct panvk_batch *batch,
      memset((*job), 0, 4 * 4);

      /* Reset the tiler before re-issuing the batch */
-     if (batch->tiler.ctx_desc.cpu) {
+     if (batch->tiler.ctx_descs.cpu) {
         memcpy(batch->tiler.heap_desc.cpu, &batch->tiler.heap_templ,
                sizeof(batch->tiler.heap_templ));
-        memcpy(batch->tiler.ctx_desc.cpu, &batch->tiler.ctx_templ,
-               sizeof(batch->tiler.ctx_templ));
+
+        struct mali_tiler_context_packed *ctxs = batch->tiler.ctx_descs.cpu;
+
+        for (uint32_t i = 0; i < batch->fb.layer_count; i++)
+           memcpy(&ctxs[i], &batch->tiler.ctx_templ, sizeof(*ctxs));
      }
   }

-  if (batch->jc.first_job) {
+  if (batch->vtc_jc.first_job) {
      struct drm_panfrost_submit submit = {
         .bo_handles = (uintptr_t)bos,
         .bo_handle_count = nr_bos,
         .in_syncs = (uintptr_t)in_fences,
         .in_sync_count = nr_in_fences,
         .out_sync = queue->sync,
-        .jc = batch->jc.first_job,
+        .jc = batch->vtc_jc.first_job,
      };

      ret = drmIoctl(dev->vk.drm_fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);

@@ -76,7 +79,7 @@ panvk_queue_submit_batch(struct panvk_queue *queue, struct panvk_batch *batch,
   }

   if (debug & PANVK_DEBUG_TRACE) {
-     pandecode_jc(dev->debug.decode_ctx, batch->jc.first_job,
+     pandecode_jc(dev->debug.decode_ctx, batch->vtc_jc.first_job,
                   phys_dev->kmod.props.gpu_prod_id);
   }

@@ -88,16 +91,16 @@ panvk_queue_submit_batch(struct panvk_queue *queue, struct panvk_batch *batch,
                   phys_dev->kmod.props.gpu_prod_id);
   }

-  if (batch->fragment_job) {
+  if (batch->frag_jc.first_job) {
      struct drm_panfrost_submit submit = {
         .bo_handles = (uintptr_t)bos,
         .bo_handle_count = nr_bos,
         .out_sync = queue->sync,
-        .jc = batch->fragment_job,
+        .jc = batch->frag_jc.first_job,
         .requirements = PANFROST_JD_REQ_FS,
      };

-     if (batch->jc.first_job) {
+     if (batch->vtc_jc.first_job) {
         submit.in_syncs = (uintptr_t)(&queue->sync);
         submit.in_sync_count = 1;
      } else {

@@ -114,7 +117,7 @@ panvk_queue_submit_batch(struct panvk_queue *queue, struct panvk_batch *batch,
   }

   if (debug & PANVK_DEBUG_TRACE)
-     pandecode_jc(dev->debug.decode_ctx, batch->fragment_job,
+     pandecode_jc(dev->debug.decode_ctx, batch->frag_jc.first_job,
                   phys_dev->kmod.props.gpu_prod_id);

   if (debug & PANVK_DEBUG_DUMP)

@@ -236,7 +239,7 @@ panvk_queue_submit(struct vk_queue *vk_queue, struct vk_queue_submit *submit)
                     panvk_pool_num_bos(&cmdbuf->tls_pool) +
                     batch->fb.bo_count + (batch->blit.src ? 1 : 0) +
                     (batch->blit.dst ? 1 : 0) +
-                    (batch->jc.first_tiler ? 1 : 0) + 1;
+                    (batch->vtc_jc.first_tiler ? 1 : 0) + 1;
      unsigned bo_idx = 0;
      uint32_t bos[nr_bos];

@@ -258,7 +261,7 @@ panvk_queue_submit(struct vk_queue *vk_queue, struct vk_queue_submit *submit)
      if (batch->blit.dst)
         bos[bo_idx++] = pan_kmod_bo_handle(batch->blit.dst);

-     if (batch->jc.first_tiler)
+     if (batch->vtc_jc.first_tiler)
         bos[bo_idx++] = pan_kmod_bo_handle(dev->tiler_heap->bo);

      bos[bo_idx++] = pan_kmod_bo_handle(dev->sample_positions->bo);

@@ -57,6 +57,11 @@ struct panvk_graphics_sysvals {
   } vs;

+#if PAN_ARCH <= 7
+  /* gl_Layer on Bifrost is a bit of hack. We have to issue one draw per
+   * layer, and filter primitives at the VS level.
+   */
+  int32_t layer_id;

   struct {
      uint64_t sets[MAX_SETS];
      uint64_t vs_dyn_ssbos;

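The comment above describes the Bifrost gl_Layer trick that the NIR pass added further down implements: the vertex shader never really writes gl_Layer; instead, each per-layer draw pushes a layer_id sysval and the position write is rewritten so that primitives whose gl_Layer does not match are sent to infinity and dropped by the rasterizer. The snippet below is a CPU-side illustration of that selection only (plain C, not driver or shader code).

```c
#include <math.h>
#include <stdio.h>

struct vec4 { float x, y, z, w; };

/* ref_layer stands in for the layer_id sysval pushed for the current draw;
 * gl_layer stands in for the value the shader tried to write to gl_Layer. */
static struct vec4
filter_position(struct vec4 pos, int gl_layer, int ref_layer)
{
   struct vec4 inf_pos = { INFINITY, INFINITY, INFINITY, 1.0f };

   /* Primitives whose layer doesn't match the draw's layer are moved to
    * infinity so they never produce fragments. */
   return (gl_layer == ref_layer) ? pos : inf_pos;
}

int main(void)
{
   struct vec4 p = { 0.5f, -0.25f, 0.0f, 1.0f };
   struct vec4 kept = filter_position(p, 2, 2);
   struct vec4 culled = filter_position(p, 1, 2);

   printf("kept.x=%f culled.x=%f\n", kept.x, culled.x);
   return 0;
}
```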
@@ -58,14 +58,13 @@
#include "vk_util.h"

static nir_def *
-load_sysval_from_push_const(nir_builder *b, nir_intrinsic_instr *intr,
-                            unsigned offset)
+load_sysval_from_push_const(nir_builder *b, unsigned offset, unsigned bit_size,
+                            unsigned num_comps)
{
   return nir_load_push_constant(
-     b, intr->def.num_components, intr->def.bit_size, nir_imm_int(b, 0),
+     b, num_comps, bit_size, nir_imm_int(b, 0),
      /* Push constants are placed first, and then come the sysvals. */
-     .base = offset + 256,
-     .range = intr->def.num_components * intr->def.bit_size / 8);
+     .base = offset + 256, .range = num_comps * bit_size / 8);
}

static bool

@@ -75,53 +74,54 @@ panvk_lower_sysvals(nir_builder *b, nir_instr *instr, void *data)
      return false;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+  unsigned num_comps = intr->def.num_components;
+  unsigned bit_size = intr->def.bit_size;
   nir_def *val = NULL;
   b->cursor = nir_before_instr(instr);

#define SYSVAL(ptype, name) offsetof(struct panvk_##ptype##_sysvals, name)
   switch (intr->intrinsic) {
   case nir_intrinsic_load_base_workgroup_id:
-     val =
-        load_sysval_from_push_const(b, intr, SYSVAL(compute, base));
+     val = load_sysval_from_push_const(b, SYSVAL(compute, base), bit_size,
+                                       num_comps);
      break;
   case nir_intrinsic_load_num_workgroups:
-     val =
-        load_sysval_from_push_const(b, intr, SYSVAL(compute, num_work_groups));
+     val = load_sysval_from_push_const(b, SYSVAL(compute, num_work_groups),
+                                       bit_size, num_comps);
      break;
   case nir_intrinsic_load_workgroup_size:
-     val = load_sysval_from_push_const(b, intr,
-                                       SYSVAL(compute, local_group_size));
+     val = load_sysval_from_push_const(b, SYSVAL(compute, local_group_size),
+                                       bit_size, num_comps);
      break;
   case nir_intrinsic_load_viewport_scale:
-     val =
-        load_sysval_from_push_const(b, intr, SYSVAL(graphics, viewport.scale));
+     val = load_sysval_from_push_const(b, SYSVAL(graphics, viewport.scale),
+                                       bit_size, num_comps);
      break;
   case nir_intrinsic_load_viewport_offset:
-     val = load_sysval_from_push_const(b, intr,
-                                       SYSVAL(graphics, viewport.offset));
+     val = load_sysval_from_push_const(b, SYSVAL(graphics, viewport.offset),
+                                       bit_size, num_comps);
      break;
   case nir_intrinsic_load_first_vertex:
-     val = load_sysval_from_push_const(b, intr,
-                                       SYSVAL(graphics, vs.first_vertex));
+     val = load_sysval_from_push_const(b, SYSVAL(graphics, vs.first_vertex),
+                                       bit_size, num_comps);
      break;
   case nir_intrinsic_load_base_vertex:
-     val =
-        load_sysval_from_push_const(b, intr, SYSVAL(graphics, vs.base_vertex));
+     val = load_sysval_from_push_const(b, SYSVAL(graphics, vs.base_vertex),
+                                       bit_size, num_comps);
      break;
   case nir_intrinsic_load_base_instance:
-     val = load_sysval_from_push_const(b, intr,
-                                       SYSVAL(graphics, vs.base_instance));
+     val = load_sysval_from_push_const(b, SYSVAL(graphics, vs.base_instance),
+                                       bit_size, num_comps);
      break;
   case nir_intrinsic_load_blend_const_color_rgba:
-     val = load_sysval_from_push_const(b, intr,
-                                       SYSVAL(graphics, blend.constants));
+     val = load_sysval_from_push_const(b, SYSVAL(graphics, blend.constants),
                                       bit_size, num_comps);
      break;

   case nir_intrinsic_load_layer_id:
-     /* We don't support layered rendering yet, so force the layer_id to
-      * zero for now.
-      */
-     val = nir_imm_int(b, 0);
+     assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
+     val = load_sysval_from_push_const(b, SYSVAL(graphics, layer_id), bit_size,
+                                       num_comps);
      break;

   default:

@@ -134,6 +134,82 @@ panvk_lower_sysvals(nir_builder *b, nir_instr *instr, void *data)
   return true;
}

+static bool
+lower_gl_pos_layer_writes(nir_builder *b, nir_instr *instr, void *data)
+{
+  if (instr->type != nir_instr_type_intrinsic)
+     return false;
+
+  nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+  if (intr->intrinsic != nir_intrinsic_copy_deref)
+     return false;
+
+  nir_variable *dst_var = nir_intrinsic_get_var(intr, 0);
+  nir_variable *src_var = nir_intrinsic_get_var(intr, 1);
+
+  if (!dst_var || dst_var->data.mode != nir_var_shader_out || !src_var ||
+      src_var->data.mode != nir_var_shader_temp)
+     return false;
+
+  if (dst_var->data.location == VARYING_SLOT_LAYER) {
+     /* We don't really write the layer, we just make sure primitives are
+      * discarded if gl_Layer doesn't match the layer passed to the draw.
+      */
+     b->cursor = nir_instr_remove(instr);
+     return true;
+  }
+
+  if (dst_var->data.location == VARYING_SLOT_POS) {
+     nir_variable *temp_layer_var = data;
+     nir_variable *temp_pos_var = src_var;
+
+     b->cursor = nir_before_instr(instr);
+     nir_def *layer = nir_load_var(b, temp_layer_var);
+     nir_def *pos = nir_load_var(b, temp_pos_var);
+     nir_def *inf_pos = nir_imm_vec4(b, INFINITY, INFINITY, INFINITY, 1.0f);
+     nir_def *ref_layer = load_sysval_from_push_const(
+        b, offsetof(struct panvk_graphics_sysvals, layer_id), 32, 1);
+
+     nir_store_var(b, temp_pos_var,
+                   nir_bcsel(b, nir_ieq(b, layer, ref_layer), pos, inf_pos),
+                   0xf);
+     return true;
+  }
+
+  return false;
+}
+
+static bool
+lower_layer_writes(nir_shader *nir)
+{
+  if (nir->info.stage == MESA_SHADER_FRAGMENT)
+     return false;
+
+  nir_variable *temp_layer_var = NULL;
+  bool has_layer_var = false;
+
+  nir_foreach_variable_with_modes(var, nir,
+                                  nir_var_shader_out | nir_var_shader_temp) {
+     if (var->data.mode == nir_var_shader_out &&
+         var->data.location == VARYING_SLOT_LAYER)
+        has_layer_var = true;
+
+     if (var->data.mode == nir_var_shader_temp &&
+         var->data.location == VARYING_SLOT_LAYER)
+        temp_layer_var = var;
+  }
+
+  if (!has_layer_var)
+     return false;
+
+  assert(temp_layer_var);
+
+  return nir_shader_instructions_pass(
+     nir, lower_gl_pos_layer_writes,
+     nir_metadata_block_index | nir_metadata_dominance, temp_layer_var);
+}
+
static void
shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{

@@ -202,6 +278,10 @@ panvk_preprocess_nir(UNUSED struct vk_physical_device *vk_pdev, nir_shader *nir)
   NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir),
              true, true);

+  /* This needs to be done just after the io_to_temporaries pass, because we
+   * rely on in/out temporaries to collect the final layer_id value. */
+  NIR_PASS_V(nir, lower_layer_writes);
+
   NIR_PASS_V(nir, nir_lower_indirect_derefs,
              nir_var_shader_in | nir_var_shader_out, UINT32_MAX);
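For reference, load_sysval_from_push_const() above addresses sysvals behind a fixed 256-byte user push-constant area (".base = offset + 256"). Below is a small stand-alone illustration of that layout; the struct is a made-up stand-in for struct panvk_graphics_sysvals.

```c
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for struct panvk_graphics_sysvals. */
struct fake_graphics_sysvals {
   float viewport_scale[4];
   float viewport_offset[4];
   int32_t layer_id;
};

/* User push constants occupy the first 256 bytes; sysvals follow. */
#define USER_PUSH_CONST_SIZE 256

int main(void)
{
   size_t layer_id_base =
      USER_PUSH_CONST_SIZE + offsetof(struct fake_graphics_sysvals, layer_id);

   printf("push-constant base for layer_id: %zu\n", layer_id_base);
   return 0;
}
```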