diff --git a/src/gallium/drivers/panfrost/pan_allocate.c b/src/gallium/drivers/panfrost/pan_allocate.c index 3076c23ab1c..b16a1253ac2 100644 --- a/src/gallium/drivers/panfrost/pan_allocate.c +++ b/src/gallium/drivers/panfrost/pan_allocate.c @@ -63,8 +63,18 @@ panfrost_allocate_transient(struct panfrost_batch *batch, size_t sz) size_t bo_sz = sz < TRANSIENT_SLAB_SIZE ? TRANSIENT_SLAB_SIZE : ALIGN_POT(sz, 4096); - /* We can't reuse the current BO, but we can create a new one. */ - bo = panfrost_batch_create_bo(batch, bo_sz, 0); + /* We can't reuse the current BO, but we can create a new one. + * We don't know what the BO will be used for, so let's flag it + * RW and attach it to both the fragment and vertex/tiler jobs. + * TODO: if we want fine grained BO assignment we should pass + * flags to this function and keep the read/write, + * fragment/vertex+tiler pools separate. + */ + bo = panfrost_batch_create_bo(batch, bo_sz, 0, + PAN_BO_ACCESS_PRIVATE | + PAN_BO_ACCESS_RW | + PAN_BO_ACCESS_VERTEX_TILER | + PAN_BO_ACCESS_FRAGMENT); if (sz < TRANSIENT_SLAB_SIZE) { batch->transient_bo = bo; diff --git a/src/gallium/drivers/panfrost/pan_blend_cso.c b/src/gallium/drivers/panfrost/pan_blend_cso.c index 6bd6ff71cdc..48bd513ab6e 100644 --- a/src/gallium/drivers/panfrost/pan_blend_cso.c +++ b/src/gallium/drivers/panfrost/pan_blend_cso.c @@ -273,7 +273,11 @@ panfrost_get_blend_for_context(struct panfrost_context *ctx, unsigned rti) /* Upload the shader */ final.shader.bo = panfrost_batch_create_bo(batch, shader->size, - PAN_BO_EXECUTE); + PAN_BO_EXECUTE, + PAN_BO_ACCESS_PRIVATE | + PAN_BO_ACCESS_READ | + PAN_BO_ACCESS_VERTEX_TILER | + PAN_BO_ACCESS_FRAGMENT); memcpy(final.shader.bo->cpu, shader->buffer, shader->size); if (shader->patch_index) { diff --git a/src/gallium/drivers/panfrost/pan_bo.h b/src/gallium/drivers/panfrost/pan_bo.h index 33fbddff336..73cc74a260d 100644 --- a/src/gallium/drivers/panfrost/pan_bo.h +++ b/src/gallium/drivers/panfrost/pan_bo.h @@ -56,6 +56,24 @@ struct panfrost_screen; * let the BO logic know about this contraint. */ #define PAN_BO_DONT_REUSE (1 << 5) +/* GPU access flags */ + +/* BO is either shared (can be accessed by more than one GPU batch) or private + * (reserved by a specific GPU job). */ +#define PAN_BO_ACCESS_PRIVATE (0 << 0) +#define PAN_BO_ACCESS_SHARED (1 << 0) + +/* BO is being read/written by the GPU */ +#define PAN_BO_ACCESS_READ (1 << 1) +#define PAN_BO_ACCESS_WRITE (1 << 2) +#define PAN_BO_ACCESS_RW (PAN_BO_ACCESS_READ | PAN_BO_ACCESS_WRITE) + +/* BO is accessed by the vertex/tiler job. */ +#define PAN_BO_ACCESS_VERTEX_TILER (1 << 3) + +/* BO is accessed by the fragment job. */ +#define PAN_BO_ACCESS_FRAGMENT (1 << 4) + struct panfrost_bo { /* Must be first for casting */ struct list_head link; @@ -78,6 +96,17 @@ struct panfrost_bo { uint32_t flags; }; +static inline uint32_t +panfrost_bo_access_for_stage(enum pipe_shader_type stage) +{ + assert(stage == PIPE_SHADER_FRAGMENT || + stage == PIPE_SHADER_VERTEX); + + return stage == PIPE_SHADER_FRAGMENT ? + PAN_BO_ACCESS_FRAGMENT : + PAN_BO_ACCESS_VERTEX_TILER; +} + void panfrost_bo_reference(struct panfrost_bo *bo); void diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index ba32564b6e0..b9945bdac87 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -583,6 +583,7 @@ panfrost_layout_for_texture(struct panfrost_resource *rsrc) static mali_ptr panfrost_upload_tex( struct panfrost_context *ctx, + enum pipe_shader_type st, struct panfrost_sampler_view *view) { if (!view) @@ -610,7 +611,9 @@ panfrost_upload_tex( /* Add the BO to the job so it's retained until the job is done. */ struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); - panfrost_batch_add_bo(batch, rsrc->bo); + panfrost_batch_add_bo(batch, rsrc->bo, + PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ | + panfrost_bo_access_for_stage(st)); /* Add the usage flags in, since they can change across the CSO * lifetime due to layout switches */ @@ -653,7 +656,7 @@ panfrost_upload_texture_descriptors(struct panfrost_context *ctx) for (int i = 0; i < ctx->sampler_view_count[t]; ++i) trampolines[i] = - panfrost_upload_tex(ctx, ctx->sampler_views[t][i]); + panfrost_upload_tex(ctx, t, ctx->sampler_views[t][i]); trampoline = panfrost_upload_transient(batch, trampolines, sizeof(uint64_t) * ctx->sampler_view_count[t]); } @@ -729,7 +732,9 @@ static void panfrost_upload_ssbo_sysval( struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); struct panfrost_bo *bo = pan_resource(sb.buffer)->bo; - panfrost_batch_add_bo(batch, bo); + panfrost_batch_add_bo(batch, bo, + PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_RW | + panfrost_bo_access_for_stage(st)); /* Upload address and size as sysval */ uniform->du[0] = bo->gpu + sb.buffer_offset; @@ -795,6 +800,7 @@ panfrost_map_constant_buffer_cpu(struct panfrost_constant_buffer *buf, unsigned static mali_ptr panfrost_map_constant_buffer_gpu( struct panfrost_context *ctx, + enum pipe_shader_type st, struct panfrost_constant_buffer *buf, unsigned index) { @@ -803,7 +809,10 @@ panfrost_map_constant_buffer_gpu( struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); if (rsrc) { - panfrost_batch_add_bo(batch, rsrc->bo); + panfrost_batch_add_bo(batch, rsrc->bo, + PAN_BO_ACCESS_SHARED | + PAN_BO_ACCESS_READ | + panfrost_bo_access_for_stage(st)); return rsrc->bo->gpu; } else if (cb->user_buffer) { return panfrost_upload_transient(batch, cb->user_buffer, cb->buffer_size); @@ -845,7 +854,10 @@ panfrost_patch_shader_state( struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); /* Add the shader BO to the batch. */ - panfrost_batch_add_bo(batch, ss->bo); + panfrost_batch_add_bo(batch, ss->bo, + PAN_BO_ACCESS_PRIVATE | + PAN_BO_ACCESS_READ | + panfrost_bo_access_for_stage(stage)); /* We can't reuse over frames; that's not safe. The descriptor must be * transient uploaded */ @@ -1175,7 +1187,7 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) continue; } - mali_ptr gpu = panfrost_map_constant_buffer_gpu(ctx, buf, ubo); + mali_ptr gpu = panfrost_map_constant_buffer_gpu(ctx, i, buf, ubo); unsigned bytes_per_field = 16; unsigned aligned = ALIGN_POT(usz, bytes_per_field); @@ -1397,7 +1409,10 @@ panfrost_get_index_buffer_mapped(struct panfrost_context *ctx, const struct pipe if (!info->has_user_indices) { /* Only resources can be directly mapped */ - panfrost_batch_add_bo(batch, rsrc->bo); + panfrost_batch_add_bo(batch, rsrc->bo, + PAN_BO_ACCESS_SHARED | + PAN_BO_ACCESS_READ | + PAN_BO_ACCESS_VERTEX_TILER); return rsrc->bo->gpu + offset; } else { /* Otherwise, we need to upload to transient memory */ diff --git a/src/gallium/drivers/panfrost/pan_instancing.c b/src/gallium/drivers/panfrost/pan_instancing.c index 806823f0da0..473026cb8a0 100644 --- a/src/gallium/drivers/panfrost/pan_instancing.c +++ b/src/gallium/drivers/panfrost/pan_instancing.c @@ -298,7 +298,10 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch) unsigned chopped_addr = raw_addr - addr; /* Add a dependency of the batch on the vertex buffer */ - panfrost_batch_add_bo(batch, rsrc->bo); + panfrost_batch_add_bo(batch, rsrc->bo, + PAN_BO_ACCESS_SHARED | + PAN_BO_ACCESS_READ | + PAN_BO_ACCESS_VERTEX_TILER); /* Set common fields */ attrs[k].elements = addr; diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c index 69047ca62a6..8e2703ae168 100644 --- a/src/gallium/drivers/panfrost/pan_job.c +++ b/src/gallium/drivers/panfrost/pan_job.c @@ -132,7 +132,8 @@ panfrost_get_batch_for_fbo(struct panfrost_context *ctx) } void -panfrost_batch_add_bo(struct panfrost_batch *batch, struct panfrost_bo *bo) +panfrost_batch_add_bo(struct panfrost_batch *batch, struct panfrost_bo *bo, + uint32_t flags) { if (!bo) return; @@ -146,26 +147,30 @@ panfrost_batch_add_bo(struct panfrost_batch *batch, struct panfrost_bo *bo) void panfrost_batch_add_fbo_bos(struct panfrost_batch *batch) { + uint32_t flags = PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_WRITE | + PAN_BO_ACCESS_VERTEX_TILER | + PAN_BO_ACCESS_FRAGMENT; + for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) { struct panfrost_resource *rsrc = pan_resource(batch->key.cbufs[i]->texture); - panfrost_batch_add_bo(batch, rsrc->bo); + panfrost_batch_add_bo(batch, rsrc->bo, flags); } if (batch->key.zsbuf) { struct panfrost_resource *rsrc = pan_resource(batch->key.zsbuf->texture); - panfrost_batch_add_bo(batch, rsrc->bo); + panfrost_batch_add_bo(batch, rsrc->bo, flags); } } struct panfrost_bo * panfrost_batch_create_bo(struct panfrost_batch *batch, size_t size, - uint32_t create_flags) + uint32_t create_flags, uint32_t access_flags) { struct panfrost_bo *bo; bo = panfrost_bo_create(pan_screen(batch->ctx->base.screen), size, create_flags); - panfrost_batch_add_bo(batch, bo); + panfrost_batch_add_bo(batch, bo, access_flags); /* panfrost_batch_add_bo() has retained a reference and * panfrost_bo_create() initialize the refcnt to 1, so let's @@ -189,7 +194,11 @@ panfrost_batch_get_polygon_list(struct panfrost_batch *batch, unsigned size) /* Create the BO as invisible, as there's no reason to map */ batch->polygon_list = panfrost_batch_create_bo(batch, size, - PAN_BO_INVISIBLE); + PAN_BO_INVISIBLE, + PAN_BO_ACCESS_PRIVATE | + PAN_BO_ACCESS_RW | + PAN_BO_ACCESS_VERTEX_TILER | + PAN_BO_ACCESS_FRAGMENT); } return batch->polygon_list->gpu; @@ -202,7 +211,11 @@ panfrost_batch_get_scratchpad(struct panfrost_batch *batch) return batch->scratchpad; batch->scratchpad = panfrost_batch_create_bo(batch, 64 * 4 * 4096, - PAN_BO_INVISIBLE); + PAN_BO_INVISIBLE, + PAN_BO_ACCESS_PRIVATE | + PAN_BO_ACCESS_RW | + PAN_BO_ACCESS_VERTEX_TILER | + PAN_BO_ACCESS_FRAGMENT); assert(batch->scratchpad); return batch->scratchpad; } @@ -215,7 +228,11 @@ panfrost_batch_get_tiler_heap(struct panfrost_batch *batch) batch->tiler_heap = panfrost_batch_create_bo(batch, 4096 * 4096, PAN_BO_INVISIBLE | - PAN_BO_GROWABLE); + PAN_BO_GROWABLE, + PAN_BO_ACCESS_PRIVATE | + PAN_BO_ACCESS_RW | + PAN_BO_ACCESS_VERTEX_TILER | + PAN_BO_ACCESS_FRAGMENT); assert(batch->tiler_heap); return batch->tiler_heap; } @@ -227,7 +244,11 @@ panfrost_batch_get_tiler_dummy(struct panfrost_batch *batch) return batch->tiler_dummy; batch->tiler_dummy = panfrost_batch_create_bo(batch, 4096, - PAN_BO_INVISIBLE); + PAN_BO_INVISIBLE, + PAN_BO_ACCESS_PRIVATE | + PAN_BO_ACCESS_RW | + PAN_BO_ACCESS_VERTEX_TILER | + PAN_BO_ACCESS_FRAGMENT); assert(batch->tiler_dummy); return batch->tiler_dummy; } diff --git a/src/gallium/drivers/panfrost/pan_job.h b/src/gallium/drivers/panfrost/pan_job.h index e5e0e323f2f..0b37a3131e8 100644 --- a/src/gallium/drivers/panfrost/pan_job.h +++ b/src/gallium/drivers/panfrost/pan_job.h @@ -131,13 +131,14 @@ void panfrost_batch_init(struct panfrost_context *ctx); void -panfrost_batch_add_bo(struct panfrost_batch *batch, struct panfrost_bo *bo); +panfrost_batch_add_bo(struct panfrost_batch *batch, struct panfrost_bo *bo, + uint32_t flags); void panfrost_batch_add_fbo_bos(struct panfrost_batch *batch); struct panfrost_bo * panfrost_batch_create_bo(struct panfrost_batch *batch, size_t size, - uint32_t create_flags); + uint32_t create_flags, uint32_t access_flags); void panfrost_batch_submit(struct panfrost_batch *batch); diff --git a/src/gallium/drivers/panfrost/pan_varyings.c b/src/gallium/drivers/panfrost/pan_varyings.c index 9dbd1e2ebac..8383c3efd28 100644 --- a/src/gallium/drivers/panfrost/pan_varyings.c +++ b/src/gallium/drivers/panfrost/pan_varyings.c @@ -69,7 +69,15 @@ panfrost_emit_streamout( /* Grab the BO and bind it to the batch */ struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); struct panfrost_bo *bo = pan_resource(target->buffer)->bo; - panfrost_batch_add_bo(batch, bo); + + /* Varyings are WRITE from the perspective of the VERTEX but READ from + * the perspective of the TILER and FRAGMENT. + */ + panfrost_batch_add_bo(batch, bo, + PAN_BO_ACCESS_SHARED | + PAN_BO_ACCESS_RW | + PAN_BO_ACCESS_VERTEX_TILER | + PAN_BO_ACCESS_FRAGMENT); mali_ptr addr = bo->gpu + target->buffer_offset + (offset * slot->stride); slot->elements = addr;