From 2eaa437574615d1c6a3a51011d13c1e5b6f883cc Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 12 Jun 2024 09:41:38 +0200 Subject: [PATCH] panvk: Use memory pools for internal GPU data attached to vulkan objects Some panvk objects need to allocate GPU memory but don't have Pool objects to get this memory from. Use device-wide mempools with .owns_bos=false, such that small allocations don't have to pay the 4k granularity price of private BO allocations. Signed-off-by: Boris Brezillon Reviewed-by: Mary Guillemard Part-of: --- src/panfrost/vulkan/bifrost/panvk_pipeline.h | 11 +- .../vulkan/bifrost/panvk_vX_meta_desc_copy.c | 2 +- .../vulkan/bifrost/panvk_vX_pipeline.c | 105 +++++++----------- .../vulkan/jm/panvk_vX_cmd_dispatch.c | 2 +- src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c | 14 +-- src/panfrost/vulkan/panvk_buffer_view.h | 6 +- src/panfrost/vulkan/panvk_device.h | 5 + src/panfrost/vulkan/panvk_image_view.h | 2 +- src/panfrost/vulkan/panvk_vX_buffer_view.c | 14 ++- src/panfrost/vulkan/panvk_vX_device.c | 36 ++++++ src/panfrost/vulkan/panvk_vX_image_view.c | 14 ++- 11 files changed, 116 insertions(+), 95 deletions(-) diff --git a/src/panfrost/vulkan/bifrost/panvk_pipeline.h b/src/panfrost/vulkan/bifrost/panvk_pipeline.h index 1aa5c726d84..2bee877bdb5 100644 --- a/src/panfrost/vulkan/bifrost/panvk_pipeline.h +++ b/src/panfrost/vulkan/bifrost/panvk_pipeline.h @@ -27,11 +27,11 @@ #define MAX_RTS 8 struct panvk_pipeline_shader { - mali_ptr code; - mali_ptr rsd; + struct panvk_priv_mem code; + struct panvk_priv_mem rsd; struct { - mali_ptr attribs; + struct panvk_priv_mem attribs; unsigned buf_strides[PANVK_VARY_BUF_MAX]; } varyings; @@ -49,7 +49,7 @@ struct panvk_pipeline_shader { uint32_t count; } dyn_ssbos; struct { - mali_ptr map; + struct panvk_priv_mem map; uint32_t count[PANVK_BIFROST_DESC_TABLE_COUNT]; } others; } desc_info; @@ -65,9 +65,6 @@ struct panvk_pipeline { enum panvk_pipeline_type type; const struct vk_pipeline_layout *layout; - - struct 
panvk_pool bin_pool; - struct panvk_pool desc_pool; }; VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_pipeline, base, VkPipeline, diff --git a/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c b/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c index d5d8642e0fd..2e45484237a 100644 --- a/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c +++ b/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c @@ -301,7 +301,7 @@ panvk_per_arch(meta_get_copy_desc_job)( const struct panvk_descriptor_state *desc_state, const struct panvk_shader_desc_state *shader_desc_state) { - mali_ptr copy_table = shader->desc_info.others.map; + mali_ptr copy_table = panvk_priv_mem_dev_addr(shader->desc_info.others.map); if (!copy_table) return (struct panfrost_ptr){0}; diff --git a/src/panfrost/vulkan/bifrost/panvk_vX_pipeline.c b/src/panfrost/vulkan/bifrost/panvk_vX_pipeline.c index cb6d3dcfdb7..55d52fed583 100644 --- a/src/panfrost/vulkan/bifrost/panvk_vX_pipeline.c +++ b/src/panfrost/vulkan/bifrost/panvk_vX_pipeline.c @@ -69,10 +69,10 @@ init_pipeline_shader(struct panvk_pipeline *pipeline, unsigned shader_sz = util_dynarray_num_elements(&shader->binary, uint8_t); if (shader_sz) { - pshader->code = pan_pool_upload_aligned(&pipeline->bin_pool.base, - shader_data, shader_sz, 128); + pshader->code = panvk_pool_upload_aligned(&dev->mempools.exec, + shader_data, shader_sz, 128); } else { - pshader->code = 0; + pshader->code = (struct panvk_priv_mem){0}; } pshader->info = shader->info; @@ -85,8 +85,8 @@ init_pipeline_shader(struct panvk_pipeline *pipeline, } if (copy_count) { - pshader->desc_info.others.map = pan_pool_upload_aligned( - &pipeline->desc_pool.base, shader->desc_info.others[0].map, + pshader->desc_info.others.map = panvk_pool_upload_aligned( + &dev->mempools.rw, shader->desc_info.others[0].map, copy_count * sizeof(uint32_t), sizeof(uint32_t)); } @@ -111,20 +111,32 @@ init_pipeline_shader(struct panvk_pipeline *pipeline, } if (stage_info->stage != VK_SHADER_STAGE_FRAGMENT_BIT) { -
struct panfrost_ptr rsd = - pan_pool_alloc_desc(&pipeline->desc_pool.base, RENDERER_STATE); + pshader->rsd = panvk_pool_alloc_desc(&dev->mempools.rw, RENDERER_STATE); - pan_pack(rsd.cpu, RENDERER_STATE, cfg) { - pan_shader_prepare_rsd(&pshader->info, pshader->code, &cfg); + pan_pack(panvk_priv_mem_host_addr(pshader->rsd), RENDERER_STATE, cfg) { + pan_shader_prepare_rsd(&pshader->info, + panvk_priv_mem_dev_addr(pshader->code), &cfg); } - - pshader->rsd = rsd.gpu; } panvk_per_arch(shader_destroy)(dev, shader, alloc); return VK_SUCCESS; } +/* Free the device-pool allocations made for this shader by + * init_pipeline_shader() and link_shaders(). */ +static void +cleanup_pipeline_shader(struct panvk_pipeline *pipeline, + struct panvk_pipeline_shader *pshader) +{ + struct panvk_device *dev = to_panvk_device(pipeline->base.device); + + panvk_pool_free_mem(&dev->mempools.exec, pshader->code); + panvk_pool_free_mem(&dev->mempools.rw, pshader->rsd); + panvk_pool_free_mem(&dev->mempools.rw, pshader->desc_info.others.map); + panvk_pool_free_mem(&dev->mempools.rw, pshader->varyings.attribs); +} + static mali_pixel_format get_varying_format(gl_shader_stage stage, gl_varying_slot loc, enum pipe_format pfmt) @@ -204,16 +213,16 @@ varying_format(gl_varying_slot loc, enum pipe_format pfmt) } } -static mali_ptr -emit_varying_attrs(struct pan_pool *desc_pool, +static struct panvk_priv_mem +emit_varying_attrs(struct panvk_pool *desc_pool, const struct pan_shader_varying *varyings, unsigned varying_count, const struct varyings_info *info, unsigned *buf_offsets) { unsigned attr_count = BITSET_COUNT(info->active); - struct panfrost_ptr ptr = - pan_pool_alloc_desc_array(desc_pool, attr_count, ATTRIBUTE); - struct mali_attribute_packed *attrs = ptr.cpu; + struct panvk_priv_mem mem = + panvk_pool_alloc_desc_array(desc_pool, attr_count, ATTRIBUTE); + struct mali_attribute_packed *attrs = panvk_priv_mem_host_addr(mem); unsigned attr_idx = 0; for (unsigned i = 0; i < varying_count; i++) { @@ -238,7 +247,7 @@ emit_varying_attrs(struct pan_pool *desc_pool, } } - return ptr.gpu; + return mem; } static void @@ -246,6 +255,7 @@ link_shaders(struct panvk_graphics_pipeline
*pipeline, struct panvk_pipeline_shader *stage, struct panvk_pipeline_shader *next_stage) { + struct panvk_device *dev = to_panvk_device(pipeline->base.base.device); BITSET_DECLARE(active_attrs, VARYING_SLOT_MAX) = {0}; unsigned buf_strides[PANVK_VARY_BUF_MAX] = {0}; unsigned buf_offsets[VARYING_SLOT_MAX] = {0}; @@ -310,10 +320,10 @@ link_shaders(struct panvk_graphics_pipeline *pipeline, } stage->varyings.attribs = emit_varying_attrs( - &pipeline->base.desc_pool.base, stage->info.varyings.output, + &dev->mempools.rw, stage->info.varyings.output, stage->info.varyings.output_count, &out_vars, buf_offsets); next_stage->varyings.attribs = emit_varying_attrs( - &pipeline->base.desc_pool.base, next_stage->info.varyings.input, + &dev->mempools.rw, next_stage->info.varyings.input, next_stage->info.varyings.input_count, &in_vars, buf_offsets); memcpy(stage->varyings.buf_strides, buf_strides, sizeof(stage->varyings.buf_strides)); @@ -352,26 +362,6 @@ panvk_graphics_pipeline_create(struct panvk_device *dev, vk_dynamic_graphics_state_fill(&gfx_pipeline->state.dynamic, &state); gfx_pipeline->state.rp = *state.rp; - struct panvk_pool_properties bin_pool_props = { - .create_flags = PAN_KMOD_BO_FLAG_EXECUTABLE, - .slab_size = 4096, - .label = "Pipeline shader binaries", - .prealloc = false, - .owns_bos = true, - .needs_locking = false, - }; - panvk_pool_init(&gfx_pipeline->base.bin_pool, dev, NULL, &bin_pool_props); - - struct panvk_pool_properties desc_pool_props = { - .create_flags = 0, - .slab_size = 4096, - .label = "Pipeline static state", - .prealloc = false, - .owns_bos = true, - .needs_locking = false, - }; - panvk_pool_init(&gfx_pipeline->base.desc_pool, dev, NULL, &desc_pool_props); - /* Make sure the stage info is correct even if no stage info is provided for * this stage in pStages. 
*/ @@ -451,28 +441,6 @@ panvk_compute_pipeline_create(struct panvk_device *dev, compute_pipeline->base.layout = layout; compute_pipeline->base.type = PANVK_PIPELINE_COMPUTE; - struct panvk_pool_properties bin_pool_props = { - .create_flags = PAN_KMOD_BO_FLAG_EXECUTABLE, - .slab_size = 4096, - .label = "Pipeline shader binaries", - .prealloc = false, - .owns_bos = true, - .needs_locking = false, - }; - panvk_pool_init(&compute_pipeline->base.bin_pool, dev, NULL, - &bin_pool_props); - - struct panvk_pool_properties desc_pool_props = { - .create_flags = 0, - .slab_size = 4096, - .label = "Pipeline static state", - .prealloc = false, - .owns_bos = true, - .needs_locking = false, - }; - panvk_pool_init(&compute_pipeline->base.desc_pool, dev, NULL, - &desc_pool_props); - VkResult result = init_pipeline_shader(&compute_pipeline->base, &create_info->stage, alloc, &compute_pipeline->cs); @@ -518,7 +486,18 @@ panvk_per_arch(DestroyPipeline)(VkDevice _device, VkPipeline _pipeline, VK_FROM_HANDLE(panvk_device, device, _device); VK_FROM_HANDLE(panvk_pipeline, pipeline, _pipeline); - panvk_pool_cleanup(&pipeline->bin_pool); - panvk_pool_cleanup(&pipeline->desc_pool); + if (pipeline->type == PANVK_PIPELINE_GRAPHICS) { + struct panvk_graphics_pipeline *gfx_pipeline = + panvk_pipeline_to_graphics_pipeline(pipeline); + + cleanup_pipeline_shader(pipeline, &gfx_pipeline->vs); + cleanup_pipeline_shader(pipeline, &gfx_pipeline->fs); + } else { + struct panvk_compute_pipeline *compute_pipeline = + panvk_pipeline_to_compute_pipeline(pipeline); + + cleanup_pipeline_shader(pipeline, &compute_pipeline->cs); + } + vk_object_free(&device->vk, pAllocator, pipeline); } diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c index a4f18320276..4ecc85ec779 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c @@ -111,7 +111,7 @@ panvk_per_arch(CmdDispatch)(VkCommandBuffer commandBuffer, 
uint32_t x, } pan_section_pack(job.cpu, COMPUTE_JOB, DRAW, cfg) { - cfg.state = pipeline->cs.rsd; + cfg.state = panvk_priv_mem_dev_addr(pipeline->cs.rsd); cfg.attributes = cs_desc_state->img_attrib_table; cfg.attribute_buffers = cs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_IMG]; diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c index c9f7b2d3c31..dab05dd10cc 100644 --- a/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c @@ -349,18 +349,18 @@ panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf, PAN_DESC_ARRAY(bd_count, BLEND)); struct mali_renderer_state_packed *rsd = ptr.cpu; struct mali_blend_packed *bds = ptr.cpu + pan_size(RENDERER_STATE); + mali_ptr fs_code = panvk_priv_mem_dev_addr(pipeline->fs.code); panvk_per_arch(blend_emit_descs)( dev, cb, cmdbuf->state.gfx.render.color_attachments.fmts, - cmdbuf->state.gfx.render.color_attachments.samples, fs_info, - pipeline->fs.code, bds, &blend_reads_dest, - &blend_shader_loads_blend_const); + cmdbuf->state.gfx.render.color_attachments.samples, fs_info, fs_code, bds, + &blend_reads_dest, &blend_shader_loads_blend_const); pan_pack(rsd, RENDERER_STATE, cfg) { bool alpha_to_coverage = dyns->ms.alpha_to_coverage_enable; if (needs_fs) { - pan_shader_prepare_rsd(fs_info, pipeline->fs.code, &cfg); + pan_shader_prepare_rsd(fs_info, fs_code, &cfg); if (blend_shader_loads_blend_const) { /* Preload the blend constant if the blend shader depends on it. 
*/ @@ -543,8 +543,8 @@ panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf, draw->line_width = 1.0f; draw->varying_bufs = bufs.gpu; - draw->vs.varyings = pipeline->vs.varyings.attribs; - draw->fs.varyings = pipeline->fs.varyings.attribs; + draw->vs.varyings = panvk_priv_mem_dev_addr(pipeline->vs.varyings.attribs); + draw->fs.varyings = panvk_priv_mem_dev_addr(pipeline->fs.varyings.attribs); } static void @@ -805,7 +805,7 @@ panvk_draw_prepare_vertex_job(struct panvk_cmd_buffer *cmdbuf, } pan_section_pack(ptr.cpu, COMPUTE_JOB, DRAW, cfg) { - cfg.state = pipeline->vs.rsd; + cfg.state = panvk_priv_mem_dev_addr(pipeline->vs.rsd); cfg.attributes = draw->vs.attributes; cfg.attribute_buffers = draw->vs.attribute_bufs; cfg.varyings = draw->vs.varyings; diff --git a/src/panfrost/vulkan/panvk_buffer_view.h b/src/panfrost/vulkan/panvk_buffer_view.h index f07a961df72..43950659f4e 100644 --- a/src/panfrost/vulkan/panvk_buffer_view.h +++ b/src/panfrost/vulkan/panvk_buffer_view.h @@ -12,15 +12,15 @@ #include +#include "panvk_mempool.h" + #include "vk_buffer_view.h" #include "genxml/gen_macros.h" -struct panvk_priv_bo; - struct panvk_buffer_view { struct vk_buffer_view vk; - struct panvk_priv_bo *bo; + struct panvk_priv_mem mem; struct { struct mali_texture_packed tex; diff --git a/src/panfrost/vulkan/panvk_device.h b/src/panfrost/vulkan/panvk_device.h index 7257756aaf1..882dbec4175 100644 --- a/src/panfrost/vulkan/panvk_device.h +++ b/src/panfrost/vulkan/panvk_device.h @@ -44,6 +44,11 @@ struct panvk_device { struct panvk_priv_bo *rsd_bo; } desc_copy; + struct { + struct panvk_pool rw; + struct panvk_pool exec; + } mempools; + struct vk_device_dispatch_table cmd_dispatch; struct panvk_queue *queues[PANVK_MAX_QUEUE_FAMILIES]; diff --git a/src/panfrost/vulkan/panvk_image_view.h b/src/panfrost/vulkan/panvk_image_view.h index 15198bf5567..d30af917932 100644 --- a/src/panfrost/vulkan/panvk_image_view.h +++ b/src/panfrost/vulkan/panvk_image_view.h @@ -25,7 +25,7 @@ struct 
panvk_image_view { struct pan_image_view pview; - struct panvk_priv_bo *bo; + struct panvk_priv_mem mem; struct { struct mali_texture_packed tex; diff --git a/src/panfrost/vulkan/panvk_vX_buffer_view.c b/src/panfrost/vulkan/panvk_vX_buffer_view.c index 694322f9572..35e58a37151 100644 --- a/src/panfrost/vulkan/panvk_vX_buffer_view.c +++ b/src/panfrost/vulkan/panvk_vX_buffer_view.c @@ -92,14 +92,16 @@ panvk_per_arch(CreateBufferView)(VkDevice _device, pan_image_layout_init(arch, &plane.layout, NULL); - unsigned bo_size = GENX(panfrost_estimate_texture_payload_size)(&pview); + struct panvk_pool_alloc_info alloc_info = { + .alignment = pan_alignment(TEXTURE), + .size = GENX(panfrost_estimate_texture_payload_size)(&pview), + }; - view->bo = panvk_priv_bo_create(device, bo_size, 0, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + view->mem = panvk_pool_alloc_mem(&device->mempools.rw, alloc_info); struct panfrost_ptr ptr = { - .gpu = view->bo->addr.dev, - .cpu = view->bo->addr.host, + .gpu = panvk_priv_mem_dev_addr(view->mem), + .cpu = panvk_priv_mem_host_addr(view->mem), }; GENX(panfrost_new_texture)(&pview, view->descs.tex.opaque, &ptr); @@ -151,6 +153,6 @@ panvk_per_arch(DestroyBufferView)(VkDevice _device, VkBufferView bufferView, if (!view) return; - panvk_priv_bo_unref(view->bo); + panvk_pool_free_mem(&device->mempools.rw, view->mem); vk_buffer_view_destroy(&device->vk, pAllocator, &view->vk); } diff --git a/src/panfrost/vulkan/panvk_vX_device.c b/src/panfrost/vulkan/panvk_vX_device.c index 377dd4fba17..fe54d6ca557 100644 --- a/src/panfrost/vulkan/panvk_vX_device.c +++ b/src/panfrost/vulkan/panvk_vX_device.c @@ -47,6 +47,39 @@ panvk_kmod_free(const struct pan_kmod_allocator *allocator, void *data) return vk_free(vkalloc, data); } +static void +panvk_device_init_mempools(struct panvk_device *dev) +{ + struct panvk_pool_properties rw_pool_props = { + .create_flags = 0, + .slab_size = 16 * 1024, + .label = "Device RW memory pool", + .owns_bos = false, + .needs_locking = true, + 
.prealloc = false, + }; + + panvk_pool_init(&dev->mempools.rw, dev, NULL, &rw_pool_props); + + struct panvk_pool_properties exec_pool_props = { + .create_flags = PAN_KMOD_BO_FLAG_EXECUTABLE, + .slab_size = 16 * 1024, + .label = "Device executable memory pool (shaders)", + .owns_bos = false, + .needs_locking = true, + .prealloc = false, + }; + + panvk_pool_init(&dev->mempools.exec, dev, NULL, &exec_pool_props); +} + +static void +panvk_device_cleanup_mempools(struct panvk_device *dev) +{ + panvk_pool_cleanup(&dev->mempools.rw); + panvk_pool_cleanup(&dev->mempools.exec); +} + /* Always reserve the lower 32MB. */ #define PANVK_VA_RESERVE_BOTTOM 0x2000000ull @@ -128,6 +161,8 @@ panvk_per_arch(create_device)(struct panvk_physical_device *physical_device, pan_kmod_vm_create(device->kmod.dev, PAN_KMOD_VM_FLAG_AUTO_VA, user_va_start, user_va_end - user_va_start); + panvk_device_init_mempools(device); + device->tiler_heap = panvk_priv_bo_create( device, 128 * 1024 * 1024, PAN_KMOD_BO_FLAG_NO_MMAP | PAN_KMOD_BO_FLAG_ALLOC_ON_FAULT, @@ -184,6 +219,7 @@ fail: panvk_per_arch(blend_shader_cache_cleanup)(device); panvk_priv_bo_unref(device->tiler_heap); panvk_priv_bo_unref(device->sample_positions); + panvk_device_cleanup_mempools(device); pan_kmod_vm_destroy(device->kmod.vm); pan_kmod_dev_destroy(device->kmod.dev); diff --git a/src/panfrost/vulkan/panvk_vX_image_view.c b/src/panfrost/vulkan/panvk_vX_image_view.c index 8a8296bb785..7f056b5cef6 100644 --- a/src/panfrost/vulkan/panvk_vX_image_view.c +++ b/src/panfrost/vulkan/panvk_vX_image_view.c @@ -144,14 +144,16 @@ panvk_per_arch(CreateImageView)(VkDevice _device, util_format_compose_swizzles(r001, view->pview.swizzle, pview.swizzle); } - unsigned bo_size = GENX(panfrost_estimate_texture_payload_size)(&pview); + struct panvk_pool_alloc_info alloc_info = { + .alignment = pan_alignment(TEXTURE), + .size = GENX(panfrost_estimate_texture_payload_size)(&pview), + }; - view->bo = panvk_priv_bo_create(device, bo_size, 0, - 
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + view->mem = panvk_pool_alloc_mem(&device->mempools.rw, alloc_info); struct panfrost_ptr ptr = { - .gpu = view->bo->addr.dev, - .cpu = view->bo->addr.host, + .gpu = panvk_priv_mem_dev_addr(view->mem), + .cpu = panvk_priv_mem_host_addr(view->mem), }; GENX(panfrost_new_texture)(&pview, view->descs.tex.opaque, &ptr); @@ -221,6 +223,6 @@ panvk_per_arch(DestroyImageView)(VkDevice _device, VkImageView _view, if (!view) return; - panvk_priv_bo_unref(view->bo); + panvk_pool_free_mem(&device->mempools.rw, view->mem); vk_image_view_destroy(&device->vk, pAllocator, &view->vk); }