panvk: Implement a custom FB preload logic
This has several advantages over using pan_blitter for that:

- we can catch allocation failures and flag the command buffer invalid
- we can re-use the vk_meta_device object list to keep track of our preload shaders
- we can re-use surface descriptors instead of re-emitting them every time a preload is done

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Acked-by: Eric R. Smith <eric.smith@collabora.com>
Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31441>
Commit 0bc3502ca3 (parent 607e517a11), committed by Marge Bot.
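
The second advantage relies on vk_meta's object cache: each preload shader is keyed by a small struct and looked up before anything is compiled. Below is a condensed sketch of that lookup-or-create pattern, using the exact calls that appear in get_preload_shader() in the new panvk_vX_cmd_fb_preload.c further down; it is an illustration, not additional commit content.

   /* Sketch: the preload shader key doubles as the vk_meta cache key, so each
    * unique (view type, samples, aspects, formats) combination is compiled
    * once and reused afterwards. */
   VkShaderEXT handle = (VkShaderEXT)vk_meta_lookup_object(
      &dev->meta, VK_OBJECT_TYPE_SHADER_EXT, key, sizeof(*key));
   if (handle == VK_NULL_HANDLE) {
      /* ...build the NIR shader and compile it into `shader`... */
      handle = (VkShaderEXT)vk_meta_cache_object(
         &dev->vk, &dev->meta, key, sizeof(*key), VK_OBJECT_TYPE_SHADER_EXT,
         (uint64_t)panvk_internal_shader_to_handle(shader));
   }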
@@ -2621,11 +2621,8 @@ dEQP-VK.pipeline.pipeline_library.depth.nocolor.format.x8_d24_unorm_pack32.compa
 spec@ext_image_dma_buf_import@ext_image_dma_buf_import-refcount-multithread,Crash
 
 # physical device and device needs more robustness in allocation handling
-dEQP-VK.api.object_management.alloc_callback_fail.device,Crash
-dEQP-VK.api.object_management.alloc_callback_fail.device_group,Crash
 dEQP-VK.api.object_management.max_concurrent.device,Fail
 dEQP-VK.api.object_management.max_concurrent.device_group,Fail
-dEQP-VK.api.device_init.create_instance_device_intentional_alloc_fail.basic,Crash
 
 # query pool not supported yet
 dEQP-VK.api.null_handle.destroy_query_pool,Crash
@@ -750,7 +750,17 @@ GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
                                                 force_clean_write);
       cfg.post_frame = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[2],
                                                  force_clean_write);
+#if PAN_ARCH <= 7
+      /* On Bifrost, the layer_id is passed through a push_uniform, which forces
+       * us to have one pre/post DCD array per layer. */
+      cfg.frame_shader_dcds =
+         fb->bifrost.pre_post.dcds.gpu + (layer_idx * 3 * pan_size(DRAW));
+#else
+      /* On Valhall, layer_id is passed through the framebuffer frame_arg, which
+       * is preloaded in r62, so we can use the same pre/post DCD array for all
+       * layers. */
       cfg.frame_shader_dcds = fb->bifrost.pre_post.dcds.gpu;
+#endif
      cfg.tiler =
         PAN_ARCH >= 9 ? tiler_ctx->valhall.desc : tiler_ctx->bifrost.desc;
 #endif
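
These added lines encode the per-layer DCD layout: on Bifrost every layer owns three consecutive DCD slots (one per pre/post frame-shader mode), matching the dcds[(l * 3) + dcd_idx] stores later in this commit. A small sketch of the offset math, with a hypothetical helper name:

   /* Hypothetical helper illustrating the Bifrost pre/post DCD layout:
    * three DRAW descriptors per layer, laid out layer-major. */
   static inline mali_ptr
   bifrost_frame_shader_dcd(mali_ptr dcds_base, unsigned layer_idx, unsigned slot)
   {
      assert(slot < 3);
      return dcds_base + ((layer_idx * 3 + slot) * pan_size(DRAW));
   }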

@@ -40,7 +40,6 @@
 #include "panvk_physical_device.h"
 #include "panvk_priv_bo.h"
 
-#include "pan_blitter.h"
 #include "pan_desc.h"
 #include "pan_encoder.h"
 #include "pan_props.h"
@@ -16,6 +16,7 @@
 #include "panvk_cmd_alloc.h"
 #include "panvk_cmd_buffer.h"
 #include "panvk_cmd_desc_state.h"
+#include "panvk_cmd_fb_preload.h"
 #include "panvk_cmd_meta.h"
 #include "panvk_device.h"
 #include "panvk_entrypoints.h"
@@ -1977,23 +1978,6 @@ resolve_attachments(struct panvk_cmd_buffer *cmdbuf)
 static uint8_t
 prepare_fb_desc(struct panvk_cmd_buffer *cmdbuf, uint32_t layer, void *fbd)
 {
-   struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
-
-   memset(&cmdbuf->state.gfx.render.fb.info.bifrost.pre_post.dcds, 0,
-          sizeof(cmdbuf->state.gfx.render.fb.info.bifrost.pre_post.dcds));
-
-   if (cmdbuf->state.tls.desc.gpu) {
-      ASSERTED unsigned num_preload_jobs =
-         GENX(pan_preload_fb)(&dev->blitter.cache, &cmdbuf->desc_pool.base,
-                              &cmdbuf->state.gfx.render.fb.info, layer,
-                              cmdbuf->state.tls.desc.gpu, NULL);
-
-      /* Valhall GPUs use pre frame DCDs to preload the FB content. We
-       * thus expect num_preload_jobs to be zero.
-       */
-      assert(!num_preload_jobs);
-   }
-
    struct pan_tiler_context tiler_ctx = {
       .valhall.layer_offset = layer - (layer % MAX_LAYERS_PER_TILER_DESC),
    };
@@ -2092,11 +2076,11 @@ wait_finish_tiling(struct panvk_cmd_buffer *cmdbuf)
                     vt_sync_addr);
 }
 
-static void
+static VkResult
 issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
 {
    if (!cmdbuf->state.gfx.render.fbds.gpu)
-      return;
+      return VK_SUCCESS;
 
    struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
    struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
@@ -2132,6 +2116,10 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
    struct panfrost_ptr fbds = cmdbuf->state.gfx.render.fbds;
    uint8_t fbd_flags = 0;
 
+   VkResult result = panvk_per_arch(cmd_fb_preload)(cmdbuf);
+   if (result != VK_SUCCESS)
+      return result;
+
    /* We prepare all FB descriptors upfront. */
    for (uint32_t i = 0; i < cmdbuf->state.gfx.render.layer_count; i++) {
       uint32_t new_fbd_flags =
@@ -2280,6 +2268,8 @@ issue_fragment_jobs(struct panvk_cmd_buffer *cmdbuf)
    memset(&cmdbuf->state.gfx.render.fbds, 0,
           sizeof(cmdbuf->state.gfx.render.fbds));
    cmdbuf->state.gfx.render.tiler = 0;
+
+   return VK_SUCCESS;
 }
 
 void
@@ -32,6 +32,7 @@
 #include "panvk_cmd_alloc.h"
 #include "panvk_cmd_buffer.h"
 #include "panvk_cmd_desc_state.h"
+#include "panvk_cmd_fb_preload.h"
 #include "panvk_cmd_pool.h"
 #include "panvk_cmd_push_constant.h"
 #include "panvk_device.h"
@@ -40,7 +41,6 @@
 #include "panvk_physical_device.h"
 #include "panvk_priv_bo.h"
 
-#include "pan_blitter.h"
 #include "pan_desc.h"
 #include "pan_encoder.h"
 #include "pan_props.h"
@@ -140,22 +140,16 @@ panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf)
          panfrost_sample_positions_offset(
             pan_sample_pattern(fbinfo->nr_samples));
 
+   if (batch->vtc_jc.first_tiler) {
+      VkResult result = panvk_per_arch(cmd_fb_preload)(cmdbuf);
+      if (result != VK_SUCCESS)
+         return;
+   }
+
    for (uint32_t i = 0; i < batch->fb.layer_count; i++) {
       VkResult result;
 
       mali_ptr fbd = batch->fb.desc.gpu + (batch->fb.desc_stride * i);
-      if (batch->vtc_jc.first_tiler) {
-         cmdbuf->state.gfx.render.fb.info.bifrost.pre_post.dcds.gpu = 0;
-
-         ASSERTED unsigned num_preload_jobs = GENX(pan_preload_fb)(
-            &dev->blitter.cache, &cmdbuf->desc_pool.base,
-            &cmdbuf->state.gfx.render.fb.info, i, batch->tls.gpu, NULL);
-
-         /* Bifrost GPUs use pre frame DCDs to preload the FB content. We
-          * thus expect num_preload_jobs to be zero.
-          */
-         assert(!num_preload_jobs);
-      }
-
       result = panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf, i);
       if (result != VK_SUCCESS)
@@ -72,6 +72,7 @@ common_per_arch_files = [
   panvk_entrypoints[0],
   'panvk_vX_blend.c',
   'panvk_vX_buffer_view.c',
+  'panvk_vX_cmd_fb_preload.c',
   'panvk_vX_cmd_desc_state.c',
   'panvk_vX_cmd_meta.c',
   'panvk_vX_cmd_push_constant.c',
src/panfrost/vulkan/panvk_cmd_fb_preload.h (new file, 13 lines)
@@ -0,0 +1,13 @@
/*
 * Copyright © 2021 Collabora Ltd.
 * SPDX-License-Identifier: MIT
 */

#ifndef PANVK_FB_PRELOAD_H
#define PANVK_FB_PRELOAD_H

#include "panvk_cmd_buffer.h"

VkResult panvk_per_arch(cmd_fb_preload)(struct panvk_cmd_buffer *cmdbuf);

#endif
@@ -20,7 +20,6 @@
 
 #include "kmod/pan_kmod.h"
 #include "util/pan_ir.h"
-#include "pan_blitter.h"
 
 #include "util/vma.h"
 
@@ -43,22 +42,6 @@ struct panvk_device {
    struct panvk_priv_bo *tiler_heap;
    struct panvk_priv_bo *sample_positions;
 
-   /* Access to the blitter pools are protected by the blitter
-    * shader/rsd locks. They can't be merged with other binary/desc
-    * pools unless we patch pan_blitter.c to support external pool locks.
-    *
-    * FIXME: The blitter infrastructure is only needed for FB preload.
-    * We should probably consider getting rid of the dependency we have
-    * on pan_desc.c and implement preload ourselves so we don't have
-    * to duplicate caches.
-    */
-   struct {
-      struct panvk_pool bin_pool;
-      struct panvk_pool desc_pool;
-      struct pan_blitter_cache cache;
-      struct pan_blend_shader_cache blend_shader_cache;
-   } blitter;
-
    struct vk_meta_device meta;
 
    struct {
|
@@ -15,6 +15,7 @@
|
||||
enum panvk_meta_object_key_type {
|
||||
PANVK_META_OBJECT_KEY_BLEND_SHADER = VK_META_OBJECT_KEY_DRIVER_OFFSET,
|
||||
PANVK_META_OBJECT_KEY_COPY_DESC_SHADER,
|
||||
PANVK_META_OBJECT_KEY_FB_PRELOAD_SHADER,
|
||||
};
|
||||
|
||||
static inline VkFormat
|
||||
|
src/panfrost/vulkan/panvk_vX_cmd_fb_preload.c (new file, 750 lines)
@@ -0,0 +1,750 @@
/*
 * Copyright © 2021 Collabora Ltd.
 * SPDX-License-Identifier: MIT
 */

#include "panvk_cmd_alloc.h"
#include "panvk_cmd_fb_preload.h"
#include "panvk_image_view.h"
#include "panvk_meta.h"
#include "panvk_shader.h"

#include "nir_builder.h"

#include "pan_shader.h"

struct panvk_fb_preload_shader_key {
   enum panvk_meta_object_key_type type;
   VkImageViewType view_type;
   VkSampleCountFlagBits samples;
   VkImageAspectFlags aspects;
   bool needs_layer_id;
   struct {
      nir_alu_type type;
   } color[8];
};

static nir_def *
texel_fetch(nir_builder *b, VkImageViewType view_type, nir_alu_type reg_type,
            unsigned tex_idx, nir_def *sample_id, nir_def *coords)
{
   nir_tex_instr *tex = nir_tex_instr_create(b->shader, sample_id ? 3 : 2);

   tex->op = sample_id ? nir_texop_txf_ms : nir_texop_txf;
   tex->dest_type = reg_type;
   tex->is_array = vk_image_view_type_is_array(view_type);
   tex->sampler_dim = sample_id ? GLSL_SAMPLER_DIM_MS
                                : vk_image_view_type_to_sampler_dim(view_type);
   tex->coord_components = coords->num_components;
   tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, coords);
   tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_lod, nir_imm_int(b, 0));

   if (sample_id)
      tex->src[2] = nir_tex_src_for_ssa(nir_tex_src_ms_index, sample_id);

#if PAN_ARCH <= 7
   tex->sampler_index = 0;
   tex->texture_index = tex_idx;
#else
   tex->sampler_index = pan_res_handle(0, 0);
   tex->texture_index = pan_res_handle(0, tex_idx + 1);
#endif

   nir_def_init(&tex->instr, &tex->def, 4, 32);
   nir_builder_instr_insert(b, &tex->instr);

   return &tex->def;
}

static nir_variable *
color_output_var(nir_builder *b, VkImageViewType view_type,
                 VkImageAspectFlags aspect, VkSampleCountFlagBits samples,
                 nir_alu_type fmt_type, unsigned rt)
{
   enum glsl_base_type base_type = nir_get_glsl_base_type_for_nir_type(fmt_type);
   const struct glsl_type *var_type = glsl_vector_type(base_type, 4);
   static const char *var_names[] = {
      "gl_FragData[0]", "gl_FragData[1]", "gl_FragData[2]", "gl_FragData[3]",
      "gl_FragData[4]", "gl_FragData[5]", "gl_FragData[6]", "gl_FragData[7]",
   };

   assert(rt < ARRAY_SIZE(var_names));

   nir_variable *var = nir_variable_create(b->shader, nir_var_shader_out,
                                           var_type, var_names[rt]);
   var->data.location = FRAG_RESULT_DATA0 + rt;

   return var;
}

static nir_def *
get_layer_id(nir_builder *b)
{
#if PAN_ARCH <= 7
   return nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base = 0,
                                 .range = 4);
#else
   return nir_load_layer_id(b);
#endif
}

static nir_shader *
get_preload_nir_shader(const struct panvk_fb_preload_shader_key *key)
{
   nir_builder builder = nir_builder_init_simple_shader(
      MESA_SHADER_FRAGMENT, GENX(pan_shader_get_compiler_options)(),
      "panvk-meta-preload");
   nir_builder *b = &builder;
   nir_def *sample_id =
      key->samples != VK_SAMPLE_COUNT_1_BIT ? nir_load_sample_id(b) : NULL;
   nir_def *coords = nir_u2u32(b, nir_load_pixel_coord(b));

   if (key->view_type == VK_IMAGE_VIEW_TYPE_2D_ARRAY ||
       key->view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY ||
       key->view_type == VK_IMAGE_VIEW_TYPE_CUBE ||
       key->view_type == VK_IMAGE_VIEW_TYPE_3D) {
      coords =
         nir_vec3(b, nir_channel(b, coords, 0), nir_channel(b, coords, 1),
                  key->needs_layer_id ? get_layer_id(b) : nir_imm_int(b, 0));
   }

   if (key->aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
      for (uint32_t i = 0; i < ARRAY_SIZE(key->color); i++) {
         if (key->color[i].type == nir_type_invalid)
            continue;

         nir_def *texel = texel_fetch(b, key->view_type, key->color[i].type, i,
                                      sample_id, coords);

         nir_store_output(
            b, texel, nir_imm_int(b, 0), .base = i,
            .src_type = key->color[i].type,
            .io_semantics.location = FRAG_RESULT_DATA0 + i,
            .io_semantics.num_slots = 1,
            .write_mask = nir_component_mask(texel->num_components));
      }
   }

   if (key->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      nir_def *texel = texel_fetch(b, key->view_type, nir_type_float32, 0,
                                   sample_id, coords);

      nir_store_output(b, nir_channel(b, texel, 0), nir_imm_int(b, 0),
                       .base = 0, .src_type = nir_type_float32,
                       .io_semantics.location = FRAG_RESULT_DEPTH,
                       .io_semantics.num_slots = 1,
                       .write_mask = nir_component_mask(1));
   }

   if (key->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      nir_def *texel = texel_fetch(
         b, key->view_type, nir_type_uint32,
         key->aspects & VK_IMAGE_ASPECT_DEPTH_BIT ? 1 : 0, sample_id, coords);

      nir_store_output(b, nir_channel(b, texel, 0), nir_imm_int(b, 0),
                       .base = 0, .src_type = nir_type_uint32,
                       .io_semantics.location = FRAG_RESULT_STENCIL,
                       .io_semantics.num_slots = 1,
                       .write_mask = nir_component_mask(1));
   }

   return b->shader;
}
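
/* Illustration only (not part of the commit): the NIR built above is
 * conceptually equivalent to this GLSL fragment shader:
 *
 *    color RTs:  gl_FragData[i]    = texelFetch(rt_tex[i], coords, 0);
 *    depth:      gl_FragDepth      = texelFetch(z_tex, coords, 0).x;
 *    stencil:    gl_FragStencilRef = texelFetch(s_tex, coords, 0).x;
 *
 * with an extra sample index when samples > 1 and a layer coordinate when
 * needs_layer_id is set. */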

static VkResult
get_preload_shader(struct panvk_device *dev,
                   const struct panvk_fb_preload_shader_key *key,
                   struct panvk_internal_shader **shader_out)
{
   struct panvk_physical_device *phys_dev =
      to_panvk_physical_device(dev->vk.physical);
   struct panvk_internal_shader *shader;
   VkShaderEXT shader_handle = (VkShaderEXT)vk_meta_lookup_object(
      &dev->meta, VK_OBJECT_TYPE_SHADER_EXT, key, sizeof(*key));
   if (shader_handle != VK_NULL_HANDLE)
      goto out;

   nir_shader *nir = get_preload_nir_shader(key);

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   struct panfrost_compile_inputs inputs = {
      .gpu_id = phys_dev->kmod.props.gpu_prod_id,
      .no_ubo_to_push = true,
   };

   pan_shader_preprocess(nir, inputs.gpu_id);

   VkResult result =
      panvk_per_arch(create_internal_shader)(dev, nir, &inputs, &shader);
   if (result != VK_SUCCESS)
      return result;

#if PAN_ARCH >= 9
   shader->spd = panvk_pool_alloc_desc(&dev->mempools.rw, SHADER_PROGRAM);
   if (!panvk_priv_mem_host_addr(shader->spd)) {
      vk_shader_destroy(&dev->vk, &shader->vk, NULL);
      return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
   }

   pan_pack(panvk_priv_mem_host_addr(shader->spd), SHADER_PROGRAM, cfg) {
      cfg.stage = MALI_SHADER_STAGE_FRAGMENT;
      cfg.fragment_coverage_bitmask_type = MALI_COVERAGE_BITMASK_TYPE_GL;
      cfg.register_allocation = MALI_SHADER_REGISTER_ALLOCATION_32_PER_THREAD;
      cfg.binary = panvk_priv_mem_dev_addr(shader->code_mem);
      cfg.preload.r48_r63 = shader->info.preload >> 48;
   }
#endif

   shader_handle = (VkShaderEXT)vk_meta_cache_object(
      &dev->vk, &dev->meta, key, sizeof(*key), VK_OBJECT_TYPE_SHADER_EXT,
      (uint64_t)panvk_internal_shader_to_handle(shader));

out:
   shader = panvk_internal_shader_from_handle(shader_handle);
   *shader_out = shader;
   return VK_SUCCESS;
}

static VkResult
alloc_pre_post_dcds(struct panvk_cmd_buffer *cmdbuf)
{
   struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;

   if (fbinfo->bifrost.pre_post.dcds.gpu)
      return VK_SUCCESS;

   uint32_t dcd_count =
      3 * (PAN_ARCH <= 7 ? cmdbuf->state.gfx.render.layer_count : 1);

   fbinfo->bifrost.pre_post.dcds =
      panvk_cmd_alloc_desc_array(cmdbuf, dcd_count, DRAW);
   if (!fbinfo->bifrost.pre_post.dcds.cpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   return VK_SUCCESS;
}
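
/* Worked example (illustration, not in the commit): on Bifrost with
 * layer_count = 4, alloc_pre_post_dcds() reserves 3 * 4 = 12 DRAW
 * descriptors (pre/post slots 0..2 for each layer), while on Valhall a
 * single 3-entry array is shared by every layer, since layer_id arrives
 * through r62 rather than a per-layer push uniform. */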

static enum mali_register_file_format
get_reg_fmt(nir_alu_type type)
{
   switch (type) {
   case nir_type_float32:
      return MALI_REGISTER_FILE_FORMAT_F32;
   case nir_type_uint32:
      return MALI_REGISTER_FILE_FORMAT_U32;
   case nir_type_int32:
      return MALI_REGISTER_FILE_FORMAT_I32;
   default:
      assert(!"Invalid reg type");
      return MALI_REGISTER_FILE_FORMAT_F32;
   }
}

static void
fill_textures(struct panvk_cmd_buffer *cmdbuf,
              const struct panvk_fb_preload_shader_key *key,
              struct mali_texture_packed *textures)
{
   struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;

   if (key->aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
      for (unsigned i = 0; i < fbinfo->rt_count; i++) {
         struct panvk_image_view *iview =
            cmdbuf->state.gfx.render.color_attachments.iviews[i];

         if (iview)
            textures[i] = iview->descs.tex;
         else
            textures[i] = (struct mali_texture_packed){0};
      }
      return;
   }

   uint32_t idx = 0;
   if (key->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      struct panvk_image_view *iview =
         cmdbuf->state.gfx.render.z_attachment.iview
            ?: cmdbuf->state.gfx.render.s_attachment.iview;

      textures[idx++] = vk_format_has_depth(iview->vk.view_format)
                           ? iview->descs.tex
                           : iview->descs.other_aspect_tex;
   }

   if (key->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      struct panvk_image_view *iview =
         cmdbuf->state.gfx.render.s_attachment.iview
            ?: cmdbuf->state.gfx.render.z_attachment.iview;

      textures[idx++] = vk_format_has_depth(iview->vk.view_format)
                           ? iview->descs.other_aspect_tex
                           : iview->descs.tex;
   }
}

static void
fill_bds(struct panvk_cmd_buffer *cmdbuf,
         const struct panvk_fb_preload_shader_key *key,
         struct mali_blend_packed *bds)
{
   struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
   uint32_t bd_count = MAX2(fbinfo->rt_count, 1);

   for (unsigned i = 0; i < bd_count; i++) {
      const struct pan_image_view *pview =
         fbinfo->rts[i].preload ? fbinfo->rts[i].view : NULL;

      pan_pack(&bds[i], BLEND, cfg) {
         if (key->aspects != VK_IMAGE_ASPECT_COLOR_BIT || !pview) {
            cfg.enable = false;
            cfg.internal.mode = MALI_BLEND_MODE_OFF;
            continue;
         }

         cfg.round_to_fb_precision = true;
         cfg.srgb = util_format_is_srgb(pview->format);
         cfg.internal.mode = MALI_BLEND_MODE_OPAQUE;
         cfg.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
         cfg.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
         cfg.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
         cfg.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
         cfg.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
         cfg.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
         cfg.equation.color_mask = 0xf;

         cfg.internal.fixed_function.num_comps = 4;
         cfg.internal.fixed_function.conversion.memory_format = GENX(
            panfrost_dithered_format_from_pipe_format)(pview->format, false);
         cfg.internal.fixed_function.rt = i;
#if PAN_ARCH <= 7
         cfg.internal.fixed_function.conversion.register_format =
            get_reg_fmt(key->color[i].type);
#endif
      }
   }
}

#if PAN_ARCH <= 7
static VkResult
cmd_emit_dcd(struct panvk_cmd_buffer *cmdbuf,
             const struct panvk_fb_preload_shader_key *key)
{
   struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
   struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
   struct panvk_internal_shader *shader = NULL;

   VkResult result = get_preload_shader(dev, key, &shader);
   if (result != VK_SUCCESS)
      return result;

   uint32_t tex_count = key->aspects == VK_IMAGE_ASPECT_COLOR_BIT
                           ? fbinfo->rt_count
                           : util_bitcount(key->aspects);
   uint32_t bd_count = MAX2(fbinfo->rt_count, 1);

   struct panfrost_ptr rsd = panvk_cmd_alloc_desc_aggregate(
      cmdbuf, PAN_DESC(RENDERER_STATE), PAN_DESC_ARRAY(bd_count, BLEND));
   if (!rsd.cpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   pan_pack(rsd.cpu, RENDERER_STATE, cfg) {
      pan_shader_prepare_rsd(&shader->info,
                             panvk_priv_mem_dev_addr(shader->code_mem), &cfg);

      cfg.shader.texture_count = tex_count;
      cfg.shader.sampler_count = 1;

      cfg.multisample_misc.sample_mask = 0xFFFF;
      cfg.multisample_misc.multisample_enable = key->samples > 1;
      cfg.multisample_misc.evaluate_per_sample = key->samples > 1;

      cfg.multisample_misc.depth_function = MALI_FUNC_ALWAYS;
      cfg.multisample_misc.depth_write_mask =
         (key->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) != 0;

      cfg.stencil_mask_misc.stencil_enable =
         (key->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) != 0;
      cfg.stencil_mask_misc.stencil_mask_front = 0xFF;
      cfg.stencil_mask_misc.stencil_mask_back = 0xFF;
      cfg.stencil_front.compare_function = MALI_FUNC_ALWAYS;
      cfg.stencil_front.stencil_fail = MALI_STENCIL_OP_REPLACE;
      cfg.stencil_front.depth_fail = MALI_STENCIL_OP_REPLACE;
      cfg.stencil_front.depth_pass = MALI_STENCIL_OP_REPLACE;
      cfg.stencil_front.mask = 0xFF;

      cfg.stencil_back = cfg.stencil_front;

      if (key->aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
         /* Skipping ATEST requires forcing Z/S */
         cfg.properties.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
         cfg.properties.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY;
      } else {
         /* Writing Z/S requires late updates */
         cfg.properties.zs_update_operation = MALI_PIXEL_KILL_FORCE_LATE;
         cfg.properties.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_LATE;
      }

      /* However, while shaders writing Z/S can normally be killed, on v6
       * for frame shaders it can cause GPU timeouts, so only allow colour
       * blit shaders to be killed. */
      cfg.properties.allow_forward_pixel_to_kill =
         key->aspects == VK_IMAGE_ASPECT_COLOR_BIT;

      if (PAN_ARCH == 6)
         cfg.properties.allow_forward_pixel_to_be_killed =
            key->aspects == VK_IMAGE_ASPECT_COLOR_BIT;
   }

   fill_bds(cmdbuf, key, rsd.cpu + pan_size(RENDERER_STATE));

   struct panvk_batch *batch = cmdbuf->cur_batch;
   uint16_t minx = 0, miny = 0, maxx, maxy;

   /* Align on 32x32 tiles */
   minx = fbinfo->extent.minx & ~31;
   miny = fbinfo->extent.miny & ~31;
   maxx = MIN2(ALIGN_POT(fbinfo->extent.maxx + 1, 32), fbinfo->width) - 1;
   maxy = MIN2(ALIGN_POT(fbinfo->extent.maxy + 1, 32), fbinfo->height) - 1;

   struct panfrost_ptr vpd = panvk_cmd_alloc_desc(cmdbuf, VIEWPORT);
   if (!vpd.cpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   pan_pack(vpd.cpu, VIEWPORT, cfg) {
      cfg.scissor_minimum_x = minx;
      cfg.scissor_minimum_y = miny;
      cfg.scissor_maximum_x = maxx;
      cfg.scissor_maximum_y = maxy;
   }

   struct panfrost_ptr sampler = panvk_cmd_alloc_desc(cmdbuf, SAMPLER);
   if (!sampler.cpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   pan_pack(sampler.cpu, SAMPLER, cfg) {
      cfg.seamless_cube_map = false;
      cfg.normalized_coordinates = false;
      cfg.minify_nearest = true;
      cfg.magnify_nearest = true;
   }

   struct panfrost_ptr textures =
      panvk_cmd_alloc_desc_array(cmdbuf, tex_count, TEXTURE);
   if (!textures.cpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   fill_textures(cmdbuf, key, textures.cpu);

   result = alloc_pre_post_dcds(cmdbuf);
   if (result != VK_SUCCESS)
      return result;

   struct mali_draw_packed dcd_base;

   pan_pack(&dcd_base, DRAW, cfg) {
      cfg.thread_storage = batch->tls.gpu;
      cfg.state = rsd.gpu;

      cfg.viewport = vpd.gpu;

      cfg.textures = textures.gpu;
      cfg.samplers = sampler.gpu;

#if PAN_ARCH >= 6
      /* Until we decide to support FB CRC, we can consider that untouched
       * tiles should never be written back. */
      cfg.clean_fragment_write = false;
#endif
   }

   struct mali_draw_packed *dcds = fbinfo->bifrost.pre_post.dcds.cpu;
   uint32_t dcd_idx = key->aspects == VK_IMAGE_ASPECT_COLOR_BIT ? 0 : 1;

   if (key->needs_layer_id) {
      struct panfrost_ptr layer_ids = panvk_cmd_alloc_dev_mem(
         cmdbuf, desc, cmdbuf->state.gfx.render.layer_count * sizeof(uint64_t),
         sizeof(uint64_t));
      if (!layer_ids.cpu)
         return VK_ERROR_OUT_OF_DEVICE_MEMORY;

      uint32_t *layer_id = layer_ids.cpu;

      for (uint32_t l = 0; l < cmdbuf->state.gfx.render.layer_count; l++) {
         struct mali_draw_packed dcd_layer;

         /* Push uniform pointer has to be 8-byte aligned, so we have to skip
          * odd layer_id entries. */
         layer_id[2 * l] = l;
         pan_pack(&dcd_layer, DRAW, cfg) {
            cfg.push_uniforms = layer_ids.gpu + (sizeof(uint64_t) * l);
         };

         pan_merge(dcd_layer, dcd_base, DRAW);
         dcds[(l * 3) + dcd_idx] = dcd_layer;
      }
   } else {
      dcds[dcd_idx] = dcd_base;
   }

   if (key->aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
      fbinfo->bifrost.pre_post.modes[dcd_idx] =
         MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;
   } else {
      enum pipe_format fmt = fbinfo->zs.view.zs
                                ? fbinfo->zs.view.zs->planes[0]->layout.format
                                : fbinfo->zs.view.s->planes[0]->layout.format;
      bool always = false;

      /* If we're dealing with a combined ZS resource and only one
       * component is cleared, we need to reload the whole surface
       * because the zs_clean_pixel_write_enable flag is set in that
       * case.
       */
      if (util_format_is_depth_and_stencil(fmt) &&
          fbinfo->zs.clear.z != fbinfo->zs.clear.s)
         always = true;

      /* We could use INTERSECT on Bifrost v7 too, but
       * EARLY_ZS_ALWAYS has the advantage of reloading the ZS tile
       * buffer one or more tiles ahead, making ZS data immediately
       * available for any ZS tests taking place in other shaders.
       * Things haven't been benchmarked to determine what's
       * preferable (saving bandwidth vs having ZS preloaded
       * earlier), so let's leave it like that for now.
       */
      fbinfo->bifrost.pre_post.modes[dcd_idx] =
         PAN_ARCH > 6
            ? MALI_PRE_POST_FRAME_SHADER_MODE_EARLY_ZS_ALWAYS
         : always ? MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS
                  : MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;
   }

   return VK_SUCCESS;
}
#else
static VkResult
cmd_emit_dcd(struct panvk_cmd_buffer *cmdbuf,
             struct panvk_fb_preload_shader_key *key)
{
   struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
   struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
   struct panvk_internal_shader *shader = NULL;

   VkResult result = get_preload_shader(dev, key, &shader);
   if (result != VK_SUCCESS)
      return result;

   uint32_t bd_count =
      key->aspects == VK_IMAGE_ASPECT_COLOR_BIT ? fbinfo->rt_count : 0;
   struct panfrost_ptr bds = panvk_cmd_alloc_desc_array(cmdbuf, bd_count, BLEND);
   if (bd_count > 0 && !bds.cpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   uint32_t tex_count = key->aspects == VK_IMAGE_ASPECT_COLOR_BIT
                           ? fbinfo->rt_count
                           : util_bitcount(key->aspects);
   uint32_t desc_count = tex_count + 1;

   struct panfrost_ptr descs = panvk_cmd_alloc_dev_mem(
      cmdbuf, desc, desc_count * PANVK_DESCRIPTOR_SIZE, PANVK_DESCRIPTOR_SIZE);
   if (!descs.cpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   struct mali_sampler_packed *sampler = descs.cpu;

   pan_pack(sampler, SAMPLER, cfg) {
      cfg.seamless_cube_map = false;
      cfg.normalized_coordinates = false;
      cfg.minify_nearest = true;
      cfg.magnify_nearest = true;
   }

   fill_textures(cmdbuf, key, descs.cpu + PANVK_DESCRIPTOR_SIZE);

   if (key->aspects == VK_IMAGE_ASPECT_COLOR_BIT)
      fill_bds(cmdbuf, key, bds.cpu);

   struct panfrost_ptr res_table = panvk_cmd_alloc_desc(cmdbuf, RESOURCE);
   if (!res_table.cpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   pan_pack(res_table.cpu, RESOURCE, cfg) {
      cfg.address = descs.gpu;
      cfg.size = desc_count * PANVK_DESCRIPTOR_SIZE;
   }

   struct panfrost_ptr zsd = panvk_cmd_alloc_desc(cmdbuf, DEPTH_STENCIL);
   if (!zsd.cpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   pan_pack(zsd.cpu, DEPTH_STENCIL, cfg) {
      cfg.depth_function = MALI_FUNC_ALWAYS;
      cfg.depth_write_enable = fbinfo->zs.preload.z;

      if (fbinfo->zs.preload.z)
         cfg.depth_source = MALI_DEPTH_SOURCE_SHADER;

      cfg.stencil_test_enable = fbinfo->zs.preload.s;
      cfg.stencil_from_shader = fbinfo->zs.preload.s;

      cfg.front_compare_function = MALI_FUNC_ALWAYS;
      cfg.front_stencil_fail = MALI_STENCIL_OP_REPLACE;
      cfg.front_depth_fail = MALI_STENCIL_OP_REPLACE;
      cfg.front_depth_pass = MALI_STENCIL_OP_REPLACE;
      cfg.front_write_mask = 0xFF;
      cfg.front_value_mask = 0xFF;

      cfg.back_compare_function = MALI_FUNC_ALWAYS;
      cfg.back_stencil_fail = MALI_STENCIL_OP_REPLACE;
      cfg.back_depth_fail = MALI_STENCIL_OP_REPLACE;
      cfg.back_depth_pass = MALI_STENCIL_OP_REPLACE;
      cfg.back_write_mask = 0xFF;
      cfg.back_value_mask = 0xFF;

      cfg.depth_cull_enable = false;
   }

   result = alloc_pre_post_dcds(cmdbuf);
   if (result != VK_SUCCESS)
      return result;

   struct mali_draw_packed *dcds = fbinfo->bifrost.pre_post.dcds.cpu;
   uint32_t dcd_idx = key->aspects == VK_IMAGE_ASPECT_COLOR_BIT ? 0 : 1;

   pan_pack(&dcds[dcd_idx], DRAW, cfg) {
      if (key->aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
         /* Skipping ATEST requires forcing Z/S */
         cfg.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
         cfg.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY;

         cfg.blend = bds.gpu;
         cfg.blend_count = bd_count;
         cfg.render_target_mask = cmdbuf->state.gfx.render.bound_attachments;
      } else {
         /* ZS_EMIT requires late update/kill */
         cfg.zs_update_operation = MALI_PIXEL_KILL_FORCE_LATE;
         cfg.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_LATE;
         cfg.blend_count = 0;
      }

      cfg.allow_forward_pixel_to_kill =
         key->aspects == VK_IMAGE_ASPECT_COLOR_BIT;
      cfg.allow_forward_pixel_to_be_killed = true;
      cfg.depth_stencil = zsd.gpu;
      cfg.sample_mask = 0xFFFF;
      cfg.multisample_enable = key->samples > 1;
      cfg.evaluate_per_sample = key->samples > 1;
      cfg.maximum_z = 1.0;
      cfg.clean_fragment_write = false;
      cfg.shader.resources = res_table.gpu | 1;
      cfg.shader.shader = panvk_priv_mem_dev_addr(shader->spd);
      cfg.shader.thread_storage = cmdbuf->state.gfx.tsd;
   }

   if (key->aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
      fbinfo->bifrost.pre_post.modes[dcd_idx] =
         MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;
   } else {
      /* We could use INTERSECT on Valhall too, but
       * EARLY_ZS_ALWAYS has the advantage of reloading the ZS tile
       * buffer one or more tiles ahead, making ZS data immediately
       * available for any ZS tests taking place in other shaders.
       * Things haven't been benchmarked to determine what's
       * preferable (saving bandwidth vs having ZS preloaded
       * earlier), so let's leave it like that for now.
       */
      fbinfo->bifrost.pre_post.modes[dcd_idx] =
         MALI_PRE_POST_FRAME_SHADER_MODE_EARLY_ZS_ALWAYS;
   }

   return VK_SUCCESS;
}
#endif

static VkResult
cmd_preload_zs_attachments(struct panvk_cmd_buffer *cmdbuf)
{
   struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;

   if (!fbinfo->zs.preload.s && !fbinfo->zs.preload.z)
      return VK_SUCCESS;

   struct panvk_fb_preload_shader_key key = {
      .type = PANVK_META_OBJECT_KEY_FB_PRELOAD_SHADER,
      .samples = fbinfo->nr_samples,
      .needs_layer_id = cmdbuf->state.gfx.render.layer_count > 1,
   };

   if (fbinfo->zs.preload.z) {
      key.aspects = VK_IMAGE_ASPECT_DEPTH_BIT;
      key.view_type =
         cmdbuf->state.gfx.render.z_attachment.iview
            ? cmdbuf->state.gfx.render.z_attachment.iview->vk.view_type
            : cmdbuf->state.gfx.render.s_attachment.iview->vk.view_type;
   }

   if (fbinfo->zs.preload.s) {
      VkImageViewType view_type =
         cmdbuf->state.gfx.render.s_attachment.iview
            ? cmdbuf->state.gfx.render.s_attachment.iview->vk.view_type
            : cmdbuf->state.gfx.render.z_attachment.iview->vk.view_type;

      key.aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
      if (!fbinfo->zs.preload.z)
         key.view_type = view_type;

      assert(key.view_type == view_type);
   }

   return cmd_emit_dcd(cmdbuf, &key);
}

static VkResult
cmd_preload_color_attachments(struct panvk_cmd_buffer *cmdbuf)
{
   struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
   struct panvk_fb_preload_shader_key key = {
      .type = PANVK_META_OBJECT_KEY_FB_PRELOAD_SHADER,
      .samples = fbinfo->nr_samples,
      .needs_layer_id = cmdbuf->state.gfx.render.layer_count > 1,
      .aspects = VK_IMAGE_ASPECT_COLOR_BIT,
   };
   bool needs_preload = false;

   for (uint32_t i = 0; i < fbinfo->rt_count; i++) {
      if (!fbinfo->rts[i].preload)
         continue;

      enum pipe_format pfmt = fbinfo->rts[i].view->format;
      struct panvk_image_view *iview =
         cmdbuf->state.gfx.render.color_attachments.iviews[i];

      key.color[i].type = util_format_is_pure_uint(pfmt)   ? nir_type_uint32
                          : util_format_is_pure_sint(pfmt) ? nir_type_int32
                                                           : nir_type_float32;

      if (!needs_preload) {
         key.view_type = iview->vk.view_type;
         needs_preload = true;
      }

      assert(key.view_type == iview->vk.view_type);
   }

   if (!needs_preload)
      return VK_SUCCESS;

   return cmd_emit_dcd(cmdbuf, &key);
}

VkResult
panvk_per_arch(cmd_fb_preload)(struct panvk_cmd_buffer *cmdbuf)
{
   VkResult result = cmd_preload_color_attachments(cmdbuf);
   if (result != VK_SUCCESS)
      return result;

   return cmd_preload_zs_attachments(cmdbuf);
}
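
For context, a minimal caller sketch showing how the VkResult returned by this entry point can be turned into the "flag the command buffer invalid" behavior the commit message promises. The function name flush_fragment_work() is hypothetical, and vk_command_buffer_set_error() is assumed here as the common Mesa runtime helper that records a deferred error on the command buffer:

   /* Hypothetical caller sketch: propagate preload failures by flagging the
    * command buffer, mirroring the VkResult plumbing added to
    * issue_fragment_jobs() above. */
   static void
   flush_fragment_work(struct panvk_cmd_buffer *cmdbuf)
   {
      VkResult result = panvk_per_arch(cmd_fb_preload)(cmdbuf);
      if (result != VK_SUCCESS) {
         /* Assumption: vk_command_buffer_set_error() records the error so
          * that vkEndCommandBuffer() reports it later. */
         vk_command_buffer_set_error(&cmdbuf->vk, result);
         return;
      }
      /* ...emit the fragment jobs... */
   }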

@@ -150,49 +150,6 @@ panvk_meta_cleanup(struct panvk_device *device)
    vk_meta_device_finish(&device->vk, &device->meta);
 }
 
-static void
-panvk_preload_blitter_init(struct panvk_device *device)
-{
-   const struct panvk_physical_device *physical_device =
-      to_panvk_physical_device(device->vk.physical);
-
-   struct panvk_pool_properties bin_pool_props = {
-      .create_flags = PAN_KMOD_BO_FLAG_EXECUTABLE,
-      .slab_size = 16 * 1024,
-      .label = "panvk_meta blitter binary pool",
-      .owns_bos = true,
-      .needs_locking = false,
-      .prealloc = false,
-   };
-   panvk_pool_init(&device->blitter.bin_pool, device, NULL, &bin_pool_props);
-
-   struct panvk_pool_properties desc_pool_props = {
-      .create_flags = 0,
-      .slab_size = 16 * 1024,
-      .label = "panvk_meta blitter descriptor pool",
-      .owns_bos = true,
-      .needs_locking = false,
-      .prealloc = false,
-   };
-   panvk_pool_init(&device->blitter.desc_pool, device, NULL, &desc_pool_props);
-
-   pan_blend_shader_cache_init(&device->blitter.blend_shader_cache,
-                               physical_device->kmod.props.gpu_prod_id);
-   GENX(pan_blitter_cache_init)
-   (&device->blitter.cache, physical_device->kmod.props.gpu_prod_id,
-    &device->blitter.blend_shader_cache, &device->blitter.bin_pool.base,
-    &device->blitter.desc_pool.base);
-}
-
-static void
-panvk_preload_blitter_cleanup(struct panvk_device *device)
-{
-   GENX(pan_blitter_cache_cleanup)(&device->blitter.cache);
-   pan_blend_shader_cache_cleanup(&device->blitter.blend_shader_cache);
-   panvk_pool_cleanup(&device->blitter.desc_pool);
-   panvk_pool_cleanup(&device->blitter.bin_pool);
-}
-
 /* Always reserve the lower 32MB. */
 #define PANVK_VA_RESERVE_BOTTOM 0x2000000ull
 
@@ -311,11 +268,9 @@ panvk_per_arch(create_device)(struct panvk_physical_device *physical_device,
 
    vk_device_set_drm_fd(&device->vk, device->kmod.dev->fd);
 
-   panvk_preload_blitter_init(device);
-
    result = panvk_meta_init(device);
    if (result != VK_SUCCESS)
-      goto err_cleanup_blitter;
+      goto err_free_priv_bos;
 
    for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
       const VkDeviceQueueCreateInfo *queue_create =
@@ -356,9 +311,6 @@ err_finish_queues:
 
    panvk_meta_cleanup(device);
 
-err_cleanup_blitter:
-   panvk_preload_blitter_cleanup(device);
-
 err_free_priv_bos:
    panvk_priv_bo_unref(device->sample_positions);
    panvk_priv_bo_unref(device->tiler_heap);
@@ -393,7 +345,6 @@ panvk_per_arch(destroy_device)(struct panvk_device *device,
    }
 
    panvk_meta_cleanup(device);
-   panvk_preload_blitter_cleanup(device);
    panvk_priv_bo_unref(device->tiler_heap);
    panvk_priv_bo_unref(device->sample_positions);
    panvk_device_cleanup_mempools(device);