/*
 * Copyright 2024 Valve Corporation
 * Copyright 2024 Alyssa Rosenzweig
 * Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
#include "hk_cmd_buffer.h"

#include "agx_bo.h"
#include "agx_device.h"
#include "agx_linker.h"
#include "agx_tilebuffer.h"
#include "agx_usc.h"
#include "hk_buffer.h"
#include "hk_cmd_pool.h"
#include "hk_descriptor_set.h"
#include "hk_descriptor_set_layout.h"
#include "hk_device.h"
#include "hk_device_memory.h"
#include "hk_entrypoints.h"
#include "hk_image_view.h"
#include "hk_physical_device.h"
#include "hk_shader.h"

#include "pool.h"
#include "shader_enums.h"
#include "vk_pipeline_layout.h"
#include "vk_synchronization.h"

#include "util/list.h"
#include "util/macros.h"
#include "util/u_dynarray.h"
#include "vulkan/vulkan_core.h"

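/* Release the push descriptor set storage attached to a descriptor state.
 * These allocations come from the command pool's allocator (see
 * hk_cmd_push_descriptors below).
 */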
static void
hk_descriptor_state_fini(struct hk_cmd_buffer *cmd,
                         struct hk_descriptor_state *desc)
{
   struct hk_cmd_pool *pool = hk_cmd_buffer_pool(cmd);

   for (unsigned i = 0; i < HK_MAX_SETS; i++) {
      vk_free(&pool->vk.alloc, desc->push[i]);
      desc->push[i] = NULL;
   }
}

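/* Return everything the command buffer owns that is rebuilt on reset:
 * descriptor state, pooled upload BOs, recorded control streams, and any
 * large BOs allocated directly from the device.
 */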
static void
hk_free_resettable_cmd_buffer(struct hk_cmd_buffer *cmd)
{
   struct hk_cmd_pool *pool = hk_cmd_buffer_pool(cmd);
   struct hk_device *dev = hk_cmd_pool_device(pool);

   hk_descriptor_state_fini(cmd, &cmd->state.gfx.descriptors);
   hk_descriptor_state_fini(cmd, &cmd->state.cs.descriptors);

   hk_cmd_pool_free_bo_list(pool, &cmd->uploader.main.bos);
   hk_cmd_pool_free_usc_bo_list(pool, &cmd->uploader.usc.bos);

   list_for_each_entry_safe(struct hk_cs, it, &cmd->control_streams, node) {
      list_del(&it->node);
      hk_cs_destroy(it);
   }

   util_dynarray_foreach(&cmd->large_bos, struct agx_bo *, bo) {
      agx_bo_unreference(&dev->dev, *bo);
   }

   util_dynarray_clear(&cmd->large_bos);
}

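/* vk_command_buffer_ops::destroy entrypoint */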
static void
hk_destroy_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer)
{
   struct hk_cmd_buffer *cmd =
      container_of(vk_cmd_buffer, struct hk_cmd_buffer, vk);
   struct hk_cmd_pool *pool = hk_cmd_buffer_pool(cmd);

   hk_free_resettable_cmd_buffer(cmd);
   vk_command_buffer_finish(&cmd->vk);
   vk_free(&pool->vk.alloc, cmd);
}

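/* vk_command_buffer_ops::create entrypoint */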
static VkResult
hk_create_cmd_buffer(struct vk_command_pool *vk_pool,
                     VkCommandBufferLevel level,
                     struct vk_command_buffer **cmd_buffer_out)
{
   struct hk_cmd_pool *pool = container_of(vk_pool, struct hk_cmd_pool, vk);
   struct hk_device *dev = hk_cmd_pool_device(pool);
   struct hk_cmd_buffer *cmd;
   VkResult result;

   cmd = vk_zalloc(&pool->vk.alloc, sizeof(*cmd), 8,
                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (cmd == NULL)
      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);

   result =
      vk_command_buffer_init(&pool->vk, &cmd->vk, &hk_cmd_buffer_ops, level);
   if (result != VK_SUCCESS) {
      vk_free(&pool->vk.alloc, cmd);
      return result;
   }

   util_dynarray_init(&cmd->large_bos, NULL);

   cmd->vk.dynamic_graphics_state.vi = &cmd->state.gfx._dynamic_vi;
   cmd->vk.dynamic_graphics_state.ms.sample_locations =
      &cmd->state.gfx._dynamic_sl;

   list_inithead(&cmd->uploader.main.bos);
   list_inithead(&cmd->uploader.usc.bos);
   list_inithead(&cmd->control_streams);

   *cmd_buffer_out = &cmd->vk;

   return VK_SUCCESS;
}

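/* vk_command_buffer_ops::reset entrypoint, also invoked from
 * hk_BeginCommandBuffer.
 */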
static void
hk_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer,
                    UNUSED VkCommandBufferResetFlags flags)
{
   struct hk_cmd_buffer *cmd =
      container_of(vk_cmd_buffer, struct hk_cmd_buffer, vk);

   vk_command_buffer_reset(&cmd->vk);
   hk_free_resettable_cmd_buffer(cmd);

   cmd->uploader.main.map = NULL;
   cmd->uploader.main.base = 0;
   cmd->uploader.main.offset = 0;
   cmd->uploader.usc.map = NULL;
   cmd->uploader.usc.base = 0;
   cmd->uploader.usc.offset = 0;

   cmd->current_cs.gfx = NULL;
   cmd->current_cs.cs = NULL;
   cmd->current_cs.post_gfx = NULL;
   cmd->current_cs.pre_gfx = NULL;

   /* TODO: clear pool! */

   memset(&cmd->state, 0, sizeof(cmd->state));
}

const struct vk_command_buffer_ops hk_cmd_buffer_ops = {
   .create = hk_create_cmd_buffer,
   .reset = hk_reset_cmd_buffer,
   .destroy = hk_destroy_cmd_buffer,
};

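/* Grab a fresh BO from the command pool and track it on the matching uploader
 * list so it can be returned to the pool on reset/destroy. USC allocations
 * are pooled separately since they must live in the low VA range (cf.
 * AGX_BO_LOW_VA in hk_pool_alloc_internal).
 */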
static VkResult
hk_cmd_buffer_alloc_bo(struct hk_cmd_buffer *cmd, bool usc,
                       struct hk_cmd_bo **bo_out)
{
   VkResult result = hk_cmd_pool_alloc_bo(hk_cmd_buffer_pool(cmd), usc, bo_out);
   if (result != VK_SUCCESS)
      return result;

   if (usc)
      list_addtail(&(*bo_out)->link, &cmd->uploader.usc.bos);
   else
      list_addtail(&(*bo_out)->link, &cmd->uploader.main.bos);

   return VK_SUCCESS;
}

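/* Upload allocator backing hk_pool_alloc/hk_pool_usc_alloc. Small requests
 * are suballocated by bumping an offset within the current HK_CMD_BO_SIZE
 * upload BO; oversized requests get a dedicated BO owned by the command
 * buffer. If a pool BO cannot be allocated, the error is flagged on the
 * command buffer and a zeroed pointer is returned.
 */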
struct agx_ptr
hk_pool_alloc_internal(struct hk_cmd_buffer *cmd, uint32_t size,
                       uint32_t alignment, bool usc)
{
   struct hk_device *dev = hk_cmd_buffer_device(cmd);
   struct hk_uploader *uploader =
      usc ? &cmd->uploader.usc : &cmd->uploader.main;

   /* Specially handle large allocations owned by the command buffer, e.g. used
    * for statically allocated vertex output buffers with geometry shaders.
    */
   if (size > HK_CMD_BO_SIZE) {
      uint32_t flags = usc ? AGX_BO_LOW_VA : 0;
      struct agx_bo *bo =
         agx_bo_create(&dev->dev, size, flags, 0, "Large pool allocation");

      util_dynarray_append(&cmd->large_bos, struct agx_bo *, bo);
      return (struct agx_ptr){
         .gpu = bo->va->addr,
         .cpu = bo->map,
      };
   }

   assert(size <= HK_CMD_BO_SIZE);
   assert(alignment > 0);

   uint32_t offset = align(uploader->offset, alignment);

   assert(offset <= HK_CMD_BO_SIZE);
   if (uploader->map != NULL && size <= HK_CMD_BO_SIZE - offset) {
      uploader->offset = offset + size;

      return (struct agx_ptr){
         .gpu = uploader->base + offset,
         .cpu = uploader->map + offset,
      };
   }

   struct hk_cmd_bo *bo;
   VkResult result = hk_cmd_buffer_alloc_bo(cmd, usc, &bo);
   if (unlikely(result != VK_SUCCESS)) {
      vk_command_buffer_set_error(&cmd->vk, result);
      return (struct agx_ptr){0};
   }

   /* Pick whichever of the current upload BO and the new BO will have more
    * room left to be the BO for the next upload. If our upload size is
    * bigger than the old offset, we're better off burning the whole new
    * upload BO on this one allocation and continuing on the current upload
    * BO.
    */
   if (uploader->map == NULL || size < uploader->offset) {
      uploader->map = bo->bo->map;
      uploader->base = bo->bo->va->addr;
      uploader->offset = size;
   }

   return (struct agx_ptr){
      .gpu = bo->bo->va->addr,
      .cpu = bo->map,
   };
}

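/* Convenience wrapper: allocate from the upload pool and copy data in,
 * returning the GPU address (or 0 on failure).
 */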
uint64_t
hk_pool_upload(struct hk_cmd_buffer *cmd, const void *data, uint32_t size,
               uint32_t alignment)
{
   struct agx_ptr T = hk_pool_alloc(cmd, size, alignment);
   if (unlikely(T.cpu == NULL))
      return 0;

   memcpy(T.cpu, data, size);
   return T.gpu;
}

VKAPI_ATTR VkResult VKAPI_CALL
hk_BeginCommandBuffer(VkCommandBuffer commandBuffer,
                      const VkCommandBufferBeginInfo *pBeginInfo)
{
   VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);

   hk_reset_cmd_buffer(&cmd->vk, 0);

   hk_cmd_buffer_begin_compute(cmd, pBeginInfo);
   hk_cmd_buffer_begin_graphics(cmd, pBeginInfo);

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
hk_EndCommandBuffer(VkCommandBuffer commandBuffer)
{
   VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);

   assert(cmd->current_cs.gfx == NULL && cmd->current_cs.pre_gfx == NULL &&
          "must end rendering before ending the command buffer");

   hk_cmd_buffer_end_compute(cmd);
   hk_cmd_buffer_end_compute_internal(&cmd->current_cs.post_gfx);

   return vk_command_buffer_get_record_result(&cmd->vk);
}

VKAPI_ATTR void VKAPI_CALL
hk_CmdPipelineBarrier2(VkCommandBuffer commandBuffer,
                       const VkDependencyInfo *pDependencyInfo)
{
   VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);

   /* The big hammer. We end both compute and graphics batches. Ending compute
    * here is necessary to properly handle graphics->compute dependencies.
    *
    * XXX: perf.
    */
   hk_cmd_buffer_end_compute(cmd);
   hk_cmd_buffer_end_graphics(cmd);
}

void
hk_cmd_bind_shaders(struct vk_command_buffer *vk_cmd, uint32_t stage_count,
                    const gl_shader_stage *stages,
                    struct vk_shader **const shaders)
{
   struct hk_cmd_buffer *cmd = container_of(vk_cmd, struct hk_cmd_buffer, vk);

   for (uint32_t i = 0; i < stage_count; i++) {
      struct hk_api_shader *shader =
         container_of(shaders[i], struct hk_api_shader, vk);

      if (stages[i] == MESA_SHADER_COMPUTE || stages[i] == MESA_SHADER_KERNEL)
         hk_cmd_bind_compute_shader(cmd, shader);
      else
         hk_cmd_bind_graphics_shader(cmd, stages[i], shader);
   }
}

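/* Common implementation of vkCmdBindDescriptorSets2 for one bind point:
 * record the set addresses/sizes in the root table and apply dynamic buffer
 * offsets.
 */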
static void
hk_bind_descriptor_sets(UNUSED struct hk_cmd_buffer *cmd,
                        struct hk_descriptor_state *desc,
                        const VkBindDescriptorSetsInfoKHR *info)
{
   VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, info->layout);

   /* From the Vulkan 1.3.275 spec:
    *
    *    "When binding a descriptor set (see Descriptor Set Binding) to
    *    set number N...
    *
    *    If, additionally, the previously bound descriptor set for set
    *    N was bound using a pipeline layout not compatible for set N,
    *    then all bindings in sets numbered greater than N are
    *    disturbed."
    *
    * This means that, if some earlier set gets bound in such a way that
    * it changes set_dynamic_buffer_start[s], this binding is implicitly
    * invalidated. Therefore, we can always look at the current value
    * of set_dynamic_buffer_start[s] as the base of our dynamic buffer
    * range and it's only our responsibility to adjust all
    * set_dynamic_buffer_start[p] for p > s as needed.
    */
   uint8_t dyn_buffer_start =
      desc->root.set_dynamic_buffer_start[info->firstSet];

   uint32_t next_dyn_offset = 0;
   for (uint32_t i = 0; i < info->descriptorSetCount; ++i) {
      unsigned s = i + info->firstSet;
      VK_FROM_HANDLE(hk_descriptor_set, set, info->pDescriptorSets[i]);

      if (desc->sets[s] != set) {
         if (set != NULL) {
            desc->root.sets[s] = hk_descriptor_set_addr(set);
            desc->set_sizes[s] = set->size;
         } else {
            desc->root.sets[s] = 0;
            desc->set_sizes[s] = 0;
         }
         desc->sets[s] = set;
         desc->sets_dirty |= BITFIELD_BIT(s);

         /* Binding descriptors invalidates push descriptors */
         desc->push_dirty &= ~BITFIELD_BIT(s);
      }

      desc->root.set_dynamic_buffer_start[s] = dyn_buffer_start;

      if (pipeline_layout->set_layouts[s] != NULL) {
         const struct hk_descriptor_set_layout *set_layout =
            vk_to_hk_descriptor_set_layout(pipeline_layout->set_layouts[s]);

         if (set != NULL && set_layout->dynamic_buffer_count > 0) {
            for (uint32_t j = 0; j < set_layout->dynamic_buffer_count; j++) {
               struct hk_buffer_address addr = set->dynamic_buffers[j];
               addr.base_addr += info->pDynamicOffsets[next_dyn_offset + j];
               desc->root.dynamic_buffers[dyn_buffer_start + j] = addr;
            }
            next_dyn_offset += set->layout->dynamic_buffer_count;
         }

         dyn_buffer_start += set_layout->dynamic_buffer_count;
      } else {
         assert(set == NULL);
      }
   }
   assert(dyn_buffer_start <= HK_MAX_DYNAMIC_BUFFERS);
   assert(next_dyn_offset <= info->dynamicOffsetCount);

   for (uint32_t s = info->firstSet + info->descriptorSetCount; s < HK_MAX_SETS;
        s++)
      desc->root.set_dynamic_buffer_start[s] = dyn_buffer_start;

   desc->root_dirty = true;
}

VKAPI_ATTR void VKAPI_CALL
hk_CmdBindDescriptorSets2KHR(
   VkCommandBuffer commandBuffer,
   const VkBindDescriptorSetsInfoKHR *pBindDescriptorSetsInfo)
{
   VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);

   if (pBindDescriptorSetsInfo->stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS) {
      hk_bind_descriptor_sets(cmd, &cmd->state.gfx.descriptors,
                              pBindDescriptorSetsInfo);
   }

   if (pBindDescriptorSetsInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
      hk_bind_descriptor_sets(cmd, &cmd->state.cs.descriptors,
                              pBindDescriptorSetsInfo);
   }
}

static void
hk_push_constants(UNUSED struct hk_cmd_buffer *cmd,
                  struct hk_descriptor_state *desc,
                  const VkPushConstantsInfoKHR *info)
{
   memcpy(desc->root.push + info->offset, info->pValues, info->size);
   desc->root_dirty = true;
}

VKAPI_ATTR void VKAPI_CALL
hk_CmdPushConstants2KHR(VkCommandBuffer commandBuffer,
                        const VkPushConstantsInfoKHR *pPushConstantsInfo)
{
   VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);

   if (pPushConstantsInfo->stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS)
      hk_push_constants(cmd, &cmd->state.gfx.descriptors, pPushConstantsInfo);

   if (pPushConstantsInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT)
      hk_push_constants(cmd, &cmd->state.cs.descriptors, pPushConstantsInfo);
}

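/* Get (lazily allocating) the CPU-side storage for push descriptors of the
 * given set index and mark it dirty for
 * hk_cmd_buffer_flush_push_descriptors.
 */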
static struct hk_push_descriptor_set *
hk_cmd_push_descriptors(struct hk_cmd_buffer *cmd,
                        struct hk_descriptor_state *desc, uint32_t set)
{
   assert(set < HK_MAX_SETS);
   if (unlikely(desc->push[set] == NULL)) {
      desc->push[set] =
         vk_zalloc(&cmd->vk.pool->alloc, sizeof(*desc->push[set]), 8,
                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (unlikely(desc->push[set] == NULL)) {
         vk_command_buffer_set_error(&cmd->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
         return NULL;
      }
   }

   /* Pushing descriptors replaces whatever sets are bound */
   desc->sets[set] = NULL;
   desc->push_dirty |= BITFIELD_BIT(set);

   return desc->push[set];
}

static void
hk_push_descriptor_set(struct hk_cmd_buffer *cmd,
                       struct hk_descriptor_state *desc,
                       const VkPushDescriptorSetInfoKHR *info)
{
   VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, info->layout);

   struct hk_push_descriptor_set *push_set =
      hk_cmd_push_descriptors(cmd, desc, info->set);
   if (unlikely(push_set == NULL))
      return;

   struct hk_descriptor_set_layout *set_layout =
      vk_to_hk_descriptor_set_layout(pipeline_layout->set_layouts[info->set]);

   hk_push_descriptor_set_update(push_set, set_layout,
                                 info->descriptorWriteCount,
                                 info->pDescriptorWrites);
}

VKAPI_ATTR void VKAPI_CALL
hk_CmdPushDescriptorSet2KHR(
   VkCommandBuffer commandBuffer,
   const VkPushDescriptorSetInfoKHR *pPushDescriptorSetInfo)
{
   VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);

   if (pPushDescriptorSetInfo->stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS) {
      hk_push_descriptor_set(cmd, &cmd->state.gfx.descriptors,
                             pPushDescriptorSetInfo);
   }

   if (pPushDescriptorSetInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
      hk_push_descriptor_set(cmd, &cmd->state.cs.descriptors,
                             pPushDescriptorSetInfo);
   }
}

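/* Upload any dirty push descriptor sets to GPU memory and point the root
 * table at the fresh copies.
 */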
void
hk_cmd_buffer_flush_push_descriptors(struct hk_cmd_buffer *cmd,
                                     struct hk_descriptor_state *desc)
{
   u_foreach_bit(set_idx, desc->push_dirty) {
      struct hk_push_descriptor_set *push_set = desc->push[set_idx];
      uint64_t push_set_addr = hk_pool_upload(
         cmd, push_set->data, sizeof(push_set->data), HK_MIN_UBO_ALIGNMENT);

      desc->root.sets[set_idx] = push_set_addr;
      desc->set_sizes[set_idx] = sizeof(push_set->data);
   }

   desc->root_dirty = true;
   desc->push_dirty = 0;
}

VKAPI_ATTR void VKAPI_CALL
hk_CmdPushDescriptorSetWithTemplate2KHR(
   VkCommandBuffer commandBuffer, const VkPushDescriptorSetWithTemplateInfoKHR
      *pPushDescriptorSetWithTemplateInfo)
{
   VK_FROM_HANDLE(hk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(vk_descriptor_update_template, template,
                  pPushDescriptorSetWithTemplateInfo->descriptorUpdateTemplate);
   VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout,
                  pPushDescriptorSetWithTemplateInfo->layout);

   struct hk_descriptor_state *desc =
      hk_get_descriptors_state(cmd, template->bind_point);
   struct hk_push_descriptor_set *push_set = hk_cmd_push_descriptors(
      cmd, desc, pPushDescriptorSetWithTemplateInfo->set);
   if (unlikely(push_set == NULL))
      return;

   struct hk_descriptor_set_layout *set_layout = vk_to_hk_descriptor_set_layout(
      pipeline_layout->set_layouts[pPushDescriptorSetWithTemplateInfo->set]);

   hk_push_descriptor_set_update_template(
      push_set, set_layout, template,
      pPushDescriptorSetWithTemplateInfo->pData);
}

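/* Snapshot the CPU-side root descriptor table into GPU memory for the given
 * bind point. root_desc_addr is patched first so the uploaded copy is
 * self-referential (see the static_assert in hk_upload_usc_words).
 */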
uint64_t
hk_cmd_buffer_upload_root(struct hk_cmd_buffer *cmd,
                          VkPipelineBindPoint bind_point)
{
   struct hk_descriptor_state *desc = hk_get_descriptors_state(cmd, bind_point);
   struct hk_root_descriptor_table *root = &desc->root;

   struct agx_ptr root_ptr = hk_pool_alloc(cmd, sizeof(*root), 8);
   if (!root_ptr.gpu)
      return 0;

   root->root_desc_addr = root_ptr.gpu;

   memcpy(root_ptr.cpu, root, sizeof(*root));
   return root_ptr.gpu;
}

void
hk_usc_upload_spilled_rt_descs(struct agx_usc_builder *b,
                               struct hk_cmd_buffer *cmd)
{
   struct hk_rendering_state *render = &cmd->state.gfx.render;

   /* Upload texture/PBE descriptors for each render target so we can clear
    * spilled render targets.
    */
   struct agx_ptr descs =
      hk_pool_alloc(cmd, AGX_TEXTURE_LENGTH * 2 * render->color_att_count, 64);
   struct agx_texture_packed *desc = descs.cpu;
   if (!desc)
      return;

   for (unsigned i = 0; i < render->color_att_count; ++i) {
      struct hk_image_view *iview = render->color_att[i].iview;
      if (!iview) {
         /* XXX: probably should emit a null descriptor here...? */
         continue;
      }

      memcpy(&desc[(i * 2) + 0], &iview->planes[0].emrt_texture, sizeof(*desc));
      memcpy(&desc[(i * 2) + 1], &iview->planes[0].emrt_pbe, sizeof(*desc));
   }

   desc = descs.cpu;

   /* Bind the base as u0_u1 for bindless access */
   agx_usc_uniform(b, 0, 4, hk_pool_upload(cmd, &descs.gpu, 8, 8));
}

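/* Ensure device scratch (spill) memory is allocated for a shader and record
 * in the control stream which hardware stages need main/preamble scratch.
 */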
void
hk_reserve_scratch(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
                   struct hk_shader *s)
{
   struct hk_device *dev = hk_cmd_buffer_device(cmd);
   uint32_t max_scratch_size =
      MAX2(s->b.info.scratch_size, s->b.info.preamble_scratch_size);

   if (max_scratch_size == 0)
      return;

   unsigned preamble_size = (s->b.info.preamble_scratch_size > 0) ? 1 : 0;

   /* Note: this uses the hardware stage, not the software stage */
   hk_device_alloc_scratch(dev, s->b.info.stage, max_scratch_size);

   switch (s->b.info.stage) {
   case PIPE_SHADER_FRAGMENT:
      cs->scratch.fs.main = true;
      cs->scratch.fs.preamble = MAX2(cs->scratch.fs.preamble, preamble_size);
      break;
   case PIPE_SHADER_VERTEX:
      cs->scratch.vs.main = true;
      cs->scratch.vs.preamble = MAX2(cs->scratch.vs.preamble, preamble_size);
      break;
   default:
      cs->scratch.cs.main = true;
      cs->scratch.cs.preamble = MAX2(cs->scratch.cs.preamble, preamble_size);
      break;
   }
}

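/* Build and upload the USC words for a draw or dispatch: the root table
 * pointer, per-stage uniforms (vertex attribute bases/clamps, draw
 * parameters, blend constant, etc.), the tilebuffer SHARED state for
 * fragment shaders, and finally the prepacked words from the linked shader.
 */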
uint32_t
hk_upload_usc_words(struct hk_cmd_buffer *cmd, struct hk_shader *s,
                    struct hk_linked_shader *linked)
{
   struct hk_device *dev = hk_cmd_buffer_device(cmd);

   enum pipe_shader_type sw_stage = s->info.stage;
   enum pipe_shader_type hw_stage = s->b.info.stage;

   unsigned constant_push_ranges =
      DIV_ROUND_UP(s->b.info.immediate_size_16, 64);
   unsigned push_ranges = 2;
   unsigned stage_ranges = 3;

   size_t usc_size =
      agx_usc_size(constant_push_ranges + push_ranges + stage_ranges + 4);
   struct agx_ptr t = hk_pool_usc_alloc(cmd, usc_size, 64);
   if (!t.cpu)
      return 0;

   struct agx_usc_builder b = agx_usc_builder(t.cpu, usc_size);

   uint64_t root_ptr;

   if (sw_stage == PIPE_SHADER_COMPUTE)
      root_ptr = hk_cmd_buffer_upload_root(cmd, VK_PIPELINE_BIND_POINT_COMPUTE);
   else
      root_ptr = cmd->state.gfx.root;

   static_assert(offsetof(struct hk_root_descriptor_table, root_desc_addr) == 0,
                 "self-reflective");

   agx_usc_uniform(&b, HK_ROOT_UNIFORM, 4, root_ptr);

   if (sw_stage == MESA_SHADER_VERTEX) {
      unsigned count =
         DIV_ROUND_UP(BITSET_LAST_BIT(s->info.vs.attrib_components_read), 4);

      if (count) {
         agx_usc_uniform(
            &b, 0, 4 * count,
            root_ptr + hk_root_descriptor_offset(draw.attrib_base));

         agx_usc_uniform(
            &b, 4 * count, 2 * count,
            root_ptr + hk_root_descriptor_offset(draw.attrib_clamps));
      }

      if (cmd->state.gfx.draw_params)
         agx_usc_uniform(&b, 6 * count, 4, cmd->state.gfx.draw_params);

      if (cmd->state.gfx.draw_id_ptr)
         agx_usc_uniform(&b, (6 * count) + 4, 1, cmd->state.gfx.draw_id_ptr);

      if (hw_stage == MESA_SHADER_COMPUTE) {
         agx_usc_uniform(
            &b, (6 * count) + 8, 4,
            root_ptr + hk_root_descriptor_offset(draw.input_assembly));
      }
   } else if (sw_stage == MESA_SHADER_FRAGMENT) {
      if (agx_tilebuffer_spills(&cmd->state.gfx.render.tilebuffer)) {
         hk_usc_upload_spilled_rt_descs(&b, cmd);
      }

      agx_usc_uniform(
         &b, 4, 8, root_ptr + hk_root_descriptor_offset(draw.blend_constant));

      /* The SHARED state is baked into linked->usc for non-fragment shaders. We
       * don't pass around the information to bake the tilebuffer layout.
       *
       * TODO: We probably could with some refactor.
       */
      agx_usc_push_packed(&b, SHARED, &cmd->state.gfx.render.tilebuffer.usc);
   }

   agx_usc_push_blob(&b, linked->usc.data, linked->usc.size);
   return agx_usc_addr(&dev->dev, t.gpu);
}

/* Specialized variant of hk_upload_usc_words for internal dispatches that do
 * not use any state except for some directly mapped uniforms.
 */
uint32_t
hk_upload_usc_words_kernel(struct hk_cmd_buffer *cmd, struct hk_shader *s,
                           void *data, size_t data_size)
{
   struct hk_device *dev = hk_cmd_buffer_device(cmd);

   assert(s->info.stage == MESA_SHADER_COMPUTE);
   assert(s->b.info.scratch_size == 0 && "you shouldn't be spilling!");
   assert(s->b.info.preamble_scratch_size == 0 && "you shouldn't be spilling!");

   unsigned constant_push_ranges =
      DIV_ROUND_UP(s->b.info.immediate_size_16, 64);

   size_t usc_size = agx_usc_size(constant_push_ranges + 7);
   struct agx_ptr t = hk_pool_usc_alloc(cmd, usc_size, 64);
   if (!t.cpu)
      return 0;

   struct agx_usc_builder b = agx_usc_builder(t.cpu, usc_size);

   /* Map the data directly as uniforms starting at u0 */
   agx_usc_uniform(&b, 0, DIV_ROUND_UP(data_size, 2),
                   hk_pool_upload(cmd, data, data_size, 4));

   agx_usc_push_blob(&b, s->only_linked->usc.data, s->only_linked->usc.size);
   return agx_usc_addr(&dev->dev, t.gpu);
}

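/* Initialize a freshly created graphics (VDM) control stream: inherit the
 * render pass tilebuffer layout and background/ZLS programs, emit a barrier
 * invalidating the USC cache, and emit a small block of baseline PPP state.
 * All dynamic state is then marked dirty so it gets re-emitted into the new
 * stream.
 */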
void
hk_cs_init_graphics(struct hk_cmd_buffer *cmd, struct hk_cs *cs)
{
   struct hk_rendering_state *render = &cmd->state.gfx.render;
   uint8_t *map = cs->current;

   cs->tib = render->tilebuffer;

   /* Assume this is not the first control stream of the render pass, so
    * initially use the partial background program and ZLS control.
    * hk_BeginRendering will override.
    */
   cs->cr = render->cr;
   cs->cr.bg.main = render->cr.bg.partial;
   cs->cr.zls_control = render->cr.zls_control_partial;

   /* Barrier to enforce GPU-CPU coherency, in case this batch is back to back
    * with another that caused stale data to be cached and the CPU wrote to it
    * in the meantime.
    */
   agx_push(map, VDM_BARRIER, cfg) {
      cfg.usc_cache_inval = true;
   }

   struct AGX_PPP_HEADER present = {
      .w_clamp = true,
      .occlusion_query_2 = true,
      .output_unknown = true,
      .varying_word_2 = true,
      .viewport_count = 1, /* irrelevant */
   };

   size_t size = agx_ppp_update_size(&present);
   struct agx_ptr T = hk_pool_alloc(cmd, size, 64);
   if (!T.cpu)
      return;

   struct agx_ppp_update ppp = agx_new_ppp_update(T, size, &present);

   /* clang-format off */
   agx_ppp_push(&ppp, W_CLAMP, cfg) cfg.w_clamp = 1e-10;
   agx_ppp_push(&ppp, FRAGMENT_OCCLUSION_QUERY_2, cfg);
   agx_ppp_push(&ppp, OUTPUT_UNKNOWN, cfg);
   agx_ppp_push(&ppp, VARYING_2, cfg);
   /* clang-format on */

   agx_ppp_fini(&map, &ppp);
   cs->current = map;

   util_dynarray_init(&cs->scissor, NULL);
   util_dynarray_init(&cs->depth_bias, NULL);

   /* All graphics state must be re-emitted in each control stream */
   hk_cmd_buffer_dirty_all(cmd);
}

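/* Guarantee that at least `space` bytes (plus room for a stream link and some
 * safety padding) are available in the control stream, chaining to a newly
 * allocated block with a VDM/CDM stream link if the current one is nearly
 * full.
 */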
void
hk_ensure_cs_has_space(struct hk_cmd_buffer *cmd, struct hk_cs *cs,
                       size_t space)
{
   bool vdm = cs->type == HK_CS_VDM;

   size_t link_length =
      vdm ? AGX_VDM_STREAM_LINK_LENGTH : AGX_CDM_STREAM_LINK_LENGTH;

   /* Assert that we have space for a link tag */
   assert((cs->current + link_length) <= cs->end && "Encoder overflowed");

   /* Always leave room for a link tag, in case we run out of space later,
    * plus padding because VDM apparently overreads?
    *
    * 0x200 is not enough. 0x400 seems to work. 0x800 for safety.
    */
   space += link_length + 0x800;

   /* If there is room in the command buffer, we're done */
   if (likely((cs->end - cs->current) >= space))
      return;

   /* Otherwise, we need to allocate a new command buffer. We use memory owned
    * by the batch to simplify lifetime management for the BO.
    */
   size_t size = 65536;
   struct agx_ptr T = hk_pool_alloc(cmd, size, 256);

   /* Jump from the old control stream to the new control stream */
   if (vdm) {
      agx_pack(cs->current, VDM_STREAM_LINK, cfg) {
         cfg.target_lo = T.gpu & BITFIELD_MASK(32);
         cfg.target_hi = T.gpu >> 32;
      }
   } else {
      agx_pack(cs->current, CDM_STREAM_LINK, cfg) {
         cfg.target_lo = T.gpu & BITFIELD_MASK(32);
         cfg.target_hi = T.gpu >> 32;
      }
   }

   /* Swap out the control stream */
   cs->current = T.cpu;
   cs->end = cs->current + size;
   cs->stream_linked = true;
}