tu: Implement VK_EXT_descriptor_buffer

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19849>
Author: Connor Abbott
Date: 2022-09-19 16:59:53 +02:00
Committed by: Marge Bot
Parent: b28899a261
Commit: cb3872f2cd
9 changed files with 465 additions and 77 deletions
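
For reference, the application-side flow that the new entry points in this commit serve (tu_GetDescriptorSetLayoutSizeEXT, tu_GetDescriptorSetLayoutBindingOffsetEXT, tu_CmdBindDescriptorBuffersEXT, tu_CmdSetDescriptorBufferOffsetsEXT) looks roughly like the sketch below. All handles and the buffer device address are assumptions, the extension entry points would normally be loaded with vkGetDeviceProcAddr, and descriptors themselves are written with vkGetDescriptorEXT (sketched later next to tu_GetDescriptorEXT).

#include <vulkan/vulkan.h>

/* Sketch only: query where set 0 / binding 0 live, bind the application's
 * descriptor buffer, then point set 0 at offset 0 within it. */
static void
bind_descriptor_buffer_sketch(VkDevice device, VkCommandBuffer cmd,
                              VkDescriptorSetLayout set_layout,
                              VkPipelineLayout pipeline_layout,
                              VkDeviceAddress descriptor_buffer_addr)
{
   VkDeviceSize set_size, binding_offset;
   vkGetDescriptorSetLayoutSizeEXT(device, set_layout, &set_size);
   vkGetDescriptorSetLayoutBindingOffsetEXT(device, set_layout, 0,
                                            &binding_offset);
   /* set_size/binding_offset are what the application uses when writing
    * descriptors into the buffer with vkGetDescriptorEXT. */
   (void)set_size;
   (void)binding_offset;

   VkDescriptorBufferBindingInfoEXT binding_info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT,
      .address = descriptor_buffer_addr,
      .usage = VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT |
               VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT,
   };
   vkCmdBindDescriptorBuffersEXT(cmd, 1, &binding_info);

   uint32_t buffer_index = 0;
   VkDeviceSize offset = 0;
   vkCmdSetDescriptorBufferOffsetsEXT(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
                                      pipeline_layout, /*firstSet=*/0, 1,
                                      &buffer_index, &offset);
}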


@@ -548,7 +548,7 @@ Khronos extensions that are not part of any Vulkan version:
VK_EXT_depth_clip_control DONE (anv, lvp, radv, tu, v3dv, vn)
VK_EXT_depth_clip_enable DONE (anv, lvp, radv, tu, vn)
VK_EXT_depth_range_unrestricted DONE (radv, lvp)
VK_EXT_descriptor_buffer DONE (radv)
VK_EXT_descriptor_buffer DONE (radv, tu)
VK_EXT_discard_rectangles DONE (radv)
VK_EXT_display_control DONE (anv, radv, tu)
VK_EXT_extended_dynamic_state3 DONE (lvp, radv, tu)


@@ -165,6 +165,12 @@ tu6_emit_flushes(struct tu_cmd_buffer *cmd_buffer,
tu6_emit_event_write(cmd_buffer, cs, CACHE_FLUSH_TS);
if (flushes & TU_CMD_FLAG_CACHE_INVALIDATE)
tu6_emit_event_write(cmd_buffer, cs, CACHE_INVALIDATE);
if (flushes & TU_CMD_FLAG_BINDLESS_DESCRIPTOR_INVALIDATE) {
tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
.gfx_bindless = 0x1f,
.cs_bindless = 0x1f,
));
}
if (flushes & TU_CMD_FLAG_WAIT_MEM_WRITES)
tu_cs_emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0);
if ((flushes & TU_CMD_FLAG_WAIT_FOR_IDLE) ||
@@ -2061,6 +2067,64 @@ tu_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,
cmd->state.index_size = index_size;
}
static void
tu6_emit_descriptor_sets(struct tu_cmd_buffer *cmd,
VkPipelineBindPoint bind_point)
{
struct tu_descriptor_state *descriptors_state =
tu_get_descriptors_state(cmd, bind_point);
uint32_t sp_bindless_base_reg, hlsq_bindless_base_reg, hlsq_invalidate_value;
struct tu_cs *cs, state_cs;
if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
sp_bindless_base_reg = REG_A6XX_SP_BINDLESS_BASE(0);
hlsq_bindless_base_reg = REG_A6XX_HLSQ_BINDLESS_BASE(0);
hlsq_invalidate_value = A6XX_HLSQ_INVALIDATE_CMD_GFX_BINDLESS(0x1f);
cmd->state.desc_sets =
tu_cs_draw_state(&cmd->sub_cs, &state_cs,
4 + 4 * descriptors_state->max_sets_bound +
(descriptors_state->dynamic_bound ? 6 : 0));
cmd->state.dirty |= TU_CMD_DIRTY_DESC_SETS_LOAD;
cs = &state_cs;
} else {
assert(bind_point == VK_PIPELINE_BIND_POINT_COMPUTE);
sp_bindless_base_reg = REG_A6XX_SP_CS_BINDLESS_BASE(0);
hlsq_bindless_base_reg = REG_A6XX_HLSQ_CS_BINDLESS_BASE(0);
hlsq_invalidate_value = A6XX_HLSQ_INVALIDATE_CMD_CS_BINDLESS(0x1f);
cmd->state.dirty |= TU_CMD_DIRTY_COMPUTE_DESC_SETS_LOAD;
cs = &cmd->cs;
}
tu_cs_emit_pkt4(cs, sp_bindless_base_reg, 2 * descriptors_state->max_sets_bound);
tu_cs_emit_array(cs, (const uint32_t*)descriptors_state->set_iova, 2 * descriptors_state->max_sets_bound);
tu_cs_emit_pkt4(cs, hlsq_bindless_base_reg, 2 * descriptors_state->max_sets_bound);
tu_cs_emit_array(cs, (const uint32_t*)descriptors_state->set_iova, 2 * descriptors_state->max_sets_bound);
/* Dynamic descriptors get the last descriptor set. */
if (descriptors_state->dynamic_bound) {
tu_cs_emit_pkt4(cs, sp_bindless_base_reg + 4 * 2, 2);
tu_cs_emit_qw(cs, descriptors_state->set_iova[MAX_SETS]);
tu_cs_emit_pkt4(cs, hlsq_bindless_base_reg + 4 * 2, 2);
tu_cs_emit_qw(cs, descriptors_state->set_iova[MAX_SETS]);
}
tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(.dword = hlsq_invalidate_value));
if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
assert(cs->cur == cs->end); /* validate draw state size */
/* note: this also avoids emitting draw states before renderpass clears,
* which may use the 3D clear path (for MSAA cases)
*/
if (!(cmd->state.dirty & TU_CMD_DIRTY_DRAW_STATE)) {
tu_cs_emit_pkt7(&cmd->draw_cs, CP_SET_DRAW_STATE, 3);
tu_cs_emit_draw_state(&cmd->draw_cs, TU_DRAW_STATE_DESC_SETS, cmd->state.desc_sets);
}
}
}
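
The draw-state size reserved above breaks down as follows (assuming tu_cs_emit_pkt4 emits one header dword plus its payload and tu_cs_emit_regs emits a header plus one value dword for a single register):

/*   4      two pkt4 headers for the SP/HLSQ_BINDLESS_BASE arrays, plus the
 *          two-dword A6XX_HLSQ_INVALIDATE_CMD write
 *   4 * N  two iova dwords per bound set, repeated for both arrays
 *   6      two extra pkt4 packets (header + 64-bit iova) for the dynamic set
 */
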
VKAPI_ATTR void VKAPI_CALL
tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
VkPipelineBindPoint pipelineBindPoint,
@@ -2086,6 +2150,7 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
TU_FROM_HANDLE(tu_descriptor_set, set, pDescriptorSets[i]);
descriptors_state->sets[idx] = set;
descriptors_state->set_iova[idx] = set->va | 3;
if (!set)
continue;
@@ -2138,17 +2203,6 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
}
assert(dyn_idx == dynamicOffsetCount);
uint32_t sp_bindless_base_reg, hlsq_bindless_base_reg, hlsq_invalidate_value;
uint64_t addr[MAX_SETS] = {};
uint64_t dynamic_addr = 0;
struct tu_cs *cs, state_cs;
for (uint32_t i = 0; i < descriptors_state->max_sets_bound; i++) {
struct tu_descriptor_set *set = descriptors_state->sets[i];
if (set)
addr[i] = set->va | 3;
}
if (layout->dynamic_offset_size) {
/* allocate and fill out dynamic descriptor set */
struct tu_cs_memory dynamic_desc_set;
@@ -2162,57 +2216,79 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
memcpy(dynamic_desc_set.map, descriptors_state->dynamic_descriptors,
layout->dynamic_offset_size);
dynamic_addr = dynamic_desc_set.iova | 3;
descriptors_state->set_iova[MAX_SETS] = dynamic_desc_set.iova | 3;
descriptors_state->dynamic_bound = true;
}
if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
sp_bindless_base_reg = REG_A6XX_SP_BINDLESS_BASE(0);
hlsq_bindless_base_reg = REG_A6XX_HLSQ_BINDLESS_BASE(0);
hlsq_invalidate_value = A6XX_HLSQ_INVALIDATE_CMD_GFX_BINDLESS(0x1f);
tu6_emit_descriptor_sets(cmd, pipelineBindPoint);
}
cmd->state.desc_sets =
tu_cs_draw_state(&cmd->sub_cs, &state_cs,
4 + 4 * descriptors_state->max_sets_bound +
(descriptors_state->dynamic_bound ? 6 : 0));
cmd->state.dirty |= TU_CMD_DIRTY_DESC_SETS_LOAD;
cs = &state_cs;
} else {
assert(pipelineBindPoint == VK_PIPELINE_BIND_POINT_COMPUTE);
VKAPI_ATTR void VKAPI_CALL
tu_CmdBindDescriptorBuffersEXT(
VkCommandBuffer commandBuffer,
uint32_t bufferCount,
const VkDescriptorBufferBindingInfoEXT *pBindingInfos)
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
sp_bindless_base_reg = REG_A6XX_SP_CS_BINDLESS_BASE(0);
hlsq_bindless_base_reg = REG_A6XX_HLSQ_CS_BINDLESS_BASE(0);
hlsq_invalidate_value = A6XX_HLSQ_INVALIDATE_CMD_CS_BINDLESS(0x1f);
for (unsigned i = 0; i < bufferCount; i++)
cmd->state.descriptor_buffer_iova[i] = pBindingInfos[i].address;
}
cmd->state.dirty |= TU_CMD_DIRTY_COMPUTE_DESC_SETS_LOAD;
cs = &cmd->cs;
VKAPI_ATTR void VKAPI_CALL
tu_CmdSetDescriptorBufferOffsetsEXT(
VkCommandBuffer commandBuffer,
VkPipelineBindPoint pipelineBindPoint,
VkPipelineLayout _layout,
uint32_t firstSet,
uint32_t setCount,
const uint32_t *pBufferIndices,
const VkDeviceSize *pOffsets)
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
TU_FROM_HANDLE(tu_pipeline_layout, layout, _layout);
struct tu_descriptor_state *descriptors_state =
tu_get_descriptors_state(cmd, pipelineBindPoint);
descriptors_state->max_sets_bound =
MAX2(descriptors_state->max_sets_bound, firstSet + setCount);
for (unsigned i = 0; i < setCount; ++i) {
unsigned idx = i + firstSet;
struct tu_descriptor_set_layout *set_layout = layout->set[idx].layout;
descriptors_state->set_iova[idx] =
(cmd->state.descriptor_buffer_iova[pBufferIndices[i]] + pOffsets[i]) | 3;
if (set_layout->has_inline_uniforms)
cmd->state.dirty |= TU_CMD_DIRTY_SHADER_CONSTS;
}
tu_cs_emit_pkt4(cs, sp_bindless_base_reg, 2 * descriptors_state->max_sets_bound);
tu_cs_emit_array(cs, (const uint32_t*) addr, 2 * descriptors_state->max_sets_bound);
tu_cs_emit_pkt4(cs, hlsq_bindless_base_reg, 2 * descriptors_state->max_sets_bound);
tu_cs_emit_array(cs, (const uint32_t*) addr, 2 * descriptors_state->max_sets_bound);
tu6_emit_descriptor_sets(cmd, pipelineBindPoint);
}
/* Dynamic descriptors get the last descriptor set. */
if (descriptors_state->dynamic_bound) {
tu_cs_emit_pkt4(cs, sp_bindless_base_reg + 4 * 2, 2);
tu_cs_emit_qw(cs, dynamic_addr);
tu_cs_emit_pkt4(cs, hlsq_bindless_base_reg + 4 * 2, 2);
tu_cs_emit_qw(cs, dynamic_addr);
}
VKAPI_ATTR void VKAPI_CALL
tu_CmdBindDescriptorBufferEmbeddedSamplersEXT(
VkCommandBuffer commandBuffer,
VkPipelineBindPoint pipelineBindPoint,
VkPipelineLayout _layout,
uint32_t set)
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
TU_FROM_HANDLE(tu_pipeline_layout, layout, _layout);
tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(.dword = hlsq_invalidate_value));
struct tu_descriptor_set_layout *set_layout = layout->set[set].layout;
if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
assert(cs->cur == cs->end); /* validate draw state size */
/* note: this also avoids emitting draw states before renderpass clears,
* which may use the 3D clear path (for MSAA cases)
*/
if (!(cmd->state.dirty & TU_CMD_DIRTY_DRAW_STATE)) {
tu_cs_emit_pkt7(&cmd->draw_cs, CP_SET_DRAW_STATE, 3);
tu_cs_emit_draw_state(&cmd->draw_cs, TU_DRAW_STATE_DESC_SETS, cmd->state.desc_sets);
}
}
struct tu_descriptor_state *descriptors_state =
tu_get_descriptors_state(cmd, pipelineBindPoint);
descriptors_state->max_sets_bound =
MAX2(descriptors_state->max_sets_bound, set + 1);
descriptors_state->set_iova[set] = set_layout->embedded_samplers->iova | 3;
tu6_emit_descriptor_sets(cmd, pipelineBindPoint);
}
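
The application call that reaches the entry point above is a single bind with no backing descriptor buffer; a minimal sketch, assuming the command buffer and pipeline layout handles, an arbitrary set index, and a set layout created with VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT (see tu_CreateDescriptorSetLayout below):

/* Sketch: embedded immutable samplers live in a BO written at layout
 * creation time, so binding them only records that set's iova. */
vkCmdBindDescriptorBufferEmbeddedSamplersEXT(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
                                             pipeline_layout, /*set=*/2);
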
static enum VkResult
@@ -3489,6 +3565,10 @@ tu_flush_for_access(struct tu_cache_state *cache,
DST_INCOHERENT_FLUSH(CCU_COLOR, CCU_FLUSH_COLOR, CCU_INVALIDATE_COLOR)
DST_INCOHERENT_FLUSH(CCU_DEPTH, CCU_FLUSH_DEPTH, CCU_INVALIDATE_DEPTH)
if (dst_mask & TU_ACCESS_BINDLESS_DESCRIPTOR_READ) {
flush_bits |= TU_CMD_FLAG_BINDLESS_DESCRIPTOR_INVALIDATE;
}
#undef DST_INCOHERENT_FLUSH
cache->flush_bits |= flush_bits;
@@ -3592,6 +3672,12 @@ vk2tu_access(VkAccessFlags2 flags, VkPipelineStageFlags2 stages, bool image_only
SHADER_STAGES))
mask |= TU_ACCESS_UCHE_READ;
if (gfx_read_access(flags, stages,
VK_ACCESS_2_DESCRIPTOR_BUFFER_READ_BIT_EXT,
SHADER_STAGES)) {
mask |= TU_ACCESS_UCHE_READ | TU_ACCESS_BINDLESS_DESCRIPTOR_READ;
}
if (gfx_write_access(flags, stages,
VK_ACCESS_2_SHADER_WRITE_BIT |
VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT,
@@ -4492,6 +4578,8 @@ tu6_emit_user_consts(struct tu_cs *cs,
for (unsigned i = 0; i < link->tu_const_state.num_inline_ubos; i++) {
const struct tu_inline_ubo *ubo = &link->tu_const_state.ubos[i];
uint64_t va = descriptors->set_iova[ubo->base] & ~0x3f;
tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), ubo->push_address ? 7 : 3);
tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(ubo->const_offset_vec4) |
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
@@ -4501,11 +4589,11 @@ tu6_emit_user_consts(struct tu_cs *cs,
if (ubo->push_address) {
tu_cs_emit(cs, 0);
tu_cs_emit(cs, 0);
tu_cs_emit_qw(cs, descriptors->sets[ubo->base]->va + ubo->offset);
tu_cs_emit_qw(cs, va + ubo->offset);
tu_cs_emit(cs, 0);
tu_cs_emit(cs, 0);
} else {
tu_cs_emit_qw(cs, descriptors->sets[ubo->base]->va + ubo->offset);
tu_cs_emit_qw(cs, va + ubo->offset);
}
}
}
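
The TU_ACCESS_BINDLESS_DESCRIPTOR_READ / TU_CMD_FLAG_BINDLESS_DESCRIPTOR_INVALIDATE tracking added above is reached through ordinary synchronization2 barriers; a sketch of the kind of barrier an application records after writing descriptors with a copy, assuming the command buffer handle:

VkMemoryBarrier2 barrier = {
   .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
   .srcStageMask = VK_PIPELINE_STAGE_2_COPY_BIT,
   .srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT,
   .dstStageMask = VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT,
   .dstAccessMask = VK_ACCESS_2_DESCRIPTOR_BUFFER_READ_BIT_EXT,
};
VkDependencyInfo dep = {
   .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
   .memoryBarrierCount = 1,
   .pMemoryBarriers = &barrier,
};
vkCmdPipelineBarrier2(cmd, &dep);
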


@@ -46,6 +46,7 @@ struct tu_descriptor_state
struct tu_descriptor_set *sets[MAX_SETS];
struct tu_descriptor_set push_set;
uint32_t dynamic_descriptors[MAX_DYNAMIC_BUFFERS_SIZE];
uint64_t set_iova[MAX_SETS + 1];
uint32_t max_sets_bound;
bool dynamic_bound;
};
@@ -120,13 +121,20 @@ enum tu_cmd_access_mask {
*/
TU_ACCESS_CP_WRITE = 1 << 12,
/* Descriptors are read through UCHE but are also prefetched via
* CP_LOAD_STATE6 and the prefetched descriptors need to be invalidated
* when they change.
*/
TU_ACCESS_BINDLESS_DESCRIPTOR_READ = 1 << 13,
TU_ACCESS_READ =
TU_ACCESS_UCHE_READ |
TU_ACCESS_CCU_COLOR_READ |
TU_ACCESS_CCU_DEPTH_READ |
TU_ACCESS_CCU_COLOR_INCOHERENT_READ |
TU_ACCESS_CCU_DEPTH_INCOHERENT_READ |
TU_ACCESS_SYSMEM_READ,
TU_ACCESS_SYSMEM_READ |
TU_ACCESS_BINDLESS_DESCRIPTOR_READ,
TU_ACCESS_WRITE =
TU_ACCESS_UCHE_WRITE |
@@ -203,6 +211,7 @@ enum tu_cmd_flush_bits {
TU_CMD_FLAG_WAIT_MEM_WRITES = 1 << 6,
TU_CMD_FLAG_WAIT_FOR_IDLE = 1 << 7,
TU_CMD_FLAG_WAIT_FOR_ME = 1 << 8,
TU_CMD_FLAG_BINDLESS_DESCRIPTOR_INVALIDATE = 1 << 9,
TU_CMD_FLAG_ALL_FLUSH =
TU_CMD_FLAG_CCU_FLUSH_DEPTH |
@@ -217,6 +226,7 @@ enum tu_cmd_flush_bits {
TU_CMD_FLAG_CCU_INVALIDATE_DEPTH |
TU_CMD_FLAG_CCU_INVALIDATE_COLOR |
TU_CMD_FLAG_CACHE_INVALIDATE |
TU_CMD_FLAG_BINDLESS_DESCRIPTOR_INVALIDATE |
/* Treat CP_WAIT_FOR_ME as a "cache" that needs to be invalidated when
* a command that needs CP_WAIT_FOR_ME is executed. This means we may
* insert an extra WAIT_FOR_ME before an indirect command requiring it
@@ -527,6 +537,8 @@ struct tu_cmd_state
struct tu_vs_params last_vs_params;
struct tu_primitive_params last_prim_params;
uint64_t descriptor_buffer_iova[MAX_SETS];
};
struct tu_cmd_buffer


@@ -27,6 +27,7 @@
#include "tu_device.h"
#include "tu_image.h"
#include "tu_formats.h"
static inline uint8_t *
pool_base(struct tu_descriptor_pool *pool)
@@ -93,6 +94,19 @@ mutable_descriptor_size(struct tu_device *dev,
return max_size;
}
static void
tu_descriptor_set_layout_destroy(struct vk_device *vk_dev,
struct vk_descriptor_set_layout *vk_layout)
{
struct tu_device *dev = container_of(vk_dev, struct tu_device, vk);
struct tu_descriptor_set_layout *layout =
container_of(vk_layout, struct tu_descriptor_set_layout, vk);
if (layout->embedded_samplers)
tu_bo_finish(dev, layout->embedded_samplers);
vk_descriptor_set_layout_destroy(vk_dev, vk_layout);
}
VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateDescriptorSetLayout(
VkDevice _device,
@@ -149,6 +163,7 @@ tu_CreateDescriptorSetLayout(
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
set_layout->flags = pCreateInfo->flags;
set_layout->vk.destroy = tu_descriptor_set_layout_destroy;
/* We just allocate all the immutable samplers at the end of the struct */
struct tu_sampler *samplers = (void*) &set_layout->binding[num_bindings];
@@ -256,6 +271,38 @@ tu_CreateDescriptorSetLayout(
set_layout->dynamic_offset_size = dynamic_offset_size;
if (pCreateInfo->flags &
VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT) {
result = tu_bo_init_new(device, &set_layout->embedded_samplers,
set_layout->size, TU_BO_ALLOC_ALLOW_DUMP,
"embedded samplers");
if (result != VK_SUCCESS) {
vk_object_free(&device->vk, pAllocator, set_layout);
return vk_error(device, result);
}
result = tu_bo_map(device, set_layout->embedded_samplers);
if (result != VK_SUCCESS) {
tu_bo_finish(device, set_layout->embedded_samplers);
vk_object_free(&device->vk, pAllocator, set_layout);
return vk_error(device, result);
}
char *map = set_layout->embedded_samplers->map;
for (unsigned i = 0; i < set_layout->binding_count; i++) {
if (!set_layout->binding[i].immutable_samplers_offset)
continue;
unsigned offset = set_layout->binding[i].offset;
const struct tu_sampler *sampler =
(const struct tu_sampler *)((const char *)set_layout +
set_layout->binding[i].immutable_samplers_offset);
assert(set_layout->binding[i].array_size == 1);
memcpy(map + offset, sampler->descriptor,
sizeof(sampler->descriptor));
}
}
*pSetLayout = tu_descriptor_set_layout_to_handle(set_layout);
return VK_SUCCESS;
@@ -360,6 +407,30 @@ out:
pSupport->supported = supported;
}
VKAPI_ATTR void VKAPI_CALL
tu_GetDescriptorSetLayoutSizeEXT(
VkDevice _device,
VkDescriptorSetLayout _layout,
VkDeviceSize *pLayoutSizeInBytes)
{
TU_FROM_HANDLE(tu_descriptor_set_layout, layout, _layout);
*pLayoutSizeInBytes = layout->size;
}
VKAPI_ATTR void VKAPI_CALL
tu_GetDescriptorSetLayoutBindingOffsetEXT(
VkDevice _device,
VkDescriptorSetLayout _layout,
uint32_t binding,
VkDeviceSize *pOffset)
{
TU_FROM_HANDLE(tu_descriptor_set_layout, layout, _layout);
assert(binding < layout->binding_count);
*pOffset = layout->binding[binding].offset;
}
/* Note: we must hash any values used in tu_lower_io(). */
#define SHA1_UPDATE_VALUE(ctx, x) _mesa_sha1_update(ctx, &(x), sizeof(x));
@@ -905,6 +976,21 @@ tu_FreeDescriptorSets(VkDevice _device,
return VK_SUCCESS;
}
static void
write_texel_buffer_descriptor_addr(uint32_t *dst,
const VkDescriptorAddressInfoEXT *buffer_info)
{
if (!buffer_info || buffer_info->address == 0) {
memset(dst, 0, A6XX_TEX_CONST_DWORDS * sizeof(uint32_t));
} else {
uint8_t swiz[4] = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z,
PIPE_SWIZZLE_W };
fdl6_buffer_view_init(dst,
tu_vk_format_to_pipe_format(buffer_info->format),
swiz, buffer_info->address, buffer_info->range);
}
}
static void
write_texel_buffer_descriptor(uint32_t *dst, const VkBufferView buffer_view)
{
@@ -917,10 +1003,24 @@ write_texel_buffer_descriptor(uint32_t *dst, const VkBufferView buffer_view)
}
}
static VkDescriptorAddressInfoEXT
buffer_info_to_address(const VkDescriptorBufferInfo *buffer_info)
{
TU_FROM_HANDLE(tu_buffer, buffer, buffer_info->buffer);
uint32_t range = buffer ? vk_buffer_range(&buffer->vk, buffer_info->offset, buffer_info->range) : 0;
uint64_t va = buffer ? buffer->iova + buffer_info->offset : 0;
return (VkDescriptorAddressInfoEXT) {
.address = va,
.range = range,
};
}
static void
write_buffer_descriptor(const struct tu_device *device,
uint32_t *dst,
const VkDescriptorBufferInfo *buffer_info)
write_buffer_descriptor_addr(const struct tu_device *device,
uint32_t *dst,
const VkDescriptorAddressInfoEXT *buffer_info)
{
bool storage_16bit = device->physical_device->info->a6xx.storage_16bit;
/* newer a6xx allows using 16-bit descriptor for both 16-bit and 32-bit
@@ -928,16 +1028,14 @@ write_buffer_descriptor(const struct tu_device *device,
* isam.
*/
unsigned descriptors = storage_16bit ? 2 : 1;
if (buffer_info->buffer == VK_NULL_HANDLE) {
if (!buffer_info || buffer_info->address == 0) {
memset(dst, 0, descriptors * A6XX_TEX_CONST_DWORDS * sizeof(uint32_t));
return;
}
TU_FROM_HANDLE(tu_buffer, buffer, buffer_info->buffer);
assert((buffer_info->offset & 63) == 0); /* minStorageBufferOffsetAlignment */
uint64_t va = buffer->iova + buffer_info->offset;
uint32_t range = vk_buffer_range(&buffer->vk, buffer_info->offset, buffer_info->range);
uint64_t va = buffer_info->address;
uint32_t range = buffer_info->range;
for (unsigned i = 0; i < descriptors; i++) {
if (storage_16bit && i == 0) {
@@ -959,30 +1057,43 @@ write_buffer_descriptor(const struct tu_device *device,
}
static void
write_ubo_descriptor(uint32_t *dst, const VkDescriptorBufferInfo *buffer_info)
write_buffer_descriptor(const struct tu_device *device,
uint32_t *dst,
const VkDescriptorBufferInfo *buffer_info)
{
if (buffer_info->buffer == VK_NULL_HANDLE) {
VkDescriptorAddressInfoEXT addr = buffer_info_to_address(buffer_info);
write_buffer_descriptor_addr(device, dst, &addr);
}
static void
write_ubo_descriptor_addr(uint32_t *dst,
const VkDescriptorAddressInfoEXT *buffer_info)
{
if (!buffer_info) {
dst[0] = dst[1] = 0;
return;
}
TU_FROM_HANDLE(tu_buffer, buffer, buffer_info->buffer);
uint32_t range = vk_buffer_range(&buffer->vk, buffer_info->offset, buffer_info->range);
uint64_t va = buffer_info->address;
/* The HW range is in vec4 units */
range = ALIGN_POT(range, 16) / 16;
uint64_t va = buffer->iova + buffer_info->offset;
uint32_t range = va ? DIV_ROUND_UP(buffer_info->range, 16) : 0;
dst[0] = A6XX_UBO_0_BASE_LO(va);
dst[1] = A6XX_UBO_1_BASE_HI(va >> 32) | A6XX_UBO_1_SIZE(range);
}
static void
write_ubo_descriptor(uint32_t *dst, const VkDescriptorBufferInfo *buffer_info)
{
VkDescriptorAddressInfoEXT addr = buffer_info_to_address(buffer_info);
write_ubo_descriptor_addr(dst, &addr);
}
static void
write_image_descriptor(uint32_t *dst,
VkDescriptorType descriptor_type,
const VkDescriptorImageInfo *image_info)
{
if (image_info->imageView == VK_NULL_HANDLE) {
if (!image_info || image_info->imageView == VK_NULL_HANDLE) {
memset(dst, 0, A6XX_TEX_CONST_DWORDS * sizeof(uint32_t));
return;
}
@@ -1006,14 +1117,15 @@ write_combined_image_sampler_descriptor(uint32_t *dst,
/* copy over sampler state */
if (has_sampler) {
TU_FROM_HANDLE(tu_sampler, sampler, image_info->sampler);
memcpy(dst + A6XX_TEX_CONST_DWORDS, sampler->descriptor, sizeof(sampler->descriptor));
}
}
static void
write_sampler_descriptor(uint32_t *dst, const VkDescriptorImageInfo *image_info)
write_sampler_descriptor(uint32_t *dst, VkSampler _sampler)
{
TU_FROM_HANDLE(tu_sampler, sampler, image_info->sampler);
TU_FROM_HANDLE(tu_sampler, sampler, _sampler);
memcpy(dst, sampler->descriptor, sizeof(sampler->descriptor));
}
@@ -1025,6 +1137,103 @@ write_sampler_push(uint32_t *dst, const struct tu_sampler *sampler)
memcpy(dst, sampler->descriptor, sizeof(sampler->descriptor));
}
VKAPI_ATTR void VKAPI_CALL
tu_GetDescriptorEXT(
VkDevice _device,
const VkDescriptorGetInfoEXT *pDescriptorInfo,
size_t dataSize,
void *pDescriptor)
{
TU_FROM_HANDLE(tu_device, device, _device);
switch (pDescriptorInfo->type) {
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
write_ubo_descriptor_addr(pDescriptor, pDescriptorInfo->data.pUniformBuffer);
break;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
write_buffer_descriptor_addr(device, pDescriptor, pDescriptorInfo->data.pStorageBuffer);
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
write_texel_buffer_descriptor_addr(pDescriptor, pDescriptorInfo->data.pUniformTexelBuffer);
break;
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
write_texel_buffer_descriptor_addr(pDescriptor, pDescriptorInfo->data.pStorageTexelBuffer);
break;
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
write_image_descriptor(pDescriptor, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
pDescriptorInfo->data.pSampledImage);
break;
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
write_image_descriptor(pDescriptor, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
pDescriptorInfo->data.pStorageImage);
break;
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
write_combined_image_sampler_descriptor(pDescriptor,
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
pDescriptorInfo->data.pCombinedImageSampler,
true);
break;
case VK_DESCRIPTOR_TYPE_SAMPLER:
write_sampler_descriptor(pDescriptor, *pDescriptorInfo->data.pSampler);
break;
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
/* nothing in descriptor set - framebuffer state is used instead */
if (unlikely(device->instance->debug_flags & TU_DEBUG_DYNAMIC)) {
write_image_descriptor(pDescriptor, VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT,
pDescriptorInfo->data.pInputAttachmentImage);
}
break;
default:
unreachable("unimplemented descriptor type");
break;
}
}
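
On the application side, the call that lands in tu_GetDescriptorEXT above writes the descriptor straight into the host-mapped descriptor buffer; a minimal sketch for a uniform buffer, assuming the device handle, the mapped pointer, a binding offset queried with vkGetDescriptorSetLayoutBindingOffsetEXT, a buffer device address, and an arbitrary 256-byte range:

VkDescriptorAddressInfoEXT ubo_addr = {
   .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT,
   .address = ubo_device_address,   /* from vkGetBufferDeviceAddress() */
   .range = 256,
};
VkDescriptorGetInfoEXT get_info = {
   .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT,
   .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
   .data.pUniformBuffer = &ubo_addr,
};
/* uniformBufferDescriptorSize is A6XX_TEX_CONST_DWORDS * 4 = 64 bytes on
 * a6xx, per the properties added in tu_device.c below. */
vkGetDescriptorEXT(device, &get_info,
                   descriptor_buffer_props.uniformBufferDescriptorSize,
                   (char *)descriptor_buffer_map + binding_offset);
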
/* We don't have any mutable state in buffers, images, image views, or
* samplers, so we shouldn't need to save/restore anything to get the same
* descriptor back as long as the user uses the same iova.
*/
VKAPI_ATTR VkResult VKAPI_CALL
tu_GetBufferOpaqueCaptureDescriptorDataEXT(VkDevice device,
const VkBufferCaptureDescriptorDataInfoEXT *pInfo,
void *pData)
{
return VK_SUCCESS;
}
VKAPI_ATTR VkResult VKAPI_CALL
tu_GetImageOpaqueCaptureDescriptorDataEXT(VkDevice device,
const VkImageCaptureDescriptorDataInfoEXT *pInfo,
void *pData)
{
return VK_SUCCESS;
}
VKAPI_ATTR VkResult VKAPI_CALL
tu_GetImageViewOpaqueCaptureDescriptorDataEXT(VkDevice device,
const VkImageViewCaptureDescriptorDataInfoEXT *pInfo,
void *pData)
{
return VK_SUCCESS;
}
VKAPI_ATTR VkResult VKAPI_CALL
tu_GetSamplerOpaqueCaptureDescriptorDataEXT(VkDevice _device,
const VkSamplerCaptureDescriptorDataInfoEXT *pInfo,
void *pData)
{
return VK_SUCCESS;
}
VKAPI_ATTR VkResult VKAPI_CALL
tu_GetAccelerationStructureOpaqueCaptureDescriptorDataEXT(VkDevice device,
const VkAccelerationStructureCaptureDescriptorDataInfoEXT *pInfo,
void *pData)
{
return VK_SUCCESS;
}
void
tu_update_descriptor_sets(const struct tu_device *device,
VkDescriptorSet dstSetOverride,
@@ -1124,7 +1333,7 @@ tu_update_descriptor_sets(const struct tu_device *device,
break;
case VK_DESCRIPTOR_TYPE_SAMPLER:
if (!binding_layout->immutable_samplers_offset)
write_sampler_descriptor(ptr, writeset->pImageInfo + j);
write_sampler_descriptor(ptr, writeset->pImageInfo[j].sampler);
else if (copy_immutable_samplers)
write_sampler_push(ptr, &samplers[writeset->dstArrayElement + j]);
break;
@@ -1453,7 +1662,7 @@ tu_update_descriptor_set_with_template(
break;
case VK_DESCRIPTOR_TYPE_SAMPLER:
if (templ->entry[i].has_sampler)
write_sampler_descriptor(ptr, src);
write_sampler_descriptor(ptr, ((const VkDescriptorImageInfo *)src)->sampler);
else if (samplers)
write_sampler_push(ptr, &samplers[j]);
break;


@@ -75,6 +75,8 @@ struct tu_descriptor_set_layout
bool has_variable_descriptors;
bool has_inline_uniforms;
struct tu_bo *embedded_samplers;
/* Bindings in this descriptor set */
struct tu_descriptor_set_binding_layout binding[0];
};


@@ -254,6 +254,7 @@ get_device_extensions(const struct tu_physical_device *device,
.KHR_pipeline_library = true,
.EXT_graphics_pipeline_library = true,
.EXT_post_depth_coverage = true,
.EXT_descriptor_buffer = true,
};
}
@@ -980,6 +981,15 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
features->presentWait = pdevice->vk.supported_extensions.KHR_present_wait;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT: {
VkPhysicalDeviceDescriptorBufferFeaturesEXT *features =
(VkPhysicalDeviceDescriptorBufferFeaturesEXT *)ext;
features->descriptorBuffer = true;
features->descriptorBufferCaptureReplay = pdevice->has_set_iova;
features->descriptorBufferImageLayoutIgnored = true;
features->descriptorBufferPushDescriptors = true;
break;
}
default:
break;
@@ -1451,6 +1461,52 @@ tu_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
properties->dynamicPrimitiveTopologyUnrestricted = true;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_PROPERTIES_EXT: {
VkPhysicalDeviceDescriptorBufferPropertiesEXT *properties =
(VkPhysicalDeviceDescriptorBufferPropertiesEXT *)ext;
properties->combinedImageSamplerDescriptorSingleArray = true;
properties->bufferlessPushDescriptors = true;
properties->allowSamplerImageViewPostSubmitCreation = true;
properties->descriptorBufferOffsetAlignment = A6XX_TEX_CONST_DWORDS * 4;
properties->maxDescriptorBufferBindings = MAX_SETS;
properties->maxResourceDescriptorBufferBindings = MAX_SETS;
properties->maxSamplerDescriptorBufferBindings = MAX_SETS;
properties->maxEmbeddedImmutableSamplerBindings = MAX_SETS;
properties->maxEmbeddedImmutableSamplers = max_descriptor_set_size;
properties->bufferCaptureReplayDescriptorDataSize = 0;
properties->imageCaptureReplayDescriptorDataSize = 0;
properties->imageViewCaptureReplayDescriptorDataSize = 0;
properties->samplerCaptureReplayDescriptorDataSize = 0;
properties->accelerationStructureCaptureReplayDescriptorDataSize = 0;
/* Note: these sizes must match descriptor_size() */
properties->samplerDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
properties->combinedImageSamplerDescriptorSize = 2 * A6XX_TEX_CONST_DWORDS * 4;
properties->sampledImageDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
properties->storageImageDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
properties->uniformTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
properties->robustUniformTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
properties->storageTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
properties->robustStorageTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
properties->uniformBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
properties->robustUniformBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
properties->storageBufferDescriptorSize =
pdevice->info->a6xx.storage_16bit ?
2 * A6XX_TEX_CONST_DWORDS * 4 :
A6XX_TEX_CONST_DWORDS * 4;
properties->robustStorageBufferDescriptorSize =
properties->storageBufferDescriptorSize;
properties->inputAttachmentDescriptorSize =
(pdevice->instance->debug_flags & TU_DEBUG_DYNAMIC) ?
A6XX_TEX_CONST_DWORDS * 4 : 0;
properties->maxSamplerDescriptorBufferRange = ~0ull;
properties->maxResourceDescriptorBufferRange = ~0ull;
properties->samplerDescriptorBufferAddressSpaceSize = ~0ull;
properties->resourceDescriptorBufferAddressSpaceSize = ~0ull;
properties->descriptorBufferAddressSpaceSize = ~0ull;
break;
}
default:
break;
}
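
With A6XX_TEX_CONST_DWORDS = 16, the values above come out to 64-byte descriptors (128 bytes for combined image+sampler, and for storage buffers on 16-bit-capable parts) and a 64-byte descriptorBufferOffsetAlignment. A hypothetical helper an application might use to keep its offsets legal:

/* Round a descriptor-buffer offset up to the driver-reported alignment
 * (a power of two; 64 bytes here). */
static VkDeviceSize
align_descriptor_offset(VkDeviceSize offset,
                        const VkPhysicalDeviceDescriptorBufferPropertiesEXT *props)
{
   VkDeviceSize align = props->descriptorBufferOffsetAlignment;
   return (offset + align - 1) & ~(align - 1);
}
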
@@ -2691,6 +2747,8 @@ tu_BindBufferMemory2(VkDevice device,
uint32_t bindInfoCount,
const VkBindBufferMemoryInfo *pBindInfos)
{
TU_FROM_HANDLE(tu_device, dev, device);
for (uint32_t i = 0; i < bindInfoCount; ++i) {
TU_FROM_HANDLE(tu_device_memory, mem, pBindInfos[i].memory);
TU_FROM_HANDLE(tu_buffer, buffer, pBindInfos[i].buffer);
@@ -2698,6 +2756,10 @@ tu_BindBufferMemory2(VkDevice device,
if (mem) {
buffer->bo = mem->bo;
buffer->iova = mem->bo->iova + pBindInfos[i].memoryOffset;
if (buffer->vk.usage &
(VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT |
VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT))
tu_bo_allow_dump(dev, mem->bo);
} else {
buffer->bo = NULL;
}


@@ -514,6 +514,14 @@ tu_bo_map(struct tu_device *dev, struct tu_bo *bo)
return VK_SUCCESS;
}
void
tu_bo_allow_dump(struct tu_device *dev, struct tu_bo *bo)
{
mtx_lock(&dev->bo_mutex);
dev->bo_list[bo->bo_list_idx].flags |= MSM_SUBMIT_BO_DUMP;
mtx_unlock(&dev->bo_mutex);
}
void
tu_bo_finish(struct tu_device *dev, struct tu_bo *bo)
{


@@ -97,6 +97,8 @@ tu_bo_finish(struct tu_device *dev, struct tu_bo *bo);
VkResult
tu_bo_map(struct tu_device *dev, struct tu_bo *bo);
void tu_bo_allow_dump(struct tu_device *dev, struct tu_bo *bo);
static inline struct tu_bo *
tu_bo_get_ref(struct tu_bo *bo)
{


@@ -184,6 +184,11 @@ tu_bo_map(struct tu_device *dev, struct tu_bo *bo)
return VK_SUCCESS;
}
void
tu_bo_allow_dump(struct tu_device *dev, struct tu_bo *bo)
{
}
void
tu_bo_finish(struct tu_device *dev, struct tu_bo *bo)
{