tu: Implement VK_EXT_descriptor_buffer

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19849>
Connor Abbott
2022-09-19 16:59:53 +02:00
committed by Marge Bot
parent b28899a261
commit cb3872f2cd
9 changed files with 465 additions and 77 deletions
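For orientation, a minimal sketch of the application-side binding flow that the new entry points in this commit serve. The handles (device, setLayout, pipelineLayout, cmdBuf) and the buffer address descBufAddr are placeholders, not from this commit:

   /* Sketch only: object creation and error handling omitted; descBufAddr is
    * the VkDeviceAddress of a buffer created with
    * VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT.
    */
   VkDeviceSize setSize, binding0Offset;
   vkGetDescriptorSetLayoutSizeEXT(device, setLayout, &setSize);
   vkGetDescriptorSetLayoutBindingOffsetEXT(device, setLayout, 0, &binding0Offset);

   VkDescriptorBufferBindingInfoEXT bindingInfo = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT,
      .address = descBufAddr,
      .usage = VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT,
   };
   vkCmdBindDescriptorBuffersEXT(cmdBuf, 1, &bindingInfo);

   uint32_t bufferIndex = 0;
   VkDeviceSize setOffset = 0;   /* must respect descriptorBufferOffsetAlignment */
   vkCmdSetDescriptorBufferOffsetsEXT(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS,
                                      pipelineLayout, 0 /* firstSet */, 1,
                                      &bufferIndex, &setOffset);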

View File

@@ -548,7 +548,7 @@ Khronos extensions that are not part of any Vulkan version:
   VK_EXT_depth_clip_control                             DONE (anv, lvp, radv, tu, v3dv, vn)
   VK_EXT_depth_clip_enable                              DONE (anv, lvp, radv, tu, vn)
   VK_EXT_depth_range_unrestricted                       DONE (radv, lvp)
-  VK_EXT_descriptor_buffer                              DONE (radv)
+  VK_EXT_descriptor_buffer                              DONE (radv, tu)
   VK_EXT_discard_rectangles                             DONE (radv)
   VK_EXT_display_control                                DONE (anv, radv, tu)
   VK_EXT_extended_dynamic_state3                        DONE (lvp, radv, tu)

View File

@@ -165,6 +165,12 @@ tu6_emit_flushes(struct tu_cmd_buffer *cmd_buffer,
       tu6_emit_event_write(cmd_buffer, cs, CACHE_FLUSH_TS);
    if (flushes & TU_CMD_FLAG_CACHE_INVALIDATE)
       tu6_emit_event_write(cmd_buffer, cs, CACHE_INVALIDATE);
+   if (flushes & TU_CMD_FLAG_BINDLESS_DESCRIPTOR_INVALIDATE) {
+      tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
+            .gfx_bindless = 0x1f,
+            .cs_bindless = 0x1f,
+      ));
+   }
    if (flushes & TU_CMD_FLAG_WAIT_MEM_WRITES)
       tu_cs_emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0);
    if ((flushes & TU_CMD_FLAG_WAIT_FOR_IDLE) ||
@@ -2061,6 +2067,64 @@ tu_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,
    cmd->state.index_size = index_size;
 }
 
+static void
+tu6_emit_descriptor_sets(struct tu_cmd_buffer *cmd,
+                         VkPipelineBindPoint bind_point)
+{
+   struct tu_descriptor_state *descriptors_state =
+      tu_get_descriptors_state(cmd, bind_point);
+   uint32_t sp_bindless_base_reg, hlsq_bindless_base_reg, hlsq_invalidate_value;
+   struct tu_cs *cs, state_cs;
+
+   if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
+      sp_bindless_base_reg = REG_A6XX_SP_BINDLESS_BASE(0);
+      hlsq_bindless_base_reg = REG_A6XX_HLSQ_BINDLESS_BASE(0);
+      hlsq_invalidate_value = A6XX_HLSQ_INVALIDATE_CMD_GFX_BINDLESS(0x1f);
+
+      cmd->state.desc_sets =
+         tu_cs_draw_state(&cmd->sub_cs, &state_cs,
+                          4 + 4 * descriptors_state->max_sets_bound +
+                          (descriptors_state->dynamic_bound ? 6 : 0));
+      cmd->state.dirty |= TU_CMD_DIRTY_DESC_SETS_LOAD;
+      cs = &state_cs;
+   } else {
+      assert(bind_point == VK_PIPELINE_BIND_POINT_COMPUTE);
+
+      sp_bindless_base_reg = REG_A6XX_SP_CS_BINDLESS_BASE(0);
+      hlsq_bindless_base_reg = REG_A6XX_HLSQ_CS_BINDLESS_BASE(0);
+      hlsq_invalidate_value = A6XX_HLSQ_INVALIDATE_CMD_CS_BINDLESS(0x1f);
+      cmd->state.dirty |= TU_CMD_DIRTY_COMPUTE_DESC_SETS_LOAD;
+      cs = &cmd->cs;
+   }
+
+   tu_cs_emit_pkt4(cs, sp_bindless_base_reg, 2 * descriptors_state->max_sets_bound);
+   tu_cs_emit_array(cs, (const uint32_t *)descriptors_state->set_iova, 2 * descriptors_state->max_sets_bound);
+   tu_cs_emit_pkt4(cs, hlsq_bindless_base_reg, 2 * descriptors_state->max_sets_bound);
+   tu_cs_emit_array(cs, (const uint32_t *)descriptors_state->set_iova, 2 * descriptors_state->max_sets_bound);
+
+   /* Dynamic descriptors get the last descriptor set. */
+   if (descriptors_state->dynamic_bound) {
+      tu_cs_emit_pkt4(cs, sp_bindless_base_reg + 4 * 2, 2);
+      tu_cs_emit_qw(cs, descriptors_state->set_iova[MAX_SETS]);
+      tu_cs_emit_pkt4(cs, hlsq_bindless_base_reg + 4 * 2, 2);
+      tu_cs_emit_qw(cs, descriptors_state->set_iova[MAX_SETS]);
+   }
+
+   tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(.dword = hlsq_invalidate_value));
+
+   if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
+      assert(cs->cur == cs->end); /* validate draw state size */
+      /* note: this also avoids emitting draw states before renderpass clears,
+       * which may use the 3D clear path (for MSAA cases)
+       */
+      if (!(cmd->state.dirty & TU_CMD_DIRTY_DRAW_STATE)) {
+         tu_cs_emit_pkt7(&cmd->draw_cs, CP_SET_DRAW_STATE, 3);
+         tu_cs_emit_draw_state(&cmd->draw_cs, TU_DRAW_STATE_DESC_SETS, cmd->state.desc_sets);
+      }
+   }
+}
+
 VKAPI_ATTR void VKAPI_CALL
 tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
                          VkPipelineBindPoint pipelineBindPoint,
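Each bound set occupies one SP_BINDLESS_BASE/HLSQ_BINDLESS_BASE entry, and each entry is a 64-bit iova, i.e. two 32-bit registers. A small sketch of the offset arithmetic implied by the packet sizes above (the helper name is illustrative, not part of this commit):

   /* Set n lives at first_base_reg + 2 * n; the dynamic-descriptor set is
    * written at "+ 4 * 2" above, i.e. the slot after the user-visible sets.
    */
   static uint32_t
   bindless_base_reg_for_set(uint32_t first_base_reg, uint32_t set_idx)
   {
      return first_base_reg + 2 * set_idx;
   }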
@@ -2086,6 +2150,7 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
       TU_FROM_HANDLE(tu_descriptor_set, set, pDescriptorSets[i]);
 
       descriptors_state->sets[idx] = set;
+      descriptors_state->set_iova[idx] = set->va | 3;
 
       if (!set)
          continue;
@@ -2138,17 +2203,6 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
    }
 
    assert(dyn_idx == dynamicOffsetCount);
-
-   uint32_t sp_bindless_base_reg, hlsq_bindless_base_reg, hlsq_invalidate_value;
-   uint64_t addr[MAX_SETS] = {};
-   uint64_t dynamic_addr = 0;
-   struct tu_cs *cs, state_cs;
-
-   for (uint32_t i = 0; i < descriptors_state->max_sets_bound; i++) {
-      struct tu_descriptor_set *set = descriptors_state->sets[i];
-      if (set)
-         addr[i] = set->va | 3;
-   }
 
    if (layout->dynamic_offset_size) {
       /* allocate and fill out dynamic descriptor set */
       struct tu_cs_memory dynamic_desc_set;
@@ -2162,57 +2216,79 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
 
       memcpy(dynamic_desc_set.map, descriptors_state->dynamic_descriptors,
              layout->dynamic_offset_size);
-      dynamic_addr = dynamic_desc_set.iova | 3;
+      descriptors_state->set_iova[MAX_SETS] = dynamic_desc_set.iova | 3;
       descriptors_state->dynamic_bound = true;
    }
 
-   if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
-      sp_bindless_base_reg = REG_A6XX_SP_BINDLESS_BASE(0);
-      hlsq_bindless_base_reg = REG_A6XX_HLSQ_BINDLESS_BASE(0);
-      hlsq_invalidate_value = A6XX_HLSQ_INVALIDATE_CMD_GFX_BINDLESS(0x1f);
-
-      cmd->state.desc_sets =
-         tu_cs_draw_state(&cmd->sub_cs, &state_cs,
-                          4 + 4 * descriptors_state->max_sets_bound +
-                          (descriptors_state->dynamic_bound ? 6 : 0));
-      cmd->state.dirty |= TU_CMD_DIRTY_DESC_SETS_LOAD;
-      cs = &state_cs;
-   } else {
-      assert(pipelineBindPoint == VK_PIPELINE_BIND_POINT_COMPUTE);
-
-      sp_bindless_base_reg = REG_A6XX_SP_CS_BINDLESS_BASE(0);
-      hlsq_bindless_base_reg = REG_A6XX_HLSQ_CS_BINDLESS_BASE(0);
-      hlsq_invalidate_value = A6XX_HLSQ_INVALIDATE_CMD_CS_BINDLESS(0x1f);
-      cmd->state.dirty |= TU_CMD_DIRTY_COMPUTE_DESC_SETS_LOAD;
-      cs = &cmd->cs;
-   }
-
-   tu_cs_emit_pkt4(cs, sp_bindless_base_reg, 2 * descriptors_state->max_sets_bound);
-   tu_cs_emit_array(cs, (const uint32_t*) addr, 2 * descriptors_state->max_sets_bound);
-   tu_cs_emit_pkt4(cs, hlsq_bindless_base_reg, 2 * descriptors_state->max_sets_bound);
-   tu_cs_emit_array(cs, (const uint32_t*) addr, 2 * descriptors_state->max_sets_bound);
-
-   /* Dynamic descriptors get the last descriptor set. */
-   if (descriptors_state->dynamic_bound) {
-      tu_cs_emit_pkt4(cs, sp_bindless_base_reg + 4 * 2, 2);
-      tu_cs_emit_qw(cs, dynamic_addr);
-      tu_cs_emit_pkt4(cs, hlsq_bindless_base_reg + 4 * 2, 2);
-      tu_cs_emit_qw(cs, dynamic_addr);
-   }
-
-   tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(.dword = hlsq_invalidate_value));
-
-   if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
-      assert(cs->cur == cs->end); /* validate draw state size */
-      /* note: this also avoids emitting draw states before renderpass clears,
-       * which may use the 3D clear path (for MSAA cases)
-       */
-      if (!(cmd->state.dirty & TU_CMD_DIRTY_DRAW_STATE)) {
-         tu_cs_emit_pkt7(&cmd->draw_cs, CP_SET_DRAW_STATE, 3);
-         tu_cs_emit_draw_state(&cmd->draw_cs, TU_DRAW_STATE_DESC_SETS, cmd->state.desc_sets);
-      }
-   }
+   tu6_emit_descriptor_sets(cmd, pipelineBindPoint);
 }
 
+VKAPI_ATTR void VKAPI_CALL
+tu_CmdBindDescriptorBuffersEXT(
+   VkCommandBuffer commandBuffer,
+   uint32_t bufferCount,
+   const VkDescriptorBufferBindingInfoEXT *pBindingInfos)
+{
+   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+
+   for (unsigned i = 0; i < bufferCount; i++)
+      cmd->state.descriptor_buffer_iova[i] = pBindingInfos[i].address;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+tu_CmdSetDescriptorBufferOffsetsEXT(
+   VkCommandBuffer commandBuffer,
+   VkPipelineBindPoint pipelineBindPoint,
+   VkPipelineLayout _layout,
+   uint32_t firstSet,
+   uint32_t setCount,
+   const uint32_t *pBufferIndices,
+   const VkDeviceSize *pOffsets)
+{
+   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+   TU_FROM_HANDLE(tu_pipeline_layout, layout, _layout);
+
+   struct tu_descriptor_state *descriptors_state =
+      tu_get_descriptors_state(cmd, pipelineBindPoint);
+
+   descriptors_state->max_sets_bound =
+      MAX2(descriptors_state->max_sets_bound, firstSet + setCount);
+
+   for (unsigned i = 0; i < setCount; ++i) {
+      unsigned idx = i + firstSet;
+      struct tu_descriptor_set_layout *set_layout = layout->set[idx].layout;
+
+      descriptors_state->set_iova[idx] =
+         (cmd->state.descriptor_buffer_iova[pBufferIndices[i]] + pOffsets[i]) | 3;
+
+      if (set_layout->has_inline_uniforms)
+         cmd->state.dirty |= TU_CMD_DIRTY_SHADER_CONSTS;
+   }
+
+   tu6_emit_descriptor_sets(cmd, pipelineBindPoint);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+tu_CmdBindDescriptorBufferEmbeddedSamplersEXT(
+   VkCommandBuffer commandBuffer,
+   VkPipelineBindPoint pipelineBindPoint,
+   VkPipelineLayout _layout,
+   uint32_t set)
+{
+   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+   TU_FROM_HANDLE(tu_pipeline_layout, layout, _layout);
+
+   struct tu_descriptor_set_layout *set_layout = layout->set[set].layout;
+   struct tu_descriptor_state *descriptors_state =
+      tu_get_descriptors_state(cmd, pipelineBindPoint);
+
+   descriptors_state->max_sets_bound =
+      MAX2(descriptors_state->max_sets_bound, set + 1);
+
+   descriptors_state->set_iova[set] = set_layout->embedded_samplers->iova | 3;
+
+   tu6_emit_descriptor_sets(cmd, pipelineBindPoint);
+}
 
 static enum VkResult
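The embedded-samplers entry point above only works for set layouts created with the corresponding flag; a hedged sketch of the application side (layoutInfo, pipelineLayout and cmdBuf are placeholders, and the set index is illustrative):

   VkDescriptorSetLayoutCreateInfo layoutInfo = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
      /* bindings with pImmutableSamplers elided */
   };
   layoutInfo.flags |= VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT |
                       VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT;

   /* Later, bound without any descriptor memory backing it; the driver points
    * the set at the embedded_samplers BO filled at layout creation time.
    */
   vkCmdBindDescriptorBufferEmbeddedSamplersEXT(cmdBuf,
                                                VK_PIPELINE_BIND_POINT_GRAPHICS,
                                                pipelineLayout,
                                                1 /* set index */);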
@@ -3489,6 +3565,10 @@ tu_flush_for_access(struct tu_cache_state *cache,
    DST_INCOHERENT_FLUSH(CCU_COLOR, CCU_FLUSH_COLOR, CCU_INVALIDATE_COLOR)
    DST_INCOHERENT_FLUSH(CCU_DEPTH, CCU_FLUSH_DEPTH, CCU_INVALIDATE_DEPTH)
+
+   if (dst_mask & TU_ACCESS_BINDLESS_DESCRIPTOR_READ) {
+      flush_bits |= TU_CMD_FLAG_BINDLESS_DESCRIPTOR_INVALIDATE;
+   }
 #undef DST_INCOHERENT_FLUSH
 
    cache->flush_bits |= flush_bits;
@@ -3592,6 +3672,12 @@ vk2tu_access(VkAccessFlags2 flags, VkPipelineStageFlags2 stages, bool image_only)
                        SHADER_STAGES))
       mask |= TU_ACCESS_UCHE_READ;
 
+   if (gfx_read_access(flags, stages,
+                       VK_ACCESS_2_DESCRIPTOR_BUFFER_READ_BIT_EXT,
+                       SHADER_STAGES)) {
+      mask |= TU_ACCESS_UCHE_READ | TU_ACCESS_BINDLESS_DESCRIPTOR_READ;
+   }
+
    if (gfx_write_access(flags, stages,
                         VK_ACCESS_2_SHADER_WRITE_BIT |
                         VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT,
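On the application side this access bit is all that is needed after updating descriptor memory; a minimal sketch of such a barrier, with stage masks chosen purely for illustration (cmdBuf is a placeholder):

   /* After the host or a transfer writes descriptors into the buffer, make
    * them visible to shaders; the driver maps the dstAccessMask below to
    * TU_ACCESS_UCHE_READ | TU_ACCESS_BINDLESS_DESCRIPTOR_READ.
    */
   VkMemoryBarrier2 barrier = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
      .srcStageMask = VK_PIPELINE_STAGE_2_COPY_BIT,
      .srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT,
      .dstStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT,
      .dstAccessMask = VK_ACCESS_2_DESCRIPTOR_BUFFER_READ_BIT_EXT,
   };
   VkDependencyInfo dep = {
      .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
      .memoryBarrierCount = 1,
      .pMemoryBarriers = &barrier,
   };
   vkCmdPipelineBarrier2(cmdBuf, &dep);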
@@ -4492,6 +4578,8 @@ tu6_emit_user_consts(struct tu_cs *cs,
    for (unsigned i = 0; i < link->tu_const_state.num_inline_ubos; i++) {
       const struct tu_inline_ubo *ubo = &link->tu_const_state.ubos[i];
 
+      uint64_t va = descriptors->set_iova[ubo->base] & ~0x3f;
+
       tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), ubo->push_address ? 7 : 3);
       tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(ubo->const_offset_vec4) |
                  CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
@@ -4501,11 +4589,11 @@ tu6_emit_user_consts(struct tu_cs *cs,
       if (ubo->push_address) {
          tu_cs_emit(cs, 0);
          tu_cs_emit(cs, 0);
-         tu_cs_emit_qw(cs, descriptors->sets[ubo->base]->va + ubo->offset);
+         tu_cs_emit_qw(cs, va + ubo->offset);
          tu_cs_emit(cs, 0);
          tu_cs_emit(cs, 0);
       } else {
-         tu_cs_emit_qw(cs, descriptors->sets[ubo->base]->va + ubo->offset);
+         tu_cs_emit_qw(cs, va + ubo->offset);
       }
    }
 }

View File

@@ -46,6 +46,7 @@ struct tu_descriptor_state
    struct tu_descriptor_set *sets[MAX_SETS];
    struct tu_descriptor_set push_set;
    uint32_t dynamic_descriptors[MAX_DYNAMIC_BUFFERS_SIZE];
+   uint64_t set_iova[MAX_SETS + 1];
    uint32_t max_sets_bound;
    bool dynamic_bound;
 };
@@ -120,13 +121,20 @@ enum tu_cmd_access_mask {
     */
    TU_ACCESS_CP_WRITE = 1 << 12,
 
+   /* Descriptors are read through UCHE but are also prefetched via
+    * CP_LOAD_STATE6 and the prefetched descriptors need to be invalidated
+    * when they change.
+    */
+   TU_ACCESS_BINDLESS_DESCRIPTOR_READ = 1 << 13,
+
    TU_ACCESS_READ =
       TU_ACCESS_UCHE_READ |
       TU_ACCESS_CCU_COLOR_READ |
       TU_ACCESS_CCU_DEPTH_READ |
       TU_ACCESS_CCU_COLOR_INCOHERENT_READ |
       TU_ACCESS_CCU_DEPTH_INCOHERENT_READ |
-      TU_ACCESS_SYSMEM_READ,
+      TU_ACCESS_SYSMEM_READ |
+      TU_ACCESS_BINDLESS_DESCRIPTOR_READ,
 
    TU_ACCESS_WRITE =
       TU_ACCESS_UCHE_WRITE |
@@ -203,6 +211,7 @@ enum tu_cmd_flush_bits {
    TU_CMD_FLAG_WAIT_MEM_WRITES = 1 << 6,
    TU_CMD_FLAG_WAIT_FOR_IDLE = 1 << 7,
    TU_CMD_FLAG_WAIT_FOR_ME = 1 << 8,
+   TU_CMD_FLAG_BINDLESS_DESCRIPTOR_INVALIDATE = 1 << 9,
 
    TU_CMD_FLAG_ALL_FLUSH =
       TU_CMD_FLAG_CCU_FLUSH_DEPTH |
@@ -217,6 +226,7 @@ enum tu_cmd_flush_bits {
       TU_CMD_FLAG_CCU_INVALIDATE_DEPTH |
       TU_CMD_FLAG_CCU_INVALIDATE_COLOR |
       TU_CMD_FLAG_CACHE_INVALIDATE |
+      TU_CMD_FLAG_BINDLESS_DESCRIPTOR_INVALIDATE |
       /* Treat CP_WAIT_FOR_ME as a "cache" that needs to be invalidated when a
        * a command that needs CP_WAIT_FOR_ME is executed. This means we may
        * insert an extra WAIT_FOR_ME before an indirect command requiring it
@@ -527,6 +537,8 @@ struct tu_cmd_state
    struct tu_vs_params last_vs_params;
    struct tu_primitive_params last_prim_params;
+
+   uint64_t descriptor_buffer_iova[MAX_SETS];
 };
 
 struct tu_cmd_buffer

View File

@@ -27,6 +27,7 @@
 #include "tu_device.h"
 #include "tu_image.h"
+#include "tu_formats.h"
 
 static inline uint8_t *
 pool_base(struct tu_descriptor_pool *pool)
@@ -93,6 +94,19 @@ mutable_descriptor_size(struct tu_device *dev,
    return max_size;
 }
 
+static void
+tu_descriptor_set_layout_destroy(struct vk_device *vk_dev,
+                                 struct vk_descriptor_set_layout *vk_layout)
+{
+   struct tu_device *dev = container_of(vk_dev, struct tu_device, vk);
+   struct tu_descriptor_set_layout *layout =
+      container_of(vk_layout, struct tu_descriptor_set_layout, vk);
+
+   if (layout->embedded_samplers)
+      tu_bo_finish(dev, layout->embedded_samplers);
+   vk_descriptor_set_layout_destroy(vk_dev, vk_layout);
+}
+
 VKAPI_ATTR VkResult VKAPI_CALL
 tu_CreateDescriptorSetLayout(
    VkDevice _device,
@@ -149,6 +163,7 @@ tu_CreateDescriptorSetLayout(
       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
 
    set_layout->flags = pCreateInfo->flags;
+   set_layout->vk.destroy = tu_descriptor_set_layout_destroy;
 
    /* We just allocate all the immutable samplers at the end of the struct */
    struct tu_sampler *samplers = (void*) &set_layout->binding[num_bindings];
@@ -256,6 +271,38 @@ tu_CreateDescriptorSetLayout(
    set_layout->dynamic_offset_size = dynamic_offset_size;
 
+   if (pCreateInfo->flags &
+       VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT) {
+      result = tu_bo_init_new(device, &set_layout->embedded_samplers,
+                              set_layout->size, TU_BO_ALLOC_ALLOW_DUMP,
+                              "embedded samplers");
+      if (result != VK_SUCCESS) {
+         vk_object_free(&device->vk, pAllocator, set_layout);
+         return vk_error(device, result);
+      }
+
+      result = tu_bo_map(device, set_layout->embedded_samplers);
+      if (result != VK_SUCCESS) {
+         tu_bo_finish(device, set_layout->embedded_samplers);
+         vk_object_free(&device->vk, pAllocator, set_layout);
+         return vk_error(device, result);
+      }
+
+      char *map = set_layout->embedded_samplers->map;
+      for (unsigned i = 0; i < set_layout->binding_count; i++) {
+         if (!set_layout->binding[i].immutable_samplers_offset)
+            continue;
+
+         unsigned offset = set_layout->binding[i].offset;
+         const struct tu_sampler *sampler =
+            (const struct tu_sampler *)((const char *)set_layout +
+                               set_layout->binding[i].immutable_samplers_offset);
+
+         assert(set_layout->binding[i].array_size == 1);
+         memcpy(map + offset, sampler->descriptor,
+                sizeof(sampler->descriptor));
+      }
+   }
+
    *pSetLayout = tu_descriptor_set_layout_to_handle(set_layout);
 
    return VK_SUCCESS;
@@ -360,6 +407,30 @@ out:
    pSupport->supported = supported;
 }
 
+VKAPI_ATTR void VKAPI_CALL
+tu_GetDescriptorSetLayoutSizeEXT(
+   VkDevice _device,
+   VkDescriptorSetLayout _layout,
+   VkDeviceSize *pLayoutSizeInBytes)
+{
+   TU_FROM_HANDLE(tu_descriptor_set_layout, layout, _layout);
+
+   *pLayoutSizeInBytes = layout->size;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+tu_GetDescriptorSetLayoutBindingOffsetEXT(
+   VkDevice _device,
+   VkDescriptorSetLayout _layout,
+   uint32_t binding,
+   VkDeviceSize *pOffset)
+{
+   TU_FROM_HANDLE(tu_descriptor_set_layout, layout, _layout);
+
+   assert(binding < layout->binding_count);
+   *pOffset = layout->binding[binding].offset;
+}
+
 /* Note: we must hash any values used in tu_lower_io(). */
 #define SHA1_UPDATE_VALUE(ctx, x) _mesa_sha1_update(ctx, &(x), sizeof(x));
@@ -905,6 +976,21 @@ tu_FreeDescriptorSets(VkDevice _device,
    return VK_SUCCESS;
 }
 
+static void
+write_texel_buffer_descriptor_addr(uint32_t *dst,
+                                   const VkDescriptorAddressInfoEXT *buffer_info)
+{
+   if (!buffer_info || buffer_info->address == 0) {
+      memset(dst, 0, A6XX_TEX_CONST_DWORDS * sizeof(uint32_t));
+   } else {
+      uint8_t swiz[4] = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z,
+                          PIPE_SWIZZLE_W };
+      fdl6_buffer_view_init(dst,
+                            tu_vk_format_to_pipe_format(buffer_info->format),
+                            swiz, buffer_info->address, buffer_info->range);
+   }
+}
+
 static void
 write_texel_buffer_descriptor(uint32_t *dst, const VkBufferView buffer_view)
 {
@@ -917,10 +1003,24 @@ write_texel_buffer_descriptor(uint32_t *dst, const VkBufferView buffer_view)
    }
 }
 
+static VkDescriptorAddressInfoEXT
+buffer_info_to_address(const VkDescriptorBufferInfo *buffer_info)
+{
+   TU_FROM_HANDLE(tu_buffer, buffer, buffer_info->buffer);
+
+   uint32_t range = buffer ? vk_buffer_range(&buffer->vk, buffer_info->offset, buffer_info->range) : 0;
+   uint64_t va = buffer ? buffer->iova + buffer_info->offset : 0;
+
+   return (VkDescriptorAddressInfoEXT) {
+      .address = va,
+      .range = range,
+   };
+}
+
 static void
-write_buffer_descriptor(const struct tu_device *device,
-                        uint32_t *dst,
-                        const VkDescriptorBufferInfo *buffer_info)
+write_buffer_descriptor_addr(const struct tu_device *device,
+                             uint32_t *dst,
+                             const VkDescriptorAddressInfoEXT *buffer_info)
 {
    bool storage_16bit = device->physical_device->info->a6xx.storage_16bit;
    /* newer a6xx allows using 16-bit descriptor for both 16-bit and 32-bit
@@ -928,16 +1028,14 @@ write_buffer_descriptor(const struct tu_device *device,
     * isam.
     */
    unsigned descriptors = storage_16bit ? 2 : 1;
-   if (buffer_info->buffer == VK_NULL_HANDLE) {
+
+   if (!buffer_info || buffer_info->address == 0) {
       memset(dst, 0, descriptors * A6XX_TEX_CONST_DWORDS * sizeof(uint32_t));
       return;
    }
 
-   TU_FROM_HANDLE(tu_buffer, buffer, buffer_info->buffer);
-
-   assert((buffer_info->offset & 63) == 0); /* minStorageBufferOffsetAlignment */
-   uint64_t va = buffer->iova + buffer_info->offset;
-   uint32_t range = vk_buffer_range(&buffer->vk, buffer_info->offset, buffer_info->range);
+   uint64_t va = buffer_info->address;
+   uint32_t range = buffer_info->range;
 
    for (unsigned i = 0; i < descriptors; i++) {
       if (storage_16bit && i == 0) {
@@ -959,30 +1057,43 @@ write_buffer_descriptor(const struct tu_device *device,
 }
 
 static void
-write_ubo_descriptor(uint32_t *dst, const VkDescriptorBufferInfo *buffer_info)
+write_buffer_descriptor(const struct tu_device *device,
+                        uint32_t *dst,
+                        const VkDescriptorBufferInfo *buffer_info)
 {
-   if (buffer_info->buffer == VK_NULL_HANDLE) {
+   VkDescriptorAddressInfoEXT addr = buffer_info_to_address(buffer_info);
+   write_buffer_descriptor_addr(device, dst, &addr);
+}
+
+static void
+write_ubo_descriptor_addr(uint32_t *dst,
+                          const VkDescriptorAddressInfoEXT *buffer_info)
+{
+   if (!buffer_info) {
       dst[0] = dst[1] = 0;
       return;
    }
 
-   TU_FROM_HANDLE(tu_buffer, buffer, buffer_info->buffer);
-
-   uint32_t range = vk_buffer_range(&buffer->vk, buffer_info->offset, buffer_info->range);
+   uint64_t va = buffer_info->address;
    /* The HW range is in vec4 units */
-   range = ALIGN_POT(range, 16) / 16;
-   uint64_t va = buffer->iova + buffer_info->offset;
+   uint32_t range = va ? DIV_ROUND_UP(buffer_info->range, 16) : 0;
 
    dst[0] = A6XX_UBO_0_BASE_LO(va);
    dst[1] = A6XX_UBO_1_BASE_HI(va >> 32) | A6XX_UBO_1_SIZE(range);
 }
 
+static void
+write_ubo_descriptor(uint32_t *dst, const VkDescriptorBufferInfo *buffer_info)
+{
+   VkDescriptorAddressInfoEXT addr = buffer_info_to_address(buffer_info);
+   write_ubo_descriptor_addr(dst, &addr);
+}
+
 static void
 write_image_descriptor(uint32_t *dst,
                        VkDescriptorType descriptor_type,
                        const VkDescriptorImageInfo *image_info)
 {
-   if (image_info->imageView == VK_NULL_HANDLE) {
+   if (!image_info || image_info->imageView == VK_NULL_HANDLE) {
      memset(dst, 0, A6XX_TEX_CONST_DWORDS * sizeof(uint32_t));
      return;
    }
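The UBO size field is counted in vec4 (16-byte) units, so the address-based path rounds the byte range up rather than relying on a buffer object; a tiny worked example with illustrative values:

   /* A 260-byte range becomes DIV_ROUND_UP(260, 16) = 17 vec4s; a null
    * descriptor (address == 0) keeps the range at 0.
    */
   uint64_t va = 0x1000;                               /* buffer_info->address */
   uint32_t range = va ? DIV_ROUND_UP(260, 16) : 0;    /* == 17 */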
@@ -1006,14 +1117,15 @@ write_combined_image_sampler_descriptor(uint32_t *dst,
 
    /* copy over sampler state */
    if (has_sampler) {
       TU_FROM_HANDLE(tu_sampler, sampler, image_info->sampler);
       memcpy(dst + A6XX_TEX_CONST_DWORDS, sampler->descriptor, sizeof(sampler->descriptor));
    }
 }
 
 static void
-write_sampler_descriptor(uint32_t *dst, const VkDescriptorImageInfo *image_info)
+write_sampler_descriptor(uint32_t *dst, VkSampler _sampler)
 {
-   TU_FROM_HANDLE(tu_sampler, sampler, image_info->sampler);
+   TU_FROM_HANDLE(tu_sampler, sampler, _sampler);
 
    memcpy(dst, sampler->descriptor, sizeof(sampler->descriptor));
 }
@@ -1025,6 +1137,103 @@ write_sampler_push(uint32_t *dst, const struct tu_sampler *sampler)
    memcpy(dst, sampler->descriptor, sizeof(sampler->descriptor));
 }
 
+VKAPI_ATTR void VKAPI_CALL
+tu_GetDescriptorEXT(
+   VkDevice _device,
+   const VkDescriptorGetInfoEXT *pDescriptorInfo,
+   size_t dataSize,
+   void *pDescriptor)
+{
+   TU_FROM_HANDLE(tu_device, device, _device);
+
+   switch (pDescriptorInfo->type) {
+   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+      write_ubo_descriptor_addr(pDescriptor, pDescriptorInfo->data.pUniformBuffer);
+      break;
+   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+      write_buffer_descriptor_addr(device, pDescriptor, pDescriptorInfo->data.pStorageBuffer);
+      break;
+   case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+      write_texel_buffer_descriptor_addr(pDescriptor, pDescriptorInfo->data.pUniformTexelBuffer);
+      break;
+   case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+      write_texel_buffer_descriptor_addr(pDescriptor, pDescriptorInfo->data.pStorageTexelBuffer);
+      break;
+   case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+      write_image_descriptor(pDescriptor, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+                             pDescriptorInfo->data.pSampledImage);
+      break;
+   case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+      write_image_descriptor(pDescriptor, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+                             pDescriptorInfo->data.pStorageImage);
+      break;
+   case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+      write_combined_image_sampler_descriptor(pDescriptor,
+                                              VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+                                              pDescriptorInfo->data.pCombinedImageSampler,
+                                              true);
+      break;
+   case VK_DESCRIPTOR_TYPE_SAMPLER:
+      write_sampler_descriptor(pDescriptor, *pDescriptorInfo->data.pSampler);
+      break;
+   case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+      /* nothing in descriptor set - framebuffer state is used instead */
+      if (unlikely(device->instance->debug_flags & TU_DEBUG_DYNAMIC)) {
+         write_image_descriptor(pDescriptor, VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT,
+                                pDescriptorInfo->data.pInputAttachmentImage);
+      }
+      break;
+   default:
+      unreachable("unimplemented descriptor type");
+      break;
+   }
+}
+
+/* We don't have any mutable state in buffers, images, image views, or
+ * samplers, so we shouldn't need to save/restore anything to get the same
+ * descriptor back as long as the user uses the same iova.
+ */
+
+VKAPI_ATTR VkResult VKAPI_CALL
+tu_GetBufferOpaqueCaptureDescriptorDataEXT(VkDevice device,
+                                           const VkBufferCaptureDescriptorDataInfoEXT *pInfo,
+                                           void *pData)
+{
+   return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+tu_GetImageOpaqueCaptureDescriptorDataEXT(VkDevice device,
+                                          const VkImageCaptureDescriptorDataInfoEXT *pInfo,
+                                          void *pData)
+{
+   return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+tu_GetImageViewOpaqueCaptureDescriptorDataEXT(VkDevice device,
+                                              const VkImageViewCaptureDescriptorDataInfoEXT *pInfo,
+                                              void *pData)
+{
+   return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+tu_GetSamplerOpaqueCaptureDescriptorDataEXT(VkDevice _device,
+                                            const VkSamplerCaptureDescriptorDataInfoEXT *pInfo,
+                                            void *pData)
+{
+   return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+tu_GetAccelerationStructureOpaqueCaptureDescriptorDataEXT(VkDevice device,
+                                                          const VkAccelerationStructureCaptureDescriptorDataInfoEXT *pInfo,
+                                                          void *pData)
+{
+   return VK_SUCCESS;
+}
+
 void
 tu_update_descriptor_sets(const struct tu_device *device,
                           VkDescriptorSet dstSetOverride,
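tu_GetDescriptorEXT writes descriptors directly into application memory; a sketch of the matching application call for a storage buffer, assuming the descriptor buffer is host-mapped at descBufMap, binding0Offset came from vkGetDescriptorSetLayoutBindingOffsetEXT, and props holds the queried VkPhysicalDeviceDescriptorBufferPropertiesEXT (all names are placeholders):

   VkDescriptorAddressInfoEXT addrInfo = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT,
      .address = ssboAddress,          /* from vkGetBufferDeviceAddress() */
      .range = ssboSize,
      .format = VK_FORMAT_UNDEFINED,
   };
   VkDescriptorGetInfoEXT getInfo = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT,
      .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
      .data.pStorageBuffer = &addrInfo,
   };
   /* Ends up in write_buffer_descriptor_addr(); the size to pass is the
    * queried storageBufferDescriptorSize (two texture constants when the
    * device exposes 16-bit storage, one otherwise).
    */
   vkGetDescriptorEXT(device, &getInfo, props.storageBufferDescriptorSize,
                      (char *)descBufMap + binding0Offset);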
@@ -1124,7 +1333,7 @@ tu_update_descriptor_sets(const struct tu_device *device,
          break;
       case VK_DESCRIPTOR_TYPE_SAMPLER:
          if (!binding_layout->immutable_samplers_offset)
-            write_sampler_descriptor(ptr, writeset->pImageInfo + j);
+            write_sampler_descriptor(ptr, writeset->pImageInfo[j].sampler);
          else if (copy_immutable_samplers)
            write_sampler_push(ptr, &samplers[writeset->dstArrayElement + j]);
         break;
@@ -1453,7 +1662,7 @@ tu_update_descriptor_set_with_template(
          break;
       case VK_DESCRIPTOR_TYPE_SAMPLER:
          if (templ->entry[i].has_sampler)
-            write_sampler_descriptor(ptr, src);
+            write_sampler_descriptor(ptr, ((const VkDescriptorImageInfo *)src)->sampler);
          else if (samplers)
            write_sampler_push(ptr, &samplers[j]);
         break;

View File

@@ -75,6 +75,8 @@ struct tu_descriptor_set_layout
    bool has_variable_descriptors;
    bool has_inline_uniforms;
 
+   struct tu_bo *embedded_samplers;
+
    /* Bindings in this descriptor set */
    struct tu_descriptor_set_binding_layout binding[0];
 };

View File

@@ -254,6 +254,7 @@ get_device_extensions(const struct tu_physical_device *device,
       .KHR_pipeline_library = true,
       .EXT_graphics_pipeline_library = true,
       .EXT_post_depth_coverage = true,
+      .EXT_descriptor_buffer = true,
    };
 }
@@ -980,6 +981,15 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
          features->presentWait = pdevice->vk.supported_extensions.KHR_present_wait;
          break;
       }
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT: {
+         VkPhysicalDeviceDescriptorBufferFeaturesEXT *features =
+            (VkPhysicalDeviceDescriptorBufferFeaturesEXT *)ext;
+         features->descriptorBuffer = true;
+         features->descriptorBufferCaptureReplay = pdevice->has_set_iova;
+         features->descriptorBufferImageLayoutIgnored = true;
+         features->descriptorBufferPushDescriptors = true;
+         break;
+      }
 
       default:
          break;
@@ -1451,6 +1461,52 @@ tu_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
          properties->dynamicPrimitiveTopologyUnrestricted = true;
          break;
       }
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_PROPERTIES_EXT: {
+         VkPhysicalDeviceDescriptorBufferPropertiesEXT *properties =
+            (VkPhysicalDeviceDescriptorBufferPropertiesEXT *)ext;
+         properties->combinedImageSamplerDescriptorSingleArray = true;
+         properties->bufferlessPushDescriptors = true;
+         properties->allowSamplerImageViewPostSubmitCreation = true;
+         properties->descriptorBufferOffsetAlignment = A6XX_TEX_CONST_DWORDS * 4;
+         properties->maxDescriptorBufferBindings = MAX_SETS;
+         properties->maxResourceDescriptorBufferBindings = MAX_SETS;
+         properties->maxSamplerDescriptorBufferBindings = MAX_SETS;
+         properties->maxEmbeddedImmutableSamplerBindings = MAX_SETS;
+         properties->maxEmbeddedImmutableSamplers = max_descriptor_set_size;
+         properties->bufferCaptureReplayDescriptorDataSize = 0;
+         properties->imageCaptureReplayDescriptorDataSize = 0;
+         properties->imageViewCaptureReplayDescriptorDataSize = 0;
+         properties->samplerCaptureReplayDescriptorDataSize = 0;
+         properties->accelerationStructureCaptureReplayDescriptorDataSize = 0;
+         /* Note: these sizes must match descriptor_size() */
+         properties->samplerDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
+         properties->combinedImageSamplerDescriptorSize = 2 * A6XX_TEX_CONST_DWORDS * 4;
+         properties->sampledImageDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
+         properties->storageImageDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
+         properties->uniformTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
+         properties->robustUniformTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
+         properties->storageTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
+         properties->robustStorageTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
+         properties->uniformBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
+         properties->robustUniformBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
+         properties->storageBufferDescriptorSize =
+            pdevice->info->a6xx.storage_16bit ?
+            2 * A6XX_TEX_CONST_DWORDS * 4 :
+            A6XX_TEX_CONST_DWORDS * 4;
+         properties->robustStorageBufferDescriptorSize =
+            properties->storageBufferDescriptorSize;
+         properties->inputAttachmentDescriptorSize =
+            (pdevice->instance->debug_flags & TU_DEBUG_DYNAMIC) ?
+            A6XX_TEX_CONST_DWORDS * 4 : 0;
+
+         properties->maxSamplerDescriptorBufferRange = ~0ull;
+         properties->maxResourceDescriptorBufferRange = ~0ull;
+         properties->samplerDescriptorBufferAddressSpaceSize = ~0ull;
+         properties->resourceDescriptorBufferAddressSpaceSize = ~0ull;
+         properties->descriptorBufferAddressSpaceSize = ~0ull;
+         break;
+      }
       default:
          break;
       }
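Every buffer-like descriptor here is one texture-constant slot, A6XX_TEX_CONST_DWORDS * 4 = 64 bytes, which is also the reported offset alignment; a sketch of how an application queries and honors it (physicalDevice and rawOffset are placeholders):

   VkPhysicalDeviceDescriptorBufferPropertiesEXT descBufProps = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_PROPERTIES_EXT,
   };
   VkPhysicalDeviceProperties2 props2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
      .pNext = &descBufProps,
   };
   vkGetPhysicalDeviceProperties2(physicalDevice, &props2);

   /* Round a descriptor-set offset up to the reported alignment (64 bytes on a6xx). */
   VkDeviceSize align = descBufProps.descriptorBufferOffsetAlignment;
   VkDeviceSize alignedOffset = (rawOffset + align - 1) & ~(align - 1);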
@@ -2691,6 +2747,8 @@ tu_BindBufferMemory2(VkDevice device,
                      uint32_t bindInfoCount,
                      const VkBindBufferMemoryInfo *pBindInfos)
 {
+   TU_FROM_HANDLE(tu_device, dev, device);
+
    for (uint32_t i = 0; i < bindInfoCount; ++i) {
       TU_FROM_HANDLE(tu_device_memory, mem, pBindInfos[i].memory);
       TU_FROM_HANDLE(tu_buffer, buffer, pBindInfos[i].buffer);
@@ -2698,6 +2756,10 @@ tu_BindBufferMemory2(VkDevice device,
       if (mem) {
          buffer->bo = mem->bo;
          buffer->iova = mem->bo->iova + pBindInfos[i].memoryOffset;
+         if (buffer->vk.usage &
+             (VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT |
+              VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT))
+            tu_bo_allow_dump(dev, mem->bo);
       } else {
          buffer->bo = NULL;
       }

View File

@@ -514,6 +514,14 @@ tu_bo_map(struct tu_device *dev, struct tu_bo *bo)
    return VK_SUCCESS;
 }
 
+void
+tu_bo_allow_dump(struct tu_device *dev, struct tu_bo *bo)
+{
+   mtx_lock(&dev->bo_mutex);
+   dev->bo_list[bo->bo_list_idx].flags |= MSM_SUBMIT_BO_DUMP;
+   mtx_unlock(&dev->bo_mutex);
+}
+
 void
 tu_bo_finish(struct tu_device *dev, struct tu_bo *bo)
 {

View File

@@ -97,6 +97,8 @@ tu_bo_finish(struct tu_device *dev, struct tu_bo *bo);
 VkResult
 tu_bo_map(struct tu_device *dev, struct tu_bo *bo);
 
+void tu_bo_allow_dump(struct tu_device *dev, struct tu_bo *bo);
+
 static inline struct tu_bo *
 tu_bo_get_ref(struct tu_bo *bo)
 {

View File

@@ -184,6 +184,11 @@ tu_bo_map(struct tu_device *dev, struct tu_bo *bo)
    return VK_SUCCESS;
 }
 
+void
+tu_bo_allow_dump(struct tu_device *dev, struct tu_bo *bo)
+{
+}
+
 void
 tu_bo_finish(struct tu_device *dev, struct tu_bo *bo)
 {