tu: use either the 16-bit or 32-bit descriptor

Until now, if 16-bit storage functionality was supported by the
hardware, two separate descriptors were set up, with isam loads and stores
going through the descriptor of the corresponding size and all other
storage access using the 16-bit descriptor.

These changes keep separate descriptors on a650, but leverage post-a650
isam.v functionality that enables use of 16-bit descriptors for 32-bit
loads, removing the need for the separate 32-bit descriptor.

Storage buffer descriptors are set up according to 16-bit storage support
and the indicated isam.v support. When isam.v support is present, the
16-bit descriptor is used for 32-bit isam loads as well.

Dynamic offset application in tu_CmdBindDescriptorSets is modified to
determine the offset shift value based on the descriptor's format and not
on the descriptor's position in the layout binding.

Signed-off-by: Zan Dobersek <zdobersek@igalia.com>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28254>
This commit is contained in:
Zan Dobersek
2024-03-18 18:53:07 +01:00
committed by Marge Bot
parent 1d418a3419
commit a9b781fa54
6 changed files with 62 additions and 54 deletions

View File

@@ -87,9 +87,7 @@ struct fd_dev_info {
/* Does the hw support GL_QCOM_shading_rate? */
bool has_shading_rate;
/* newer a6xx allows using 16-bit descriptor for both 16-bit
* and 32-bit access
*/
/* Whether a 16-bit descriptor can be used */
bool storage_16bit;
/* The latest known a630_sqe.fw fails to wait for WFI before

View File

@@ -65,7 +65,7 @@ struct ir3_compiler_options {
int bindless_fb_read_descriptor;
int bindless_fb_read_slot;
/* True if 16-bit descriptors are used for both 16-bit and 32-bit access. */
/* True if 16-bit descriptors are available. */
bool storage_16bit;
/* If base_vertex should be lowered in nir */
@@ -214,9 +214,6 @@ struct ir3_compiler {
/* Whether isam/stib/ldib have immediate offsets. */
bool has_ssbo_imm_offsets;
/* True if 16-bit descriptors are used for both 16-bit and 32-bit access. */
bool storage_16bit;
/* True if getfiberid, getlast.w8, brcst.active, and quad_shuffle
* instructions are supported which are necessary to support
* subgroup quad and arithmetic operations.

View File

@@ -2781,13 +2781,24 @@ tu_bind_descriptor_sets(struct tu_cmd_buffer *cmd,
A6XX_TEX_CONST_2_STARTOFFSETTEXELS__MASK) >>
A6XX_TEX_CONST_2_STARTOFFSETTEXELS__SHIFT;
/* Without the ability to cast 16-bit as 32-bit, there is
* only one descriptor whose texels are 32 bits (4
* bytes). With casting, there are two descriptors, the
* first being 16-bit and the second being 32-bit.
/* Use descriptor's format to determine the shift amount
* that's to be used on the offset value.
*/
unsigned offset_shift =
binding->size == 4 * A6XX_TEX_CONST_DWORDS || i == 1 ? 2 : 1;
uint32_t format = (dst_desc[0] &
A6XX_TEX_CONST_0_FMT__MASK) >>
A6XX_TEX_CONST_0_FMT__SHIFT;
unsigned offset_shift;
switch (format) {
case FMT6_16_UINT:
offset_shift = 1;
break;
case FMT6_32_UINT:
offset_shift = 2;
break;
default:
offset_shift = 0;
break;
}
va += desc_offset << offset_shift;
va += offset;

View File

@@ -63,14 +63,13 @@ descriptor_size(struct tu_device *dev,
return A6XX_TEX_CONST_DWORDS * 4 * 2;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
/* When we support 16-bit storage, we need an extra descriptor setup as
* a 32-bit array for isam to work.
/* isam.v allows using a single 16-bit descriptor for both 16-bit and
* 32-bit loads. If not available but 16-bit storage is still supported,
* two separate descriptors are required.
*/
if (dev->physical_device->info->a6xx.storage_16bit) {
return A6XX_TEX_CONST_DWORDS * 4 * 2;
} else {
return A6XX_TEX_CONST_DWORDS * 4;
}
return A6XX_TEX_CONST_DWORDS * 4 * (1 +
COND(dev->physical_device->info->a6xx.storage_16bit &&
!dev->physical_device->info->a6xx.has_isam_v, 1));
case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
return binding->descriptorCount;
default:
@@ -1012,45 +1011,48 @@ write_buffer_descriptor_addr(const struct tu_device *device,
uint32_t *dst,
const VkDescriptorAddressInfoEXT *buffer_info)
{
bool storage_16bit = device->physical_device->info->a6xx.storage_16bit;
/* newer a6xx allows using 16-bit descriptor for both 16-bit and 32-bit
* access, but we need to keep a 32-bit descriptor for readonly access via
* isam.
*/
unsigned descriptors = storage_16bit ? 2 : 1;
const struct fd_dev_info *info = device->physical_device->info;
/* This guards against misconfiguration: a 16-bit descriptor capable of both
 * 16-bit and 32-bit access through isam.v can only be functional when
 * 16-bit storage is supported. */
assert(!info->a6xx.has_isam_v || info->a6xx.storage_16bit);
if (!buffer_info || buffer_info->address == 0) {
memset(dst, 0, descriptors * A6XX_TEX_CONST_DWORDS * sizeof(uint32_t));
unsigned num_descriptors = 1 + COND(info->a6xx.storage_16bit &&
!info->a6xx.has_isam_v, 1);
memset(dst, 0, num_descriptors * A6XX_TEX_CONST_DWORDS * sizeof(uint32_t));
if (!buffer_info || buffer_info->address == 0)
return;
}
uint64_t va = buffer_info->address;
uint64_t base_va = va & ~0x3full;
unsigned offset = va & 0x3f;
uint32_t range = buffer_info->range;
for (unsigned i = 0; i < descriptors; i++) {
if (storage_16bit && i == 0) {
if (info->a6xx.storage_16bit) {
dst[0] = A6XX_TEX_CONST_0_TILE_MODE(TILE6_LINEAR) | A6XX_TEX_CONST_0_FMT(FMT6_16_UINT);
dst[1] = DIV_ROUND_UP(range, 2);
dst[2] =
A6XX_TEX_CONST_2_STRUCTSIZETEXELS(1) |
A6XX_TEX_CONST_2_STARTOFFSETTEXELS(offset / 2) |
A6XX_TEX_CONST_2_TYPE(A6XX_TEX_BUFFER);
} else {
dst[4] = A6XX_TEX_CONST_4_BASE_LO(base_va);
dst[5] = A6XX_TEX_CONST_5_BASE_HI(base_va >> 32);
dst += A6XX_TEX_CONST_DWORDS;
}
/* Set up the 32-bit descriptor when 16-bit storage isn't supported or the
* 16-bit descriptor cannot be used for 32-bit loads through isam.v.
*/
if (!info->a6xx.storage_16bit || !info->a6xx.has_isam_v) {
dst[0] = A6XX_TEX_CONST_0_TILE_MODE(TILE6_LINEAR) | A6XX_TEX_CONST_0_FMT(FMT6_32_UINT);
dst[1] = DIV_ROUND_UP(range, 4);
dst[2] =
A6XX_TEX_CONST_2_STRUCTSIZETEXELS(1) |
A6XX_TEX_CONST_2_STARTOFFSETTEXELS(offset / 4) |
A6XX_TEX_CONST_2_TYPE(A6XX_TEX_BUFFER);
}
dst[3] = 0;
dst[4] = A6XX_TEX_CONST_4_BASE_LO(base_va);
dst[5] = A6XX_TEX_CONST_5_BASE_HI(base_va >> 32);
for (int j = 6; j < A6XX_TEX_CONST_DWORDS; j++)
dst[j] = 0;
dst += A6XX_TEX_CONST_DWORDS;
}
}

View File

@@ -1095,10 +1095,8 @@ tu_get_properties(struct tu_physical_device *pdevice,
props->robustStorageTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
props->uniformBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
props->robustUniformBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
props->storageBufferDescriptorSize =
pdevice->info->a6xx.storage_16bit ?
2 * A6XX_TEX_CONST_DWORDS * 4 :
A6XX_TEX_CONST_DWORDS * 4;
props->storageBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4 * (1 +
COND(pdevice->info->a6xx.storage_16bit && !pdevice->info->a6xx.has_isam_v, 1));
props->robustStorageBufferDescriptorSize =
props->storageBufferDescriptorSize;
props->inputAttachmentDescriptorSize = TU_DEBUG(DYNAMIC) ?

View File

@@ -282,10 +282,12 @@ lower_ssbo_ubo_intrinsic(struct tu_device *dev,
}
}
/* For isam, we need to use the appropriate descriptor if 16-bit storage is
* enabled. Descriptor 0 is the 16-bit one, descriptor 1 is the 32-bit one.
/* For isam, we need to adjust the descriptor index to use the 32-bit
* descriptor if 16-bit storage support is present but the 16-bit descriptor
* cannot be used for 32-bit access through isam.v.
*/
if (dev->physical_device->info->a6xx.storage_16bit &&
!dev->physical_device->info->a6xx.has_isam_v &&
intrin->intrinsic == nir_intrinsic_load_ssbo &&
(nir_intrinsic_access(intrin) & ACCESS_CAN_REORDER) &&
intrin->def.bit_size > 16) {