diff --git a/src/freedreno/ir3/ir3_validate.c b/src/freedreno/ir3/ir3_validate.c index 810280aae2d..f6cc9ba159d 100644 --- a/src/freedreno/ir3/ir3_validate.c +++ b/src/freedreno/ir3/ir3_validate.c @@ -170,11 +170,21 @@ validate_instr(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr) case OPC_STL: case OPC_STP: case OPC_STLW: - case OPC_STIB: validate_assert(ctx, !(instr->regs[1]->flags & IR3_REG_HALF)); validate_reg_size(ctx, instr->regs[2], instr->cat6.type); validate_assert(ctx, !(instr->regs[3]->flags & IR3_REG_HALF)); break; + case OPC_STIB: + if (instr->flags & IR3_INSTR_B) { + validate_assert(ctx, !(instr->regs[1]->flags & IR3_REG_HALF)); + validate_assert(ctx, !(instr->regs[2]->flags & IR3_REG_HALF)); + validate_reg_size(ctx, instr->regs[3], instr->cat6.type); + } else { + validate_assert(ctx, !(instr->regs[1]->flags & IR3_REG_HALF)); + validate_reg_size(ctx, instr->regs[2], instr->cat6.type); + validate_assert(ctx, !(instr->regs[3]->flags & IR3_REG_HALF)); + } + break; default: validate_reg_size(ctx, instr->regs[0], instr->cat6.type); validate_assert(ctx, !(instr->regs[1]->flags & IR3_REG_HALF)); diff --git a/src/freedreno/ir3/tests/disasm.c b/src/freedreno/ir3/tests/disasm.c index e4dccdb9171..bd4bff47d46 100644 --- a/src/freedreno/ir3/tests/disasm.c +++ b/src/freedreno/ir3/tests/disasm.c @@ -143,6 +143,7 @@ static const struct test { INSTR_5XX(c6e60000_00010600, "ldgb.untyped.4d.u32.1 r0.x, g[0], r1.x, r0.x"), INSTR_5XX(d7660204_02000a01, "(sy)stib.typed.2d.u32.1 g[1], r0.x, r0.z, r1.x"), + INSTR_6XX(c0240402_00674100, "stib.b.untyped.1d.u16.1.imm.base0 r0.z, r0.x, 2"), // TODO is this a real instruction? Or float -6.0 ? // INSTR_6XX(c0c00000_00000000, "stg.f16 g[hr0.x], hr0.x, hr0.x", .parse_fail=true), diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index 6e3a70d12d5..0de7c7dfd33 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -1812,7 +1812,7 @@ void tu_CmdPushDescriptorSetKHR(VkCommandBuffer commandBuffer, set->mapped_ptr = set_mem.map; set->va = set_mem.iova; - tu_update_descriptor_sets(tu_descriptor_set_to_handle(set), + tu_update_descriptor_sets(cmd->device, tu_descriptor_set_to_handle(set), descriptorWriteCount, pDescriptorWrites, 0, NULL); tu_CmdBindDescriptorSets(commandBuffer, pipelineBindPoint, _layout, _set, @@ -1851,7 +1851,7 @@ void tu_CmdPushDescriptorSetWithTemplateKHR( set->mapped_ptr = set_mem.map; set->va = set_mem.iova; - tu_update_descriptor_set_with_template(set, descriptorUpdateTemplate, pData); + tu_update_descriptor_set_with_template(cmd->device, set, descriptorUpdateTemplate, pData); tu_CmdBindDescriptorSets(commandBuffer, templ->bind_point, _layout, _set, 1, (VkDescriptorSet[]) { tu_descriptor_set_to_handle(set) }, diff --git a/src/freedreno/vulkan/tu_descriptor_set.c b/src/freedreno/vulkan/tu_descriptor_set.c index 1bf28a9e445..e906cb2c95e 100644 --- a/src/freedreno/vulkan/tu_descriptor_set.c +++ b/src/freedreno/vulkan/tu_descriptor_set.c @@ -706,17 +706,23 @@ static uint32_t get_range(struct tu_buffer *buf, VkDeviceSize offset, } static void -write_buffer_descriptor(uint32_t *dst, const VkDescriptorBufferInfo *buffer_info) +write_buffer_descriptor(const struct tu_device *device, + uint32_t *dst, + const VkDescriptorBufferInfo *buffer_info) { TU_FROM_HANDLE(tu_buffer, buffer, buffer_info->buffer); assert((buffer_info->offset & 63) == 0); /* minStorageBufferOffsetAlignment */ uint64_t va = tu_buffer_iova(buffer) + buffer_info->offset; uint32_t range = get_range(buffer, buffer_info->offset, buffer_info->range); - range = ALIGN_POT(range, 4) / 4; - dst[0] = - A6XX_IBO_0_TILE_MODE(TILE6_LINEAR) | A6XX_IBO_0_FMT(FMT6_32_UINT); - dst[1] = range; + /* newer a6xx allows using 16-bit descriptor for both 16-bit and 32-bit access */ + if (device->physical_device->gpu_id >= 650) { + dst[0] = A6XX_IBO_0_TILE_MODE(TILE6_LINEAR) | A6XX_IBO_0_FMT(FMT6_16_UINT); + dst[1] = DIV_ROUND_UP(range, 2); + } else { + dst[0] = A6XX_IBO_0_TILE_MODE(TILE6_LINEAR) | A6XX_IBO_0_FMT(FMT6_32_UINT); + dst[1] = DIV_ROUND_UP(range, 4); + } dst[2] = A6XX_IBO_2_UNK4 | A6XX_IBO_2_TYPE(A6XX_TEX_1D) | A6XX_IBO_2_UNK31; dst[3] = 0; @@ -784,7 +790,8 @@ write_sampler_push(uint32_t *dst, const struct tu_sampler *sampler) } void -tu_update_descriptor_sets(VkDescriptorSet dstSetOverride, +tu_update_descriptor_sets(const struct tu_device *device, + VkDescriptorSet dstSetOverride, uint32_t descriptorWriteCount, const VkWriteDescriptorSet *pDescriptorWrites, uint32_t descriptorCopyCount, @@ -823,12 +830,12 @@ tu_update_descriptor_sets(VkDescriptorSet dstSetOverride, assert(!(set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR)); unsigned idx = writeset->dstArrayElement + j; idx += binding_layout->dynamic_offset_offset; - write_buffer_descriptor(set->dynamic_descriptors + A6XX_TEX_CONST_DWORDS * idx, + write_buffer_descriptor(device, set->dynamic_descriptors + A6XX_TEX_CONST_DWORDS * idx, writeset->pBufferInfo + j); break; } case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - write_buffer_descriptor(ptr, writeset->pBufferInfo + j); + write_buffer_descriptor(device, ptr, writeset->pBufferInfo + j); break; case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: @@ -915,7 +922,8 @@ tu_UpdateDescriptorSets(VkDevice _device, uint32_t descriptorCopyCount, const VkCopyDescriptorSet *pDescriptorCopies) { - tu_update_descriptor_sets(VK_NULL_HANDLE, + TU_FROM_HANDLE(tu_device, device, _device); + tu_update_descriptor_sets(device, VK_NULL_HANDLE, descriptorWriteCount, pDescriptorWrites, descriptorCopyCount, pDescriptorCopies); } @@ -1023,6 +1031,7 @@ tu_DestroyDescriptorUpdateTemplate( void tu_update_descriptor_set_with_template( + const struct tu_device *device, struct tu_descriptor_set *set, VkDescriptorUpdateTemplate descriptorUpdateTemplate, const void *pData) @@ -1049,11 +1058,11 @@ tu_update_descriptor_set_with_template( break; case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: { assert(!(set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR)); - write_buffer_descriptor(set->dynamic_descriptors + dst_offset, src); + write_buffer_descriptor(device, set->dynamic_descriptors + dst_offset, src); break; } case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - write_buffer_descriptor(ptr, src); + write_buffer_descriptor(device, ptr, src); break; case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: @@ -1099,9 +1108,10 @@ tu_UpdateDescriptorSetWithTemplate( VkDescriptorUpdateTemplate descriptorUpdateTemplate, const void *pData) { + TU_FROM_HANDLE(tu_device, device, _device); TU_FROM_HANDLE(tu_descriptor_set, set, descriptorSet); - tu_update_descriptor_set_with_template(set, descriptorUpdateTemplate, pData); + tu_update_descriptor_set_with_template(device, set, descriptorUpdateTemplate, pData); } VkResult diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c index 2ce0e268874..5fe163be27d 100644 --- a/src/freedreno/vulkan/tu_device.c +++ b/src/freedreno/vulkan/tu_device.c @@ -340,6 +340,8 @@ void tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures2 *pFeatures) { + TU_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice); + pFeatures->features = (VkPhysicalDeviceFeatures) { .robustBufferAccess = true, .fullDrawIndexUint32 = true, @@ -393,7 +395,7 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice, switch (ext->sType) { case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES: { VkPhysicalDeviceVulkan11Features *features = (void *) ext; - features->storageBuffer16BitAccess = false; + features->storageBuffer16BitAccess = pdevice->gpu_id >= 650; features->uniformAndStorageBuffer16BitAccess = false; features->storagePushConstant16 = false; features->storageInputOutput16 = false; @@ -489,7 +491,7 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice, case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: { VkPhysicalDevice16BitStorageFeatures *features = (VkPhysicalDevice16BitStorageFeatures *) ext; - features->storageBuffer16BitAccess = false; + features->storageBuffer16BitAccess = pdevice->gpu_id >= 650; features->uniformAndStorageBuffer16BitAccess = false; features->storagePushConstant16 = false; features->storageInputOutput16 = false; diff --git a/src/freedreno/vulkan/tu_extensions.py b/src/freedreno/vulkan/tu_extensions.py index aa69ae0fd1a..ad83b8d2704 100644 --- a/src/freedreno/vulkan/tu_extensions.py +++ b/src/freedreno/vulkan/tu_extensions.py @@ -112,6 +112,7 @@ EXTENSIONS = [ Extension('VK_KHR_pipeline_executable_properties', 1, True), Extension('VK_KHR_shader_float_controls', 1, True), Extension('VK_KHR_shader_float16_int8', 1, True), + Extension('VK_KHR_16bit_storage', 1, 'device->gpu_id >= 650'), ] MAX_API_VERSION = VkVersion(MAX_API_VERSION) diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h index edc34f19473..2db0958503e 100644 --- a/src/freedreno/vulkan/tu_private.h +++ b/src/freedreno/vulkan/tu_private.h @@ -1552,7 +1552,8 @@ uint32_t tu_subpass_get_attachment_to_resolve(const struct tu_subpass *subpass, uint32_t index); void -tu_update_descriptor_sets(VkDescriptorSet overrideSet, +tu_update_descriptor_sets(const struct tu_device *device, + VkDescriptorSet overrideSet, uint32_t descriptorWriteCount, const VkWriteDescriptorSet *pDescriptorWrites, uint32_t descriptorCopyCount, @@ -1560,6 +1561,7 @@ tu_update_descriptor_sets(VkDescriptorSet overrideSet, void tu_update_descriptor_set_with_template( + const struct tu_device *device, struct tu_descriptor_set *set, VkDescriptorUpdateTemplate descriptorUpdateTemplate, const void *pData); diff --git a/src/freedreno/vulkan/tu_shader.c b/src/freedreno/vulkan/tu_shader.c index 9c3bef21501..4c144c0df2e 100644 --- a/src/freedreno/vulkan/tu_shader.c +++ b/src/freedreno/vulkan/tu_shader.c @@ -75,6 +75,7 @@ tu_spirv_to_nir(struct tu_device *dev, .runtime_descriptor_array = true, .float_controls = true, .float16 = true, + .storage_16bit = dev->physical_device->gpu_id >= 650, }, };