vulkan/runtime: add compute astc decoder helper functions

The astc compute decode and lut creation code is copied
from https://github.com/Themaister/Granite/

The idea of always setting DECODE_8BIT is copied from
https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19886

v2: use astc glsl shader code (Chia-I Wu)
v3: fix 32bit compilation error (Christopher Snowhill)
v4: use pitch to copy in vk_create_fill_image_visible_mem() function
    pass correct layer to decode_astc()
v5: use existing ASTCLutHolder (Chia-I Wu)
v6: use only staging buffer (Chia-I Wu)
    use texel buffer for partition table (Chia-I Wu)
v7: use 2DArray for input and output
v8: check for == mem_property (Chia-I Wu)
    do not use vk_common* functions (Chia-I Wu)
    squash single buffer patch (Chia-I Wu)
    fix for minTexelBufferOffsetAlignment (Chia-I Wu)
    avoid wasting 4 slots (Chia-I Wu)
    remove partition_tbl_mask (Chia-I Wu)
    remove wrong bindings count (Chia-I Wu)
    use binding names from glsl code (Chia-I Wu)
    use ARRAY_SIZE (Chia-I Wu)
    use VkFormat for getting partition table index (Chia-I Wu)
    fix mutex lock (Chia-I Wu)
    image layout should be based on function call (Chia-I Wu)
    VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE is wrong (Chia-I Wu)
    add vk_texcompress_astc tag to helper functions (Chia-I Wu)
    remove write_desc_set_count (Chia-I Wu)
    use desc_i++ (Chia-I Wu)
    add assert for desc_i count at end (Chia-I Wu)
    remove unused vk_create_map_texel_buffer() function (Chia-I Wu)
    dynamically create the lut offset (Chia-I Wu)
    do not pass the offset as a push constant (Chia-I Wu)
v9: use correct storage and sampled flags (Chia-I Wu)
    always pass single_buf_size (Chia-I Wu)
    query drivers for minTexelBufferOffsetAlignment (Chia-I Wu)
    remove blank lines (Chia-I Wu)
    remove unnecessary if check in destroy (Chia-I Wu)
    name label as unlock instead of fail and pass (Chia-I Wu)
    use prog_glslang.found() (Chia-I Wu)
    add offset,extent check to astc shader (Chia-I Wu)
v10: prog_glslang can be undefined in meson.build (Chia-I Wu)
v11: remove with_texcompress_astc and use required in find_program (Chia-I Wu)
v12: offsets are aligned to blk size (Chia-I Wu)
v13: texel_blk_start should be under vulkan if check (Chia-I Wu)
     dst image layout is always VK_IMAGE_LAYOUT_GENERAL (Chia-I Wu)

Reviewed-by: Chia-I Wu <olvaffe@gmail.com>
Acked-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24672>
Author: Yogesh Mohan Marimuthu
Date: 2023-09-07 17:03:03 +05:30
Committed by: Marge Bot
Parent: bcc0e1e2af
Commit: ff4d658fd5
5 changed files with 811 additions and 7 deletions


@@ -564,8 +564,8 @@ if vdpau_drivers_path == ''
vdpau_drivers_path = join_paths(get_option('libdir'), 'vdpau')
endif
if with_vulkan_overlay_layer or with_aco_tests or with_amd_vk or with_intel_vk
prog_glslang = find_program('glslangValidator', native : true)
prog_glslang = find_program('glslangValidator', native : true, required : with_vulkan_overlay_layer or with_aco_tests or with_amd_vk or with_intel_vk)
if prog_glslang.found()
if run_command(prog_glslang, [ '--quiet', '--version' ], check : false).returncode() == 0
glslang_quiet = ['--quiet']
else


@@ -32,22 +32,28 @@ precision highp uimage2D;
#ifdef VULKAN
precision highp utextureBuffer;
precision highp utexture2D;
precision highp utexture2DArray;
precision highp uimage2DArray;
#extension GL_EXT_samplerless_texture_functions : require
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z = 4) in;
layout(set = 0, binding = 0) writeonly uniform uimage2D OutputImage;
layout(set = 0, binding = 1) uniform utexture2D PayloadInput;
layout(set = 0, binding = 0) writeonly uniform uimage2DArray OutputImage2Darray;
layout(set = 0, binding = 1) uniform utexture2DArray PayloadInput2Darray;
layout(set = 0, binding = 2) uniform utextureBuffer LUTRemainingBitsToEndpointQuantizer;
layout(set = 0, binding = 3) uniform utextureBuffer LUTEndpointUnquantize;
layout(set = 0, binding = 4) uniform utextureBuffer LUTWeightQuantizer;
layout(set = 0, binding = 5) uniform utextureBuffer LUTWeightUnquantize;
layout(set = 0, binding = 6) uniform utextureBuffer LUTTritQuintDecode;
layout(set = 0, binding = 7) uniform utexture2D LUTPartitionTable;
layout(set = 0, binding = 7) uniform utextureBuffer LUTPartitionTable;
layout(constant_id = 2) const bool DECODE_8BIT = false;
layout(push_constant, std430) uniform pc {
ivec2 texel_blk_start;
ivec2 texel_end;
};
#else /* VULKAN */
layout(local_size_x = %u, local_size_y = %u, local_size_z = 4) in;
@@ -146,6 +152,9 @@ ivec4 build_coord()
ivec2 payload_coord = ivec2(gl_WorkGroupID.xy) * 2;
payload_coord.x += int(gl_LocalInvocationID.z) & 1;
payload_coord.y += (int(gl_LocalInvocationID.z) >> 1) & 1;
#ifdef VULKAN
payload_coord += texel_blk_start;
#endif /* VULKAN */
ivec2 coord = payload_coord * ivec2(gl_WorkGroupSize.xy);
coord += ivec2(gl_LocalInvocationID.xy);
return ivec4(coord, payload_coord);
@@ -1140,7 +1149,11 @@ void decode_endpoint(out ivec4 ep0, out ivec4 ep1, out int decode_mode,
void emit_decode_error(ivec2 coord)
{
#ifdef VULKAN
imageStore(OutputImage2Darray, ivec3(coord, gl_WorkGroupID.z), error_color);
#else /* VULKAN */
imageStore(OutputImage, coord, error_color);
#endif /* VULKAN */
}
int compute_num_endpoint_pairs(int num_partitions, int cem)
@@ -1233,12 +1246,22 @@ ivec4 void_extent_color(uvec4 payload, out int decode_mode)
void main()
{
ivec4 coord = build_coord();
#ifdef VULKAN
if (any(greaterThanEqual(coord.xy, texel_end.xy)))
return;
#else /* VULKAN */
if (any(greaterThanEqual(coord.xy, imageSize(OutputImage))))
return;
#endif /* VULKAN */
ivec2 pixel_coord = ivec2(gl_LocalInvocationID.xy);
int linear_pixel = int(gl_WorkGroupSize.x) * pixel_coord.y + pixel_coord.x;
uvec4 payload = texelFetch(PayloadInput, coord.zw, 0);
uvec4 payload;
#ifdef VULKAN
payload = texelFetch(PayloadInput2Darray, ivec3(coord.zw, gl_WorkGroupID.z), 0);
#else /* VULKAN */
payload = texelFetch(PayloadInput, coord.zw, 0);
#endif /* VULKAN */
BlockMode block_mode = decode_block_mode(payload);
CHECK_DECODE_ERROR();
@@ -1260,7 +1283,12 @@ void main()
{
int lut_x = pixel_coord.x + int(gl_WorkGroupSize.x) * (block_mode.seed & 31);
int lut_y = pixel_coord.y + int(gl_WorkGroupSize.y) * (block_mode.seed >> 5);
#ifdef VULKAN
int lut_width = int(gl_WorkGroupSize.x) * 32;
partition_index = int(texelFetch(LUTPartitionTable, lut_y * lut_width + lut_x).x);
#else /* VULKAN */
partition_index = int(texelFetch(LUTPartitionTable, ivec2(lut_x, lut_y), 0).x);
#endif /* VULKAN */
partition_index = (partition_index >> (2 * block_mode.num_partitions - 4)) & 3;
}
@@ -1315,7 +1343,11 @@ void main()
if (DECODE_8BIT)
{
#ifdef VULKAN
imageStore(OutputImage2Darray, ivec3(coord.xy, gl_WorkGroupID.z), uvec4(final_color >> 8));
#else /* VULKAN */
imageStore(OutputImage, coord.xy, uvec4(final_color >> 8));
#endif /* VULKAN */
}
else
{
@@ -1324,6 +1356,10 @@ void main()
encoded = uvec4(final_color);
else
encoded = decode_fp16(final_color, decode_mode);
#ifdef VULKAN
imageStore(OutputImage2Darray, ivec3(coord.xy, gl_WorkGroupID.z), encoded);
#else /* VULKAN */
imageStore(OutputImage, coord.xy, encoded);
#endif /* VULKAN */
}
}
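
The push constants above are filled by the driver that records the decode dispatch; that caller is not part of this commit. Below is a minimal host-side sketch, assuming the copy offset is already aligned to the ASTC block size (per v12) and reusing the vk_format block helpers; the struct and function names are illustrative only:

/* Hypothetical sketch: fill the 16-byte push constant block declared in the
 * shader ("pc": ivec2 texel_blk_start, ivec2 texel_end) for a decode of
 * "extent" texels starting at the block-aligned "offset".
 */
struct astc_decode_push_const {
   int32_t texel_blk_start[2]; /* first block to decode, in block units */
   int32_t texel_end[2];       /* one past the last texel to write */
};

static void
fill_astc_decode_push_const(struct astc_decode_push_const *pc, VkFormat format,
                            const VkOffset3D *offset, const VkExtent3D *extent)
{
   const uint32_t blk_w = vk_format_get_blockwidth(format);
   const uint32_t blk_h = vk_format_get_blockheight(format);

   /* exact division because the offset is aligned to the block size */
   pc->texel_blk_start[0] = offset->x / blk_w;
   pc->texel_blk_start[1] = offset->y / blk_h;
   pc->texel_end[0] = offset->x + extent->width;
   pc->texel_end[1] = offset->y + extent->height;
}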


@@ -135,6 +135,16 @@ if with_platform_android
vulkan_runtime_deps += dep_android
endif
if prog_glslang.found()
vulkan_runtime_files += files('vk_texcompress_astc.c', 'vk_texcompress_astc.h')
vulkan_runtime_files += custom_target(
'astc_spv.h',
input : astc_decoder_glsl_file,
output : 'astc_spv.h',
command : [prog_glslang, '-V', '-S', 'comp', '-x', '-o', '@OUTPUT@', '@INPUT@'] + glslang_quiet,
)
endif
vk_common_entrypoints = custom_target(
'vk_common_entrypoints',
input : [vk_entrypoints_gen, vk_api_xml],


@@ -0,0 +1,637 @@
/* Copyright (c) 2017-2023 Hans-Kristian Arntzen
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "vk_texcompress_astc.h"
#include "util/texcompress_astc_luts_wrap.h"
#include "vk_alloc.h"
#include "vk_buffer.h"
#include "vk_device.h"
#include "vk_format.h"
#include "vk_image.h"
#include "vk_physical_device.h"
/* Bits set in type_indexes_mask indicate which memory type indices may be
 * used, as per struct VkPhysicalDeviceMemoryProperties.memoryTypes[]. */
static uint32_t
get_mem_type_index(struct vk_device *device, uint32_t type_indexes_mask,
VkMemoryPropertyFlags mem_property)
{
const struct vk_physical_device_dispatch_table *disp = &device->physical->dispatch_table;
VkPhysicalDevice _phy_device = vk_physical_device_to_handle(device->physical);
VkPhysicalDeviceMemoryProperties2 props2 = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2,
.pNext = NULL,
};
disp->GetPhysicalDeviceMemoryProperties2(_phy_device, &props2);
for (uint32_t i = 0; i < props2.memoryProperties.memoryTypeCount; i++) {
if ((type_indexes_mask & (1 << i)) &&
((props2.memoryProperties.memoryTypes[i].propertyFlags & mem_property) == mem_property)) {
return i;
}
}
return -1;
}
static VkResult
vk_create_buffer(struct vk_device *device, VkAllocationCallbacks *allocator,
VkDeviceSize size, VkMemoryPropertyFlags mem_prop_flags,
VkBufferUsageFlags usage_flags, VkBuffer *vk_buf,
VkDeviceMemory *vk_mem)
{
VkResult result;
VkDevice _device = vk_device_to_handle(device);
const struct vk_device_dispatch_table *disp = &device->dispatch_table;
VkBufferCreateInfo buffer_create_info = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.size = size,
.usage = usage_flags,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
};
result =
disp->CreateBuffer(_device, &buffer_create_info, allocator, vk_buf);
if (unlikely(result != VK_SUCCESS))
return result;
VkBufferMemoryRequirementsInfo2 mem_req_info = {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
.buffer = *vk_buf,
};
VkMemoryRequirements2 mem_req = {
.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
};
disp->GetBufferMemoryRequirements2(_device, &mem_req_info, &mem_req);
uint32_t mem_type_index = get_mem_type_index(
device, mem_req.memoryRequirements.memoryTypeBits, mem_prop_flags);
if (mem_type_index == -1)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
VkMemoryAllocateInfo alloc_info = {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.allocationSize = mem_req.memoryRequirements.size,
.memoryTypeIndex = mem_type_index,
};
result = disp->AllocateMemory(_device, &alloc_info, allocator, vk_mem);
if (unlikely(result != VK_SUCCESS))
return result;
disp->BindBufferMemory(_device, *vk_buf, *vk_mem, 0);
return result;
}
static VkResult
create_buffer_view(struct vk_device *device, VkAllocationCallbacks *allocator,
VkBufferView *buf_view, VkBuffer buf, VkFormat format, VkDeviceSize size,
VkDeviceSize offset)
{
VkResult result;
VkDevice _device = vk_device_to_handle(device);
const struct vk_device_dispatch_table *disp = &device->dispatch_table;
VkBufferViewCreateInfo buffer_view_create_info = {
.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
.buffer = buf,
.format = format,
.offset = offset,
.range = size,
};
result = disp->CreateBufferView(_device, &buffer_view_create_info,
allocator, buf_view);
return result;
}
static uint8_t
get_partition_table_index(VkFormat format)
{
switch (format) {
case VK_FORMAT_ASTC_4x4_UNORM_BLOCK:
case VK_FORMAT_ASTC_4x4_SRGB_BLOCK:
return 0;
case VK_FORMAT_ASTC_5x4_UNORM_BLOCK:
case VK_FORMAT_ASTC_5x4_SRGB_BLOCK:
return 1;
case VK_FORMAT_ASTC_5x5_UNORM_BLOCK:
case VK_FORMAT_ASTC_5x5_SRGB_BLOCK:
return 2;
case VK_FORMAT_ASTC_6x5_UNORM_BLOCK:
case VK_FORMAT_ASTC_6x5_SRGB_BLOCK:
return 3;
case VK_FORMAT_ASTC_6x6_UNORM_BLOCK:
case VK_FORMAT_ASTC_6x6_SRGB_BLOCK:
return 4;
case VK_FORMAT_ASTC_8x5_UNORM_BLOCK:
case VK_FORMAT_ASTC_8x5_SRGB_BLOCK:
return 5;
case VK_FORMAT_ASTC_8x6_UNORM_BLOCK:
case VK_FORMAT_ASTC_8x6_SRGB_BLOCK:
return 6;
case VK_FORMAT_ASTC_8x8_UNORM_BLOCK:
case VK_FORMAT_ASTC_8x8_SRGB_BLOCK:
return 7;
case VK_FORMAT_ASTC_10x5_UNORM_BLOCK:
case VK_FORMAT_ASTC_10x5_SRGB_BLOCK:
return 8;
case VK_FORMAT_ASTC_10x6_UNORM_BLOCK:
case VK_FORMAT_ASTC_10x6_SRGB_BLOCK:
return 9;
case VK_FORMAT_ASTC_10x8_UNORM_BLOCK:
case VK_FORMAT_ASTC_10x8_SRGB_BLOCK:
return 10;
case VK_FORMAT_ASTC_10x10_UNORM_BLOCK:
case VK_FORMAT_ASTC_10x10_SRGB_BLOCK:
return 11;
case VK_FORMAT_ASTC_12x10_UNORM_BLOCK:
case VK_FORMAT_ASTC_12x10_SRGB_BLOCK:
return 12;
case VK_FORMAT_ASTC_12x12_UNORM_BLOCK:
case VK_FORMAT_ASTC_12x12_SRGB_BLOCK:
return 13;
default:
unreachable("bad astc format\n");
return 0;
}
}
static VkResult
astc_prepare_buffer(struct vk_device *device,
struct vk_texcompress_astc_state *astc,
VkAllocationCallbacks *allocator,
VkDeviceSize minTexelBufferOffsetAlignment,
uint8_t *single_buf_ptr,
VkDeviceSize *single_buf_size)
{
VkResult result = VK_SUCCESS; /* stays VK_SUCCESS on the sizing-only pass */
astc_decoder_lut_holder astc_lut_holder;
VkDeviceSize offset = 0;
_mesa_init_astc_decoder_luts(&astc_lut_holder);
const astc_decoder_lut *luts[] = {
&astc_lut_holder.color_endpoint,
&astc_lut_holder.color_endpoint_unquant,
&astc_lut_holder.weights,
&astc_lut_holder.weights_unquant,
&astc_lut_holder.trits_quints,
};
for (unsigned i = 0; i < ARRAY_SIZE(luts); i++) {
offset = align(offset, minTexelBufferOffsetAlignment);
if (single_buf_ptr) {
memcpy(single_buf_ptr + offset, luts[i]->data, luts[i]->size_B);
result = create_buffer_view(device, allocator, &astc->luts_buf_view[i], astc->luts_buf,
vk_format_from_pipe_format(luts[i]->format), luts[i]->size_B,
offset);
if (result != VK_SUCCESS)
return result;
}
offset += luts[i]->size_B;
}
const VkFormat formats[] = {
VK_FORMAT_ASTC_4x4_UNORM_BLOCK,
VK_FORMAT_ASTC_5x4_UNORM_BLOCK,
VK_FORMAT_ASTC_5x5_UNORM_BLOCK,
VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
VK_FORMAT_ASTC_6x6_UNORM_BLOCK,
VK_FORMAT_ASTC_8x5_UNORM_BLOCK,
VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
VK_FORMAT_ASTC_8x8_UNORM_BLOCK,
VK_FORMAT_ASTC_10x5_UNORM_BLOCK,
VK_FORMAT_ASTC_10x6_UNORM_BLOCK,
VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
VK_FORMAT_ASTC_10x10_UNORM_BLOCK,
VK_FORMAT_ASTC_12x10_UNORM_BLOCK,
VK_FORMAT_ASTC_12x12_UNORM_BLOCK,
};
for (uint32_t i = 0; i < ARRAY_SIZE(formats); i++) {
unsigned lut_width;
unsigned lut_height;
const void *lut_data = _mesa_get_astc_decoder_partition_table(
vk_format_get_blockwidth(formats[i]),
vk_format_get_blockheight(formats[i]),
&lut_width, &lut_height);
const unsigned lut_size = lut_width * lut_height;
offset = align(offset, minTexelBufferOffsetAlignment);
if (single_buf_ptr) {
memcpy(single_buf_ptr + offset, lut_data, lut_width * lut_height);
result = create_buffer_view(device, allocator, &astc->partition_tbl_buf_view[i],
astc->luts_buf, VK_FORMAT_R8_UINT, lut_width * lut_height,
offset);
if (result != VK_SUCCESS)
return result;
}
offset += lut_size;
}
*single_buf_size = offset;
return result;
}
static VkResult
create_fill_all_luts_vulkan(struct vk_device *device,
VkAllocationCallbacks *allocator,
struct vk_texcompress_astc_state *astc)
{
VkResult result;
VkDevice _device = vk_device_to_handle(device);
const struct vk_device_dispatch_table *disp = &device->dispatch_table;
VkPhysicalDevice _phy_device = vk_physical_device_to_handle(device->physical);
const struct vk_physical_device_dispatch_table *phy_disp = &device->physical->dispatch_table;
VkDeviceSize single_buf_size;
uint8_t *single_buf_ptr;
VkPhysicalDeviceProperties2 phy_dev_prop = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
.pNext = NULL,
};
phy_disp->GetPhysicalDeviceProperties2(_phy_device, &phy_dev_prop);
/* get the single_buf_size */
result = astc_prepare_buffer(device, astc, allocator,
phy_dev_prop.properties.limits.minTexelBufferOffsetAlignment,
NULL, &single_buf_size);
/* create gpu buffer for all the luts */
result = vk_create_buffer(device, allocator, single_buf_size,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT,
&astc->luts_buf, &astc->luts_mem);
if (unlikely(result != VK_SUCCESS))
return result;
disp->MapMemory(_device, astc->luts_mem, 0, VK_WHOLE_SIZE, 0, (void*)&single_buf_ptr);
/* fill all the luts and create views */
result = astc_prepare_buffer(device, astc, allocator,
phy_dev_prop.properties.limits.minTexelBufferOffsetAlignment,
single_buf_ptr, &single_buf_size);
disp->UnmapMemory(_device, astc->luts_mem);
return result;
}
static VkResult
create_layout(struct vk_device *device, VkAllocationCallbacks *allocator,
struct vk_texcompress_astc_state *astc)
{
VkResult result;
VkDevice _device = vk_device_to_handle(device);
const struct vk_device_dispatch_table *disp = &device->dispatch_table;
VkDescriptorSetLayoutBinding bindings[] = {
{
.binding = 0, /* OutputImage2DArray */
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = NULL,
},
{
.binding = 1, /* PayloadInput2DArray */
.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = NULL,
},
{
.binding = 2, /* LUTRemainingBitsToEndpointQuantizer */
.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = NULL,
},
{
.binding = 3, /* LUTEndpointUnquantize */
.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = NULL,
},
{
.binding = 4, /* LUTWeightQuantizer */
.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = NULL,
},
{
.binding = 5, /* LUTWeightUnquantize */
.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = NULL,
},
{
.binding = 6, /* LUTTritQuintDecode */
.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = NULL,
},
{
.binding = 7, /* LUTPartitionTable */
.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = NULL,
},
};
VkDescriptorSetLayoutCreateInfo ds_create_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
.bindingCount = ARRAY_SIZE(bindings),
.pBindings = bindings,
};
result = disp->CreateDescriptorSetLayout(_device, &ds_create_info,
allocator, &astc->ds_layout);
if (result != VK_SUCCESS)
goto fail;
VkPipelineLayoutCreateInfo pl_create_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 1,
.pSetLayouts = &astc->ds_layout,
.pushConstantRangeCount = 1,
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
};
result = disp->CreatePipelineLayout(_device, &pl_create_info, allocator,
&astc->p_layout);
fail:
return result;
}
static const uint32_t astc_spv[] = {
#include "astc_spv.h"
};
static VkResult
vk_astc_create_shader_module(struct vk_device *device,
VkAllocationCallbacks *allocator,
struct vk_texcompress_astc_state *astc)
{
VkDevice _device = vk_device_to_handle(device);
const struct vk_device_dispatch_table *disp = &device->dispatch_table;
VkShaderModuleCreateInfo shader_module_create_info = {
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
.pNext = NULL,
.flags = 0,
.codeSize = sizeof(astc_spv),
.pCode = astc_spv,
};
return disp->CreateShaderModule(_device, &shader_module_create_info,
allocator, &astc->shader_module);
}
static VkResult
create_astc_decode_pipeline(struct vk_device *device,
VkAllocationCallbacks *allocator,
struct vk_texcompress_astc_state *astc,
VkPipelineCache pipeline_cache, VkFormat format)
{
VkResult result;
VkDevice _device = vk_device_to_handle(device);
const struct vk_device_dispatch_table *disp = &device->dispatch_table;
VkPipeline pipeline;
uint8_t t_i;
t_i = get_partition_table_index(format);
uint32_t special_data[3] = {
vk_format_get_blockwidth(format),
vk_format_get_blockheight(format),
true,
};
VkSpecializationMapEntry special_map_entry[3] = {{
.constantID = 0,
.offset = 0,
.size = 4,
},
{
.constantID = 1,
.offset = 4,
.size = 4,
},
{
.constantID = 2,
.offset = 8,
.size = 4,
}};
VkSpecializationInfo specialization_info = {
.mapEntryCount = 3,
.pMapEntries = special_map_entry,
.dataSize = 12,
.pData = special_data,
};
/* compute shader */
VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = astc->shader_module,
.pName = "main",
.pSpecializationInfo = &specialization_info,
};
VkComputePipelineCreateInfo vk_pipeline_info = {
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.stage = pipeline_shader_stage,
.flags = 0,
.layout = astc->p_layout,
};
result = disp->CreateComputePipelines(
_device, pipeline_cache, 1, &vk_pipeline_info, allocator, &pipeline);
if (result != VK_SUCCESS)
return result;
astc->pipeline[t_i] = pipeline;
astc->pipeline_mask |= (1 << t_i);
return result;
}
VkPipeline
vk_texcompress_astc_get_decode_pipeline(struct vk_device *device, VkAllocationCallbacks *allocator,
struct vk_texcompress_astc_state *astc, VkPipelineCache pipeline_cache,
VkFormat format)
{
VkResult result;
uint8_t t_i = get_partition_table_index(format);
simple_mtx_lock(&astc->mutex);
if (astc->pipeline[t_i])
goto unlock;
if (!astc->shader_module) {
result = vk_astc_create_shader_module(device, allocator, astc);
if (result != VK_SUCCESS)
goto unlock;
}
create_astc_decode_pipeline(device, allocator, astc, pipeline_cache, format);
unlock:
simple_mtx_unlock(&astc->mutex);
return astc->pipeline[t_i];
}
static inline void
fill_desc_image_info_struct(VkDescriptorImageInfo *info, VkImageView img_view,
VkImageLayout img_layout)
{
info->sampler = VK_NULL_HANDLE;
info->imageView = img_view;
info->imageLayout = img_layout;
}
static inline void
fill_write_descriptor_set_image(VkWriteDescriptorSet *set, uint8_t bind_i,
VkDescriptorType desc_type, VkDescriptorImageInfo *image_info)
{
set->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
set->pNext = NULL;
set->dstSet = VK_NULL_HANDLE;
set->dstBinding = bind_i;
set->dstArrayElement = 0;
set->descriptorCount = 1;
set->descriptorType = desc_type;
set->pImageInfo = image_info;
set->pBufferInfo = NULL;
set->pTexelBufferView = NULL;
}
static inline void
fill_write_descriptor_set_uniform_texel(VkWriteDescriptorSet *set,
uint8_t bind_i,
VkBufferView *buf_view)
{
set->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
set->pNext = NULL;
set->dstSet = VK_NULL_HANDLE;
set->dstBinding = bind_i;
set->dstArrayElement = 0;
set->descriptorCount = 1;
set->descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
set->pImageInfo = NULL;
set->pBufferInfo = NULL;
set->pTexelBufferView = buf_view;
}
void
vk_texcompress_astc_fill_write_descriptor_sets(struct vk_texcompress_astc_state *astc,
struct vk_texcompress_astc_write_descriptor_set *set,
VkImageView src_img_view, VkImageLayout src_img_layout,
VkImageView dst_img_view,
VkFormat format)
{
unsigned desc_i;
desc_i = 0;
fill_desc_image_info_struct(&set->dst_desc_image_info, dst_img_view, VK_IMAGE_LAYOUT_GENERAL);
fill_write_descriptor_set_image(&set->descriptor_set[desc_i], desc_i,
VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &set->dst_desc_image_info);
desc_i++;
fill_desc_image_info_struct(&set->src_desc_image_info, src_img_view, src_img_layout);
fill_write_descriptor_set_image(&set->descriptor_set[desc_i], desc_i,
VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, &set->src_desc_image_info);
/* fill luts descriptor */
desc_i++;
for (unsigned i = 0; i < VK_TEXCOMPRESS_ASTC_NUM_LUTS; i++) {
fill_write_descriptor_set_uniform_texel(&set->descriptor_set[desc_i + i], desc_i + i,
&astc->luts_buf_view[i]);
}
desc_i += VK_TEXCOMPRESS_ASTC_NUM_LUTS;
uint8_t t_i = get_partition_table_index(format);
fill_write_descriptor_set_uniform_texel(&set->descriptor_set[desc_i], desc_i,
&astc->partition_tbl_buf_view[t_i]);
desc_i++;
assert(desc_i == ARRAY_SIZE(set->descriptor_set));
}
VkResult
vk_texcompress_astc_init(struct vk_device *device, VkAllocationCallbacks *allocator,
VkPipelineCache pipeline_cache,
struct vk_texcompress_astc_state **astc)
{
VkResult result;
/* astc memory to be freed as part of vk_astc_decode_finish() */
*astc = vk_zalloc(allocator, sizeof(struct vk_texcompress_astc_state), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (*astc == NULL)
return VK_ERROR_OUT_OF_HOST_MEMORY;
simple_mtx_init(&(*astc)->mutex, mtx_plain);
result = create_fill_all_luts_vulkan(device, allocator, *astc);
if (result != VK_SUCCESS)
goto fail;
result = create_layout(device, allocator, *astc);
fail:
return result;
}
void
vk_texcompress_astc_finish(struct vk_device *device,
VkAllocationCallbacks *allocator,
struct vk_texcompress_astc_state *astc)
{
VkDevice _device = vk_device_to_handle(device);
const struct vk_device_dispatch_table *disp = &device->dispatch_table;
while (astc->pipeline_mask) {
uint8_t t_i = u_bit_scan(&astc->pipeline_mask);
disp->DestroyPipeline(_device, astc->pipeline[t_i], allocator);
}
disp->DestroyPipelineLayout(_device, astc->p_layout, allocator);
disp->DestroyShaderModule(_device, astc->shader_module, allocator);
disp->DestroyDescriptorSetLayout(_device, astc->ds_layout, allocator);
for (unsigned i = 0; i < VK_TEXCOMPRESS_ASTC_NUM_LUTS; i++)
disp->DestroyBufferView(_device, astc->luts_buf_view[i], allocator);
for (unsigned i = 0; i < VK_TEXCOMPRESS_ASTC_NUM_PARTITION_TABLES; i++)
disp->DestroyBufferView(_device, astc->partition_tbl_buf_view[i], allocator);
disp->DestroyBuffer(_device, astc->luts_buf, allocator);
disp->FreeMemory(_device, astc->luts_mem, allocator);
vk_free(allocator, astc);
}


@@ -0,0 +1,121 @@
/* Copyright (c) 2017-2023 Hans-Kristian Arntzen
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef VK_TEXCOMPRESS_ASTC_H
#define VK_TEXCOMPRESS_ASTC_H
#include "vk_device.h"
/* luts order matching astc glsl shader below,
* 0 - color endpoint
* 1 - color endpoint unquant
* 2 - weights
* 3 - weights unquant
* 4 - trits quints
*/
#define VK_TEXCOMPRESS_ASTC_NUM_LUTS 5
#define VK_TEXCOMPRESS_ASTC_NUM_PARTITION_TABLES 14
#define VK_TEXCOMPRESS_ASTC_WRITE_DESC_SET_COUNT 8
struct vk_texcompress_astc_state {
/* single buffer is allocated for all luts */
VkDeviceMemory luts_mem;
VkBuffer luts_buf;
VkBufferView luts_buf_view[VK_TEXCOMPRESS_ASTC_NUM_LUTS];
VkBufferView partition_tbl_buf_view[VK_TEXCOMPRESS_ASTC_NUM_PARTITION_TABLES];
simple_mtx_t mutex;
VkDescriptorSetLayout ds_layout;
VkPipelineLayout p_layout;
VkPipeline pipeline[VK_TEXCOMPRESS_ASTC_NUM_PARTITION_TABLES];
uint32_t pipeline_mask;
VkShaderModule shader_module;
};
struct vk_texcompress_astc_write_descriptor_set {
VkWriteDescriptorSet descriptor_set[VK_TEXCOMPRESS_ASTC_WRITE_DESC_SET_COUNT];
VkDescriptorImageInfo dst_desc_image_info;
VkDescriptorImageInfo src_desc_image_info;
};
void
vk_texcompress_astc_fill_write_descriptor_sets(struct vk_texcompress_astc_state *astc,
struct vk_texcompress_astc_write_descriptor_set *set,
VkImageView src_img_view, VkImageLayout src_img_layout,
VkImageView dst_img_view,
VkFormat format);
VkPipeline vk_texcompress_astc_get_decode_pipeline(struct vk_device *device,
VkAllocationCallbacks *allocator,
struct vk_texcompress_astc_state *astc,
VkPipelineCache pipeline_cache,
VkFormat format);
VkResult vk_texcompress_astc_init(struct vk_device *device,
VkAllocationCallbacks *allocator,
VkPipelineCache pipeline_cache,
struct vk_texcompress_astc_state **astc);
void vk_texcompress_astc_finish(struct vk_device *device,
VkAllocationCallbacks *allocator,
struct vk_texcompress_astc_state *astc);
static inline VkFormat
vk_texcompress_astc_emulation_format(VkFormat format)
{
/* TODO: From the VK_EXT_astc_decode_mode spec, VK_FORMAT_R16G16B16A16_SFLOAT is the
 * default option. VK_FORMAT_R8G8B8A8_UNORM is only an acceptable image quality option.
 */
switch (format) {
case VK_FORMAT_ASTC_4x4_UNORM_BLOCK:
case VK_FORMAT_ASTC_5x4_UNORM_BLOCK:
case VK_FORMAT_ASTC_5x5_UNORM_BLOCK:
case VK_FORMAT_ASTC_6x5_UNORM_BLOCK:
case VK_FORMAT_ASTC_6x6_UNORM_BLOCK:
case VK_FORMAT_ASTC_8x5_UNORM_BLOCK:
case VK_FORMAT_ASTC_8x6_UNORM_BLOCK:
case VK_FORMAT_ASTC_8x8_UNORM_BLOCK:
case VK_FORMAT_ASTC_10x5_UNORM_BLOCK:
case VK_FORMAT_ASTC_10x6_UNORM_BLOCK:
case VK_FORMAT_ASTC_10x8_UNORM_BLOCK:
case VK_FORMAT_ASTC_10x10_UNORM_BLOCK:
case VK_FORMAT_ASTC_12x10_UNORM_BLOCK:
case VK_FORMAT_ASTC_12x12_UNORM_BLOCK:
return VK_FORMAT_R8G8B8A8_UNORM;
case VK_FORMAT_ASTC_4x4_SRGB_BLOCK:
case VK_FORMAT_ASTC_5x4_SRGB_BLOCK:
case VK_FORMAT_ASTC_5x5_SRGB_BLOCK:
case VK_FORMAT_ASTC_6x5_SRGB_BLOCK:
case VK_FORMAT_ASTC_6x6_SRGB_BLOCK:
case VK_FORMAT_ASTC_8x5_SRGB_BLOCK:
case VK_FORMAT_ASTC_8x6_SRGB_BLOCK:
case VK_FORMAT_ASTC_8x8_SRGB_BLOCK:
case VK_FORMAT_ASTC_10x5_SRGB_BLOCK:
case VK_FORMAT_ASTC_10x6_SRGB_BLOCK:
case VK_FORMAT_ASTC_10x8_SRGB_BLOCK:
case VK_FORMAT_ASTC_10x10_SRGB_BLOCK:
case VK_FORMAT_ASTC_12x10_SRGB_BLOCK:
case VK_FORMAT_ASTC_12x12_SRGB_BLOCK:
return VK_FORMAT_R8G8B8A8_SRGB;
default:
return VK_FORMAT_UNDEFINED;
}
}
#endif /* VK_TEXCOMPRESS_ASTC_H */
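
For reference, a hedged sketch of how a driver might wire these helpers into a compute-based decode, using only the entry points declared above. The surrounding function, view creation, barriers, and push constant computation (sketched after the GLSL changes) are illustrative and live in the driver, not in this commit; pushing the descriptors requires VK_KHR_push_descriptor, matching the VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR flag used in create_layout():

/* Hypothetical driver-side sketch; only the vk_texcompress_astc_* calls and
 * the layout/pipeline objects come from this commit.
 */
static void
decode_astc_with_compute(struct vk_device *device, VkCommandBuffer cmd,
                         struct vk_texcompress_astc_state *astc,
                         VkPipelineCache cache, VkFormat astc_format,
                         VkImageView src_view, VkImageLayout src_layout,
                         VkImageView dst_view, VkExtent3D extent,
                         uint32_t layer_count,
                         const void *push_const /* 16 bytes, see pc block */)
{
   const struct vk_device_dispatch_table *disp = &device->dispatch_table;

   VkPipeline pipeline =
      vk_texcompress_astc_get_decode_pipeline(device, NULL, astc, cache, astc_format);
   if (pipeline == VK_NULL_HANDLE)
      return;

   struct vk_texcompress_astc_write_descriptor_set desc;
   vk_texcompress_astc_fill_write_descriptor_sets(astc, &desc, src_view, src_layout,
                                                  dst_view, astc_format);

   disp->CmdPushDescriptorSetKHR(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, astc->p_layout,
                                 0, VK_TEXCOMPRESS_ASTC_WRITE_DESC_SET_COUNT,
                                 desc.descriptor_set);
   disp->CmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
   disp->CmdPushConstants(cmd, astc->p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
                          push_const);

   /* each workgroup decodes a 2x2 tile of ASTC blocks per layer
    * (local_size_z = 4 in the shader) */
   uint32_t w_blk = (extent.width + vk_format_get_blockwidth(astc_format) - 1) /
                    vk_format_get_blockwidth(astc_format);
   uint32_t h_blk = (extent.height + vk_format_get_blockheight(astc_format) - 1) /
                    vk_format_get_blockheight(astc_format);
   disp->CmdDispatch(cmd, (w_blk + 1) / 2, (h_blk + 1) / 2, layer_count);
}

vk_texcompress_astc_init() is expected to be called once per device (the LUT buffer, layouts, and pipelines are shared), with vk_texcompress_astc_finish() called on device destruction.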