tu: Move UBWC layout into fdl6_layout() and use that function.
This gets us shared non-UBWC layout code between gallium and turnip. Until I fix up the rest of gallium to handle UBWC mipmapping, we do the single-level UBWC setup in gallium as a fixup after layout. Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
This commit is contained in:
@@ -29,28 +29,39 @@
|
||||
|
||||
#include "freedreno_layout.h"
|
||||
|
||||
/* indexed by cpp, including msaa 2x and 4x: */
|
||||
/* indexed by cpp, including msaa 2x and 4x:
|
||||
* TODO:
|
||||
* cpp=1 UBWC needs testing at larger texture sizes
|
||||
* missing UBWC blockwidth/blockheight for npot+64 cpp
|
||||
* missing 96/128 CPP for 8x MSAA with 32_32_32/32_32_32_32
|
||||
*/
|
||||
static const struct {
|
||||
unsigned pitchalign;
|
||||
unsigned heightalign;
|
||||
uint8_t ubwc_blockwidth;
|
||||
uint8_t ubwc_blockheight;
|
||||
} tile_alignment[] = {
|
||||
[1] = { 128, 32 },
|
||||
[2] = { 128, 16 },
|
||||
[1] = { 128, 32, 16, 4 },
|
||||
[2] = { 128, 16, 16, 4 },
|
||||
[3] = { 64, 32 },
|
||||
[4] = { 64, 16 },
|
||||
[4] = { 64, 16, 16, 4 },
|
||||
[6] = { 64, 16 },
|
||||
[8] = { 64, 16 },
|
||||
[8] = { 64, 16, 8, 4, },
|
||||
[12] = { 64, 16 },
|
||||
[16] = { 64, 16 },
|
||||
[16] = { 64, 16, 4, 4, },
|
||||
[24] = { 64, 16 },
|
||||
[32] = { 64, 16 },
|
||||
[32] = { 64, 16, 4, 2 },
|
||||
[48] = { 64, 16 },
|
||||
[64] = { 64, 16 },
|
||||
|
||||
/* special cases for r8g8: */
|
||||
[0] = { 64, 32 },
|
||||
[0] = { 64, 32, 16, 4 },
|
||||
};
|
||||
|
||||
#define RGB_TILE_WIDTH_ALIGNMENT 64
|
||||
#define RGB_TILE_HEIGHT_ALIGNMENT 16
|
||||
#define UBWC_PLANE_SIZE_ALIGNMENT 4096
|
||||
|
||||
/* NOTE: good way to test this is: (for example)
|
||||
* piglit/bin/texelFetch fs sampler3D 100x100x8
|
||||
*/
|
||||
@@ -58,7 +69,7 @@ void
|
||||
fdl6_layout(struct fdl_layout *layout,
|
||||
enum pipe_format format, uint32_t nr_samples,
|
||||
uint32_t width0, uint32_t height0, uint32_t depth0,
|
||||
uint32_t mip_levels, uint32_t array_size, bool is_3d)
|
||||
uint32_t mip_levels, uint32_t array_size, bool is_3d, bool ubwc)
|
||||
{
|
||||
assert(nr_samples > 0);
|
||||
layout->width0 = width0;
|
||||
@@ -70,7 +81,6 @@ fdl6_layout(struct fdl_layout *layout,
|
||||
|
||||
const struct util_format_description *format_desc =
|
||||
util_format_description(format);
|
||||
uint32_t level;
|
||||
uint32_t depth = depth0;
|
||||
/* linear dimensions: */
|
||||
uint32_t lwidth = width0;
|
||||
@@ -100,9 +110,11 @@ fdl6_layout(struct fdl_layout *layout,
|
||||
debug_assert(ta < ARRAY_SIZE(tile_alignment));
|
||||
debug_assert(tile_alignment[ta].pitchalign);
|
||||
|
||||
for (level = 0; level < mip_levels; level++) {
|
||||
for (uint32_t level = 0; level < mip_levels; level++) {
|
||||
struct fdl_slice *slice = &layout->slices[level];
|
||||
uint32_t tile_mode = fdl_tile_mode(layout, level);
|
||||
struct fdl_slice *ubwc_slice = &layout->ubwc_slices[level];
|
||||
uint32_t tile_mode = (ubwc ?
|
||||
layout->tile_mode : fdl_tile_mode(layout, level));
|
||||
uint32_t width, height;
|
||||
|
||||
/* tiled levels of 3D textures are rounded up to PoT dimensions: */
|
||||
@@ -162,13 +174,30 @@ fdl6_layout(struct fdl_layout *layout,
|
||||
|
||||
layout->size += slice->size0 * depth * layers_in_level;
|
||||
|
||||
if (false) {
|
||||
fprintf(stderr, "%s: %ux%ux%u@%u:\t%2u: stride=%4u, size=%6u,%7u, aligned_height=%3u, blocks=%u, offset=0x%x tiling=%d\n",
|
||||
util_format_name(format),
|
||||
width, height, depth, layout->cpp,
|
||||
level, slice->pitch * layout->cpp,
|
||||
slice->size0, layout->size, aligned_height, blocks,
|
||||
slice->offset, tile_mode);
|
||||
if (ubwc) {
|
||||
/* with UBWC every level is aligned to 4K */
|
||||
layout->size = align(layout->size, 4096);
|
||||
|
||||
uint32_t block_width = tile_alignment[ta].ubwc_blockwidth;
|
||||
uint32_t block_height = tile_alignment[ta].ubwc_blockheight;
|
||||
uint32_t meta_pitch = align(DIV_ROUND_UP(width, block_width),
|
||||
RGB_TILE_WIDTH_ALIGNMENT);
|
||||
uint32_t meta_height = align(DIV_ROUND_UP(height, block_height),
|
||||
RGB_TILE_HEIGHT_ALIGNMENT);
|
||||
|
||||
/* it looks like mipmaps need alignment to power of two
|
||||
* TODO: needs testing with large npot textures
|
||||
* (needed for the first level?)
|
||||
*/
|
||||
if (mip_levels > 1) {
|
||||
meta_pitch = util_next_power_of_two(meta_pitch);
|
||||
meta_height = util_next_power_of_two(meta_height);
|
||||
}
|
||||
|
||||
ubwc_slice->size0 = align(meta_pitch * meta_height, UBWC_PLANE_SIZE_ALIGNMENT);
|
||||
ubwc_slice->pitch = meta_pitch;
|
||||
ubwc_slice->offset = layout->ubwc_size;
|
||||
layout->ubwc_size += ubwc_slice->size0;
|
||||
}
|
||||
|
||||
depth = u_minify(depth, 1);
|
||||
@@ -182,4 +211,45 @@ fdl6_layout(struct fdl_layout *layout,
|
||||
layout->layer_size = align(layout->size, 4096);
|
||||
layout->size = layout->layer_size * array_size;
|
||||
}
|
||||
|
||||
/* Place the UBWC slices before the uncompressed slices, because the
|
||||
* kernel expects UBWC to be at the start of the buffer. In the HW, we
|
||||
* get to program the UBWC and non-UBWC offset/strides
|
||||
* independently.
|
||||
*/
|
||||
if (ubwc) {
|
||||
for (uint32_t level = 0; level < mip_levels; level++)
|
||||
layout->slices[level].offset += layout->ubwc_size * array_size;
|
||||
layout->size += layout->ubwc_size * array_size;
|
||||
}
|
||||
|
||||
if (false) {
|
||||
for (uint32_t level = 0; level < mip_levels; level++) {
|
||||
struct fdl_slice *slice = &layout->slices[level];
|
||||
struct fdl_slice *ubwc_slice = &layout->ubwc_slices[level];
|
||||
uint32_t tile_mode = (ubwc ?
|
||||
layout->tile_mode : fdl_tile_mode(layout, level));
|
||||
|
||||
fprintf(stderr, "%s: %ux%ux%u@%ux%u:\t%2u: stride=%4u, size=%6u,%6u, aligned_height=%3u, offset=0x%x,0x%x tiling=%d\n",
|
||||
util_format_name(format),
|
||||
u_minify(layout->width0, level),
|
||||
u_minify(layout->height0, level),
|
||||
u_minify(layout->depth0, level),
|
||||
layout->cpp, nr_samples,
|
||||
level,
|
||||
slice->pitch * layout->cpp,
|
||||
slice->size0, ubwc_slice->size0,
|
||||
slice->size0 / (slice->pitch * layout->cpp),
|
||||
slice->offset, ubwc_slice->offset,
|
||||
tile_mode);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
fdl6_get_ubwc_blockwidth(struct fdl_layout *layout,
|
||||
uint32_t *blockwidth, uint32_t *blockheight)
|
||||
{
|
||||
*blockwidth = tile_alignment[layout->cpp].ubwc_blockwidth;
|
||||
*blockheight = tile_alignment[layout->cpp].ubwc_blockheight;
|
||||
}
|
||||
|
@@ -174,6 +174,10 @@ void
|
||||
fdl6_layout(struct fdl_layout *layout,
|
||||
enum pipe_format format, uint32_t nr_samples,
|
||||
uint32_t width0, uint32_t height0, uint32_t depth0,
|
||||
uint32_t mip_levels, uint32_t array_size, bool is_3d);
|
||||
uint32_t mip_levels, uint32_t array_size, bool is_3d, bool ubwc);
|
||||
|
||||
void
|
||||
fdl6_get_ubwc_blockwidth(struct fdl_layout *layout,
|
||||
uint32_t *blockwidth, uint32_t *blockheight);
|
||||
|
||||
#endif /* FREEDRENO_LAYOUT_H_ */
|
||||
|
@@ -1563,7 +1563,7 @@ tu_GetImageMemoryRequirements(VkDevice _device,
|
||||
TU_FROM_HANDLE(tu_image, image, _image);
|
||||
|
||||
pMemoryRequirements->memoryTypeBits = 1;
|
||||
pMemoryRequirements->size = image->size;
|
||||
pMemoryRequirements->size = image->layout.size;
|
||||
pMemoryRequirements->alignment = image->alignment;
|
||||
}
|
||||
|
||||
|
@@ -50,147 +50,6 @@ tu6_get_image_tile_mode(struct tu_image *image, int level)
|
||||
return image->layout.tile_mode;
|
||||
}
|
||||
|
||||
/* indexed by cpp, including msaa 2x and 4x: */
|
||||
static const struct {
|
||||
uint8_t pitchalign;
|
||||
uint8_t heightalign;
|
||||
uint8_t ubwc_blockwidth;
|
||||
uint8_t ubwc_blockheight;
|
||||
} tile_alignment[] = {
|
||||
/* TODO:
|
||||
* cpp=1 UBWC needs testing at larger texture sizes
|
||||
* missing UBWC blockwidth/blockheight for npot+64 cpp
|
||||
* missing 96/128 CPP for 8x MSAA with 32_32_32/32_32_32_32
|
||||
*/
|
||||
[1] = { 128, 32, 16, 4 },
|
||||
[2] = { 128, 16, 16, 4 },
|
||||
[3] = { 64, 32 },
|
||||
[4] = { 64, 16, 16, 4 },
|
||||
[6] = { 64, 16 },
|
||||
[8] = { 64, 16, 8, 4, },
|
||||
[12] = { 64, 16 },
|
||||
[16] = { 64, 16, 4, 4, },
|
||||
[24] = { 64, 16 },
|
||||
[32] = { 64, 16, 4, 2 },
|
||||
[48] = { 64, 16 },
|
||||
[64] = { 64, 16 },
|
||||
/* special case for r8g8: */
|
||||
[0] = { 64, 32, 16, 4 },
|
||||
};
|
||||
|
||||
static void
|
||||
setup_slices(struct tu_image *image,
|
||||
const VkImageCreateInfo *pCreateInfo,
|
||||
bool ubwc_enabled)
|
||||
{
|
||||
#define RGB_TILE_WIDTH_ALIGNMENT 64
|
||||
#define RGB_TILE_HEIGHT_ALIGNMENT 16
|
||||
#define UBWC_PLANE_SIZE_ALIGNMENT 4096
|
||||
VkFormat format = pCreateInfo->format;
|
||||
enum util_format_layout layout = vk_format_description(format)->layout;
|
||||
uint32_t layer_size = 0;
|
||||
uint32_t ubwc_size = 0;
|
||||
int ta = image->layout.cpp;
|
||||
|
||||
if (image->type != VK_IMAGE_TYPE_3D)
|
||||
image->layout.layer_first = true;
|
||||
|
||||
/* The r8g8 format seems to not play by the normal tiling rules: */
|
||||
if (image->layout.cpp == 2 && vk_format_get_nr_components(format) == 2)
|
||||
ta = 0;
|
||||
|
||||
for (unsigned level = 0; level < pCreateInfo->mipLevels; level++) {
|
||||
struct fdl_slice *slice = &image->layout.slices[level];
|
||||
struct fdl_slice *ubwc_slice = &image->layout.ubwc_slices[level];
|
||||
uint32_t width = u_minify(pCreateInfo->extent.width, level);
|
||||
uint32_t height = u_minify(pCreateInfo->extent.height, level);
|
||||
uint32_t depth = u_minify(pCreateInfo->extent.depth, level);
|
||||
uint32_t aligned_height = height;
|
||||
uint32_t blocks;
|
||||
uint32_t pitchalign;
|
||||
|
||||
if (image->layout.tile_mode &&
|
||||
!image_level_linear(image, level, ubwc_enabled)) {
|
||||
/* tiled levels of 3D textures are rounded up to PoT dimensions: */
|
||||
if (pCreateInfo->imageType == VK_IMAGE_TYPE_3D) {
|
||||
width = util_next_power_of_two(width);
|
||||
height = aligned_height = util_next_power_of_two(height);
|
||||
}
|
||||
pitchalign = tile_alignment[ta].pitchalign;
|
||||
aligned_height = align(aligned_height, tile_alignment[ta].heightalign);
|
||||
} else {
|
||||
pitchalign = 64;
|
||||
}
|
||||
|
||||
/* The blits used for mem<->gmem work at a granularity of
|
||||
* 32x32, which can cause faults due to over-fetch on the
|
||||
* last level. The simple solution is to over-allocate a
|
||||
* bit the last level to ensure any over-fetch is harmless.
|
||||
* The pitch is already sufficiently aligned, but height
|
||||
* may not be:
|
||||
*/
|
||||
if (level + 1 == pCreateInfo->mipLevels)
|
||||
aligned_height = align(aligned_height, 32);
|
||||
|
||||
if (layout == UTIL_FORMAT_LAYOUT_ASTC)
|
||||
slice->pitch =
|
||||
util_align_npot(width, pitchalign * vk_format_get_blockwidth(format));
|
||||
else
|
||||
slice->pitch = align(width, pitchalign);
|
||||
|
||||
slice->offset = layer_size;
|
||||
blocks = vk_format_get_block_count(format, slice->pitch, aligned_height);
|
||||
|
||||
/* 1d array and 2d array textures must all have the same layer size
|
||||
* for each miplevel on a6xx. 3d textures can have different layer
|
||||
* sizes for high levels, but the hw auto-sizer is buggy (or at least
|
||||
* different than what this code does), so as soon as the layer size
|
||||
* range gets into range, we stop reducing it.
|
||||
*/
|
||||
if (pCreateInfo->imageType == VK_IMAGE_TYPE_3D) {
|
||||
if (level < 1 || image->layout.slices[level - 1].size0 > 0xf000) {
|
||||
slice->size0 = align(blocks * image->layout.cpp, 4096);
|
||||
} else {
|
||||
slice->size0 = image->layout.slices[level - 1].size0;
|
||||
}
|
||||
} else {
|
||||
slice->size0 = blocks * image->layout.cpp;
|
||||
}
|
||||
|
||||
layer_size += slice->size0 * depth;
|
||||
if (ubwc_enabled) {
|
||||
/* with UBWC every level is aligned to 4K */
|
||||
layer_size = align(layer_size, 4096);
|
||||
|
||||
uint32_t block_width = tile_alignment[ta].ubwc_blockwidth;
|
||||
uint32_t block_height = tile_alignment[ta].ubwc_blockheight;
|
||||
uint32_t meta_pitch = align(DIV_ROUND_UP(width, block_width), RGB_TILE_WIDTH_ALIGNMENT);
|
||||
uint32_t meta_height = align(DIV_ROUND_UP(height, block_height), RGB_TILE_HEIGHT_ALIGNMENT);
|
||||
|
||||
/* it looks like mipmaps need alignment to power of two
|
||||
* TODO: needs testing with large npot textures
|
||||
* (needed for the first level?)
|
||||
*/
|
||||
if (pCreateInfo->mipLevels > 1) {
|
||||
meta_pitch = util_next_power_of_two(meta_pitch);
|
||||
meta_height = util_next_power_of_two(meta_height);
|
||||
}
|
||||
|
||||
ubwc_slice->pitch = meta_pitch;
|
||||
ubwc_slice->offset = ubwc_size;
|
||||
ubwc_size += align(meta_pitch * meta_height, UBWC_PLANE_SIZE_ALIGNMENT);
|
||||
}
|
||||
}
|
||||
image->layout.layer_size = align(layer_size, 4096);
|
||||
|
||||
VkDeviceSize offset = ubwc_size * pCreateInfo->arrayLayers;
|
||||
for (unsigned level = 0; level < pCreateInfo->mipLevels; level++)
|
||||
image->layout.slices[level].offset += offset;
|
||||
|
||||
image->size = offset + image->layout.layer_size * pCreateInfo->arrayLayers;
|
||||
image->layout.ubwc_size = ubwc_size;
|
||||
}
|
||||
|
||||
VkResult
|
||||
tu_image_create(VkDevice _device,
|
||||
const VkImageCreateInfo *pCreateInfo,
|
||||
@@ -224,7 +83,6 @@ tu_image_create(VkDevice _device,
|
||||
image->level_count = pCreateInfo->mipLevels;
|
||||
image->layer_count = pCreateInfo->arrayLayers;
|
||||
image->samples = pCreateInfo->samples;
|
||||
image->layout.cpp = vk_format_get_blocksize(image->vk_format) * image->samples;
|
||||
|
||||
image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
|
||||
if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
|
||||
@@ -268,7 +126,10 @@ tu_image_create(VkDevice _device,
|
||||
ubwc_enabled = false;
|
||||
}
|
||||
|
||||
if (!tile_alignment[image->layout.cpp].ubwc_blockwidth) {
|
||||
uint32_t ubwc_blockwidth, ubwc_blockheight;
|
||||
fdl6_get_ubwc_blockwidth(&image->layout,
|
||||
&ubwc_blockwidth, &ubwc_blockheight);
|
||||
if (!ubwc_blockwidth) {
|
||||
tu_finishme("UBWC for cpp=%d", image->layout.cpp);
|
||||
ubwc_enabled = false;
|
||||
}
|
||||
@@ -276,7 +137,15 @@ tu_image_create(VkDevice _device,
|
||||
/* expect UBWC enabled if we asked for it */
|
||||
assert(modifier != DRM_FORMAT_MOD_QCOM_COMPRESSED || ubwc_enabled);
|
||||
|
||||
setup_slices(image, pCreateInfo, ubwc_enabled);
|
||||
fdl6_layout(&image->layout, vk_format_to_pipe_format(image->vk_format),
|
||||
image->samples,
|
||||
pCreateInfo->extent.width,
|
||||
pCreateInfo->extent.height,
|
||||
pCreateInfo->extent.depth,
|
||||
pCreateInfo->mipLevels,
|
||||
pCreateInfo->arrayLayers,
|
||||
pCreateInfo->imageType == VK_IMAGE_TYPE_3D,
|
||||
ubwc_enabled);
|
||||
|
||||
*pImage = tu_image_to_handle(image);
|
||||
|
||||
@@ -418,8 +287,9 @@ tu_image_view_init(struct tu_image_view *iview,
|
||||
iview->descriptor[5] = base_addr >> 32;
|
||||
|
||||
if (image->layout.ubwc_size) {
|
||||
uint32_t block_width = tile_alignment[image->layout.cpp].ubwc_blockwidth;
|
||||
uint32_t block_height = tile_alignment[image->layout.cpp].ubwc_blockheight;
|
||||
uint32_t block_width, block_height;
|
||||
fdl6_get_ubwc_blockwidth(&image->layout,
|
||||
&block_width, &block_height);
|
||||
|
||||
iview->descriptor[3] |= A6XX_TEX_CONST_3_FLAG | A6XX_TEX_CONST_3_TILE_ALL;
|
||||
iview->descriptor[7] = ubwc_addr;
|
||||
|
@@ -1293,7 +1293,6 @@ struct tu_image
|
||||
VkSampleCountFlagBits samples;
|
||||
|
||||
|
||||
VkDeviceSize size;
|
||||
uint32_t alignment;
|
||||
|
||||
struct fdl_layout layout;
|
||||
|
@@ -200,7 +200,7 @@ fd6_setup_slices(struct fd_resource *rsc)
|
||||
fdl6_layout(&rsc->layout, prsc->format, fd_resource_nr_samples(prsc),
|
||||
prsc->width0, prsc->height0, prsc->depth0,
|
||||
prsc->last_level + 1, prsc->array_size,
|
||||
prsc->target == PIPE_TEXTURE_3D);
|
||||
prsc->target == PIPE_TEXTURE_3D, false);
|
||||
|
||||
/* The caller does this bit of layout setup again. */
|
||||
if (rsc->layout.layer_first)
|
||||
|
Reference in New Issue
Block a user