nvk: Convert to using NIL for image layout

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24326>
This commit is contained in:
Faith Ekstrand
2023-01-30 20:11:49 -06:00
committed by Marge Bot
parent eefb60b832
commit 04d2bf2ee7
8 changed files with 143 additions and 665 deletions

View File

@@ -22,8 +22,8 @@ nvk_CmdBlitImage2(VkCommandBuffer commandBuffer,
VK_FROM_HANDLE(nvk_image, dst, pBlitImageInfo->dstImage);
struct nouveau_ws_push *push = cmd->push;
assert(src->format->supports_2d_blit);
assert(dst->format->supports_2d_blit);
assert(nvk_get_format(src->vk.format)->supports_2d_blit);
assert(nvk_get_format(dst->vk.format)->supports_2d_blit);
nvk_push_image_ref(push, src, NOUVEAU_WS_BO_RD);
nvk_push_image_ref(push, dst, NOUVEAU_WS_BO_WR);
@@ -32,8 +32,11 @@ nvk_CmdBlitImage2(VkCommandBuffer commandBuffer,
P_IMMD(push, NV902D, SET_COLOR_KEY_ENABLE, V_FALSE);
P_IMMD(push, NV902D, SET_RENDER_ENABLE_C, MODE_TRUE);
P_IMMD(push, NV902D, SET_SRC_FORMAT, src->format->hw_format);
P_IMMD(push, NV902D, SET_DST_FORMAT, dst->format->hw_format);
const uint32_t src_hw_format = nvk_get_format(src->vk.format)->hw_format;
const uint32_t dst_hw_format = nvk_get_format(dst->vk.format)->hw_format;
P_IMMD(push, NV902D, SET_SRC_FORMAT, src_hw_format);
P_IMMD(push, NV902D, SET_DST_FORMAT, dst_hw_format);
if (pBlitImageInfo->filter == VK_FILTER_NEAREST) {
P_IMMD(push, NV902D, SET_PIXELS_FROM_MEMORY_SAMPLE_MODE, {
@@ -53,9 +56,9 @@ nvk_CmdBlitImage2(VkCommandBuffer commandBuffer,
*
* NOTE: this only works for blits to 8 bit or packed formats
*/
if (vk_format_get_nr_components(src->format->vk_format) == 1 &&
src->format->hw_format != dst->format->hw_format) {
uint8_t mask = vk_format_is_snorm(dst->format->vk_format) ? 0x7f : 0xff;
if (vk_format_get_nr_components(src->vk.format) == 1 &&
src_hw_format != dst_hw_format) {
uint8_t mask = vk_format_is_snorm(dst->vk.format) ? 0x7f : 0xff;
P_MTHD(push, NV902D, SET_BETA4);
P_NV902D_SET_BETA4(push, {
.r = mask,
@@ -69,9 +72,6 @@ nvk_CmdBlitImage2(VkCommandBuffer commandBuffer,
for (unsigned r = 0; r < pBlitImageInfo->regionCount; r++) {
const VkImageBlit2 *region = &pBlitImageInfo->pRegions[r];
struct nvk_image_level *src_level = &src->level[region->srcSubresource.mipLevel];
struct nvk_image_level *dst_level = &dst->level[region->dstSubresource.mipLevel];
unsigned x_i = region->dstOffsets[0].x < region->dstOffsets[1].x ? 0 : 1;
unsigned y_i = region->dstOffsets[0].y < region->dstOffsets[1].y ? 0 : 1;
@@ -100,43 +100,53 @@ nvk_CmdBlitImage2(VkCommandBuffer commandBuffer,
src_start_x_fp += scaling_x_fp / 2;
src_start_y_fp += scaling_y_fp / 2;
if (src_level->tile.is_tiled) {
const struct nil_image_level *src_level =
&src->nil.levels[region->srcSubresource.mipLevel];
const VkExtent3D src_level_extent =
vk_image_mip_level_extent(&src->vk, region->srcSubresource.mipLevel);
if (src_level->tiling.is_tiled) {
P_MTHD(push, NV902D, SET_SRC_MEMORY_LAYOUT);
P_NV902D_SET_SRC_MEMORY_LAYOUT(push, V_BLOCKLINEAR);
P_NV902D_SET_SRC_BLOCK_SIZE(push, {
.height = src_level->tile.y,
.depth = src_level->tile.z,
.height = src_level->tiling.y_log2,
.depth = src_level->tiling.z_log2,
});
} else {
P_IMMD(push, NV902D, SET_SRC_MEMORY_LAYOUT, V_PITCH);
}
P_MTHD(push, NV902D, SET_SRC_DEPTH);
P_NV902D_SET_SRC_DEPTH(push, src_level->extent.depth);
P_NV902D_SET_SRC_DEPTH(push, src_level_extent.depth);
P_MTHD(push, NV902D, SET_SRC_PITCH);
P_NV902D_SET_SRC_PITCH(push, src_level->row_stride);
P_NV902D_SET_SRC_WIDTH(push, src_level->extent.width);
P_NV902D_SET_SRC_HEIGHT(push, src_level->extent.height);
P_NV902D_SET_SRC_PITCH(push, src_level->row_stride_B);
P_NV902D_SET_SRC_WIDTH(push, src_level_extent.width);
P_NV902D_SET_SRC_HEIGHT(push, src_level_extent.height);
if (dst_level->tile.is_tiled) {
const struct nil_image_level *dst_level =
&dst->nil.levels[region->dstSubresource.mipLevel];
const VkExtent3D dst_level_extent =
vk_image_mip_level_extent(&dst->vk, region->dstSubresource.mipLevel);
if (dst_level->tiling.is_tiled) {
P_MTHD(push, NV902D, SET_DST_MEMORY_LAYOUT);
P_NV902D_SET_DST_MEMORY_LAYOUT(push, V_BLOCKLINEAR);
P_NV902D_SET_DST_BLOCK_SIZE(push, {
.height = dst_level->tile.y,
.depth = dst_level->tile.z,
.height = dst_level->tiling.y_log2,
.depth = dst_level->tiling.z_log2,
});
} else {
P_IMMD(push, NV902D, SET_DST_MEMORY_LAYOUT, V_PITCH);
}
P_MTHD(push, NV902D, SET_DST_DEPTH);
P_NV902D_SET_DST_DEPTH(push, dst_level->extent.depth);
P_NV902D_SET_DST_DEPTH(push, dst_level_extent.depth);
P_MTHD(push, NV902D, SET_DST_PITCH);
P_NV902D_SET_DST_PITCH(push, dst_level->row_stride);
P_NV902D_SET_DST_WIDTH(push, dst_level->extent.width);
P_NV902D_SET_DST_HEIGHT(push, dst_level->extent.height);
P_NV902D_SET_DST_PITCH(push, dst_level->row_stride_B);
P_NV902D_SET_DST_WIDTH(push, dst_level_extent.width);
P_NV902D_SET_DST_HEIGHT(push, dst_level_extent.height);
P_MTHD(push, NV902D, SET_PIXELS_FROM_MEMORY_DST_X0);
P_NV902D_SET_PIXELS_FROM_MEMORY_DST_X0(push, dst_start_x);
@@ -154,13 +164,15 @@ nvk_CmdBlitImage2(VkCommandBuffer commandBuffer,
assert(src->vk.image_type != VK_IMAGE_TYPE_3D);
assert(dst->vk.image_type != VK_IMAGE_TYPE_3D);
for (unsigned w = 0; w < region->srcSubresource.layerCount; w++) {
VkDeviceSize src_addr = nvk_image_base_address(src, region->srcSubresource.mipLevel);
VkDeviceSize dst_addr = nvk_image_base_address(dst, region->dstSubresource.mipLevel);
const uint32_t src_layer = w + region->srcSubresource.baseArrayLayer;
const VkDeviceSize src_addr = nvk_image_base_address(src) +
src_layer * src->nil.array_stride_B +
src_level->offset_B;
src_addr += (w + region->srcSubresource.baseArrayLayer) *
src_level->layer_stride;
dst_addr += (w + region->dstSubresource.baseArrayLayer) *
dst_level->layer_stride;
const uint32_t dst_layer = w + region->dstSubresource.baseArrayLayer;
const VkDeviceSize dst_addr = nvk_image_base_address(dst) +
dst_layer * dst->nil.array_stride_B +
dst_level->offset_B;
P_MTHD(push, NV902D, SET_SRC_OFFSET_UPPER);
P_NV902D_SET_SRC_OFFSET_UPPER(push, src_addr >> 32);

View File

@@ -53,8 +53,8 @@ struct nouveau_copy_buffer {
uint32_t base_array_layer;
VkExtent3D extent;
uint32_t row_stride;
uint32_t layer_stride;
struct nvk_tile tile;
uint32_t array_stride;
struct nil_tiling tiling;
};
struct nouveau_copy {
@@ -74,7 +74,7 @@ nouveau_copy_rect_buffer(
return (struct nouveau_copy_buffer) {
.base_addr = nvk_buffer_address(buf, offset),
.row_stride = buffer_layout.row_stride_B,
.layer_stride = buffer_layout.image_stride_B,
.array_stride = buffer_layout.image_stride_B,
};
}
@@ -84,16 +84,15 @@ nouveau_copy_rect_image(
VkOffset3D offset,
const VkImageSubresourceLayers *sub_res)
{
struct nvk_image_level *level = &img->level[sub_res->mipLevel];
struct nouveau_copy_buffer buf = {
.base_addr = nvk_image_base_address(img, sub_res->mipLevel),
.base_addr = nvk_image_base_address(img) +
img->nil.levels[sub_res->mipLevel].offset_B,
.offset = vk_image_sanitize_offset(&img->vk, offset),
.extent = level->extent,
.extent = vk_image_mip_level_extent(&img->vk, sub_res->mipLevel),
.base_array_layer = sub_res->baseArrayLayer,
.row_stride = level->row_stride,
.layer_stride = level->layer_stride,
.tile = level->tile,
.row_stride = img->nil.levels[sub_res->mipLevel].row_stride_B,
.array_stride = img->nil.array_stride_B,
.tiling = img->nil.levels[sub_res->mipLevel].tiling,
};
return buf;
@@ -108,15 +107,15 @@ nouveau_copy_rect(struct nvk_cmd_buffer *cmd, struct nouveau_copy *copy)
VkDeviceSize src_addr = copy->src.base_addr;
VkDeviceSize dst_addr = copy->dst.base_addr;
src_addr += (w + copy->src.base_array_layer) * copy->src.layer_stride;
dst_addr += (w + copy->dst.base_array_layer) * copy->dst.layer_stride;
src_addr += (w + copy->src.base_array_layer) * copy->src.array_stride;
dst_addr += (w + copy->dst.base_array_layer) * copy->dst.array_stride;
if (!copy->src.tile.is_tiled) {
if (!copy->src.tiling.is_tiled) {
src_addr += copy->src.offset.x * copy->bpp +
copy->src.offset.y * copy->src.row_stride;
}
if (!copy->dst.tile.is_tiled) {
if (!copy->dst.tiling.is_tiled) {
dst_addr += copy->dst.offset.x * copy->bpp +
copy->dst.offset.y * copy->dst.row_stride;
}
@@ -133,14 +132,15 @@ nouveau_copy_rect(struct nvk_cmd_buffer *cmd, struct nouveau_copy *copy)
P_NV90B5_LINE_COUNT(push, copy->extent.height);
uint32_t src_layout = 0, dst_layout = 0;
if (copy->src.tile.is_tiled) {
assert(copy->src.tile.is_fermi);
if (copy->src.tiling.is_tiled) {
P_MTHD(push, NV90B5, SET_SRC_BLOCK_SIZE);
P_NV90B5_SET_SRC_BLOCK_SIZE(push, {
.width = copy->src.tile.x,
.height = copy->src.tile.y,
.depth = copy->src.tile.z,
.gob_height = GOB_HEIGHT_GOB_HEIGHT_FERMI_8,
.width = 0, /* Tiles are always 1 GOB wide */
.height = copy->src.tiling.y_log2,
.depth = copy->src.tiling.z_log2,
.gob_height = copy->src.tiling.gob_height_8 ?
GOB_HEIGHT_GOB_HEIGHT_FERMI_8 :
GOB_HEIGHT_GOB_HEIGHT_TESLA_4,
});
P_NV90B5_SET_SRC_WIDTH(push, copy->src.extent.width * copy->bpp);
P_NV90B5_SET_SRC_HEIGHT(push, copy->src.extent.height);
@@ -161,18 +161,19 @@ nouveau_copy_rect(struct nvk_cmd_buffer *cmd, struct nouveau_copy *copy)
src_layout = NV90B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_BLOCKLINEAR;
} else {
src_addr += copy->src.layer_stride;
src_addr += copy->src.array_stride;
src_layout = NV90B5_LAUNCH_DMA_SRC_MEMORY_LAYOUT_PITCH;
}
if (copy->dst.tile.is_tiled) {
assert(copy->dst.tile.is_fermi);
if (copy->dst.tiling.is_tiled) {
P_MTHD(push, NV90B5, SET_DST_BLOCK_SIZE);
P_NV90B5_SET_DST_BLOCK_SIZE(push, {
.width = copy->dst.tile.x,
.height = copy->dst.tile.y,
.depth = copy->dst.tile.z,
.gob_height = GOB_HEIGHT_GOB_HEIGHT_FERMI_8,
.width = 0, /* Tiles are always 1 GOB wide */
.height = copy->dst.tiling.y_log2,
.depth = copy->dst.tiling.z_log2,
.gob_height = copy->dst.tiling.gob_height_8 ?
GOB_HEIGHT_GOB_HEIGHT_FERMI_8 :
GOB_HEIGHT_GOB_HEIGHT_TESLA_4,
});
P_NV90B5_SET_DST_WIDTH(push, copy->dst.extent.width * copy->bpp);
P_NV90B5_SET_DST_HEIGHT(push, copy->dst.extent.height);
@@ -193,7 +194,7 @@ nouveau_copy_rect(struct nvk_cmd_buffer *cmd, struct nouveau_copy *copy)
dst_layout = NV90B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_BLOCKLINEAR;
} else {
dst_addr += copy->dst.layer_stride;
dst_addr += copy->dst.array_stride;
dst_layout = NV90B5_LAUNCH_DMA_DST_MEMORY_LAYOUT_PITCH;
}

View File

@@ -164,6 +164,11 @@ struct nvk_format nvk_formats[] = {
.hw_format = NV90C0_SET_SU_LD_ST_TARGET_FORMAT_COLOR_RU32,
.supports_2d_blit = false,
},
{
.vk_format = VK_FORMAT_R16_UINT,
.hw_format = NV90C0_SET_SU_LD_ST_TARGET_FORMAT_COLOR_RU16,
.supports_2d_blit = false,
},
};
const struct nvk_format *
@@ -176,318 +181,3 @@ nvk_get_format(VkFormat vk_format)
return NULL;
}
#include "gallium/drivers/nouveau/nv50/g80_defs.xml.h"
#include "gallium/drivers/nouveau/nv50/g80_texture.xml.h"
#include "gallium/drivers/nouveau/nvc0/gm107_texture.xml.h"
/* Abbreviated usage masks:
* T: texturing
* R: render target
* B: render target, blendable
* C: render target (color), blendable only on nvc0
* D: scanout/display target, blendable
* Z: depth/stencil
* I: image / surface, implies T
*/
/* Pick the COMPONENTS_SIZES enum family for a table row. The first
 * argument of SF() (A, B, C or D) is pasted onto "SF_" to select one of
 * these four macros, which in turn paste the size name onto the prefix.
 */
#define SF_A(sz) G80_TIC_0_COMPONENTS_SIZES_##sz
#define SF_B(sz) G200_TIC_0_COMPONENTS_SIZES_##sz
#define SF_C(sz) GF100_TIC_0_COMPONENTS_SIZES_##sz
#define SF_D(sz) GM107_TIC2_0_COMPONENTS_SIZES_##sz
/* Emit one designated array initializer, indexed by PIPE_FORMAT_<pf>.
 * The positional values are, in order: the component-sizes enum, the four
 * per-channel data types (t0..t3) and the four channel sources (r, g, b, a).
 * The sf and u arguments are accepted but not used in the expansion.
 */
#define SF(c, pf, sf, r, g, b, a, t0, t1, t2, t3, sz, u) \
[PIPE_FORMAT_##pf] = { \
SF_##c(sz), \
G80_TIC_TYPE_##t0, \
G80_TIC_TYPE_##t1, \
G80_TIC_TYPE_##t2, \
G80_TIC_TYPE_##t3, \
G80_TIC_SOURCE_##r, \
G80_TIC_SOURCE_##g, \
G80_TIC_SOURCE_##b, \
G80_TIC_SOURCE_##a, \
}
/* Four-channel colour format: the same data type t is used for every channel
 * and all four sources are passed through unchanged.
 */
#define C4(c, p, n, r, g, b, a, t, s, u) \
SF(c, p, G80_SURFACE_FORMAT_##n, r, g, b, a, t, t, t, t, s, u)
/* Depth-style rows: type t on the first channel, UINT on the other three,
 * and the alpha source forced to ONE_FLOAT (the a argument is ignored).
 * ZX and ZS have identical expansions.
 */
#define ZX(c, p, n, r, g, b, a, t, s, u) \
SF(c, p, G80_ZETA_FORMAT_##n, \
r, g, b, ONE_FLOAT, t, UINT, UINT, UINT, s, u)
#define ZS(c, p, n, r, g, b, a, t, s, u) \
SF(c, p, G80_ZETA_FORMAT_##n, \
r, g, b, ONE_FLOAT, t, UINT, UINT, UINT, s, u)
/* Like ZX/ZS, but type t is placed on the second channel and the first
 * channel is UINT.
 */
#define SZ(c, p, n, r, g, b, a, t, s, u) \
SF(c, p, G80_ZETA_FORMAT_##n, \
r, g, b, ONE_FLOAT, UINT, t, UINT, UINT, s, u)
/* All four sources read the single source r and every channel type is UINT. */
#define SX(c, p, r, s, u) \
SF(c, p, G80_ZETA_FORMAT_NONE, \
r, r, r, r, UINT, UINT, UINT, UINT, s, u)
/* Three-channel rows: alpha source replaced by ONE_FLOAT (F3) or ONE_INT (I3). */
#define F3(c, p, n, r, g, b, a, t, s, u) \
C4(c, p, n, r, g, b, ONE_FLOAT, t, s, u)
#define I3(c, p, n, r, g, b, a, t, s, u) \
C4(c, p, n, r, g, b, ONE_INT, t, s, u)
/* Two-channel rows: blue source is ZERO, alpha is ONE_FLOAT (F2) or ONE_INT (I2). */
#define F2(c, p, n, r, g, b, a, t, s, u) \
C4(c, p, n, r, g, ZERO, ONE_FLOAT, t, s, u)
#define I2(c, p, n, r, g, b, a, t, s, u) \
C4(c, p, n, r, g, ZERO, ONE_INT, t, s, u)
/* One-channel rows: green and blue sources are ZERO, alpha is ONE_FLOAT (F1)
 * or ONE_INT (I1).
 */
#define F1(c, p, n, r, g, b, a, t, s, u) \
C4(c, p, n, r, ZERO, ZERO, ONE_FLOAT, t, s, u)
#define I1(c, p, n, r, g, b, a, t, s, u) \
C4(c, p, n, r, ZERO, ZERO, ONE_INT, t, s, u)
/* Alpha-only rows: red, green and blue sources are ZERO; only a is passed on. */
#define A1(c, p, n, r, g, b, a, t, s, u) \
C4(c, p, n, ZERO, ZERO, ZERO, a, t, s, u)
/* Per-format texture header description, indexed by PIPE_FORMAT_*.
 *
 * Every row is built by one of the SF()-based macros, which emit a
 * designated initializer for the matching PIPE_FORMAT_* index; formats
 * without a row are left zero-initialized. The trailing argument of each
 * row is an abbreviated usage mask that SF() does not store.
 *
 * The ETC2/ASTC rows are only compiled when NOUVEAU_DRIVER == 0xc0 and the
 * R8G8_SRGB row only when NOUVEAU_DRIVER < 0xc0.
 */
const struct nvk_tic_format pipe_to_nvk_tic_format[PIPE_FORMAT_COUNT] =
{
C4(A, B8G8R8A8_UNORM, BGRA8_UNORM, B, G, R, A, UNORM, A8B8G8R8, ID),
F3(A, B8G8R8X8_UNORM, BGRX8_UNORM, B, G, R, xx, UNORM, A8B8G8R8, TD),
C4(A, B8G8R8A8_SRGB, BGRA8_SRGB, B, G, R, A, UNORM, A8B8G8R8, TD),
F3(A, B8G8R8X8_SRGB, BGRX8_SRGB, B, G, R, xx, UNORM, A8B8G8R8, TD),
C4(A, R8G8B8A8_UNORM, RGBA8_UNORM, R, G, B, A, UNORM, A8B8G8R8, IB),
F3(A, R8G8B8X8_UNORM, RGBX8_UNORM, R, G, B, xx, UNORM, A8B8G8R8, TB),
C4(A, R8G8B8A8_SRGB, RGBA8_SRGB, R, G, B, A, UNORM, A8B8G8R8, TB),
F3(A, R8G8B8X8_SRGB, RGBX8_SRGB, R, G, B, xx, UNORM, A8B8G8R8, TB),
ZX(B, Z16_UNORM, Z16_UNORM, R, R, R, xx, UNORM, Z16, TZ),
ZX(A, Z32_FLOAT, Z32_FLOAT, R, R, R, xx, FLOAT, ZF32, TZ),
ZX(A, Z24X8_UNORM, Z24_X8_UNORM, R, R, R, xx, UNORM, X8Z24, TZ),
SZ(A, X8Z24_UNORM, S8_Z24_UNORM, G, G, G, xx, UNORM, Z24S8, TZ),
ZS(A, Z24_UNORM_S8_UINT, Z24_S8_UNORM, R, R, R, xx, UNORM, S8Z24, TZ),
SZ(A, S8_UINT_Z24_UNORM, S8_Z24_UNORM, G, G, G, xx, UNORM, Z24S8, TZ),
ZS(A, Z32_FLOAT_S8X24_UINT, Z32_S8_X24_FLOAT, R, R, R, xx, FLOAT, ZF32_X24S8, TZ),
SX(A, S8_UINT, R, R8, T),
SX(A, X24S8_UINT, G, G8R24, T),
SX(A, S8X24_UINT, R, G24R8, T),
SX(A, X32_S8X24_UINT, G, R32_B24G8, T),
F3(A, B5G6R5_UNORM, B5G6R5_UNORM, B, G, R, xx, UNORM, B5G6R5, TD),
C4(A, B5G5R5A1_UNORM, BGR5_A1_UNORM, B, G, R, A, UNORM, A1B5G5R5, TD),
F3(A, B5G5R5X1_UNORM, BGR5_X1_UNORM, B, G, R, xx, UNORM, A1B5G5R5, TD),
C4(A, B4G4R4A4_UNORM, NONE, B, G, R, A, UNORM, A4B4G4R4, T),
F3(A, B4G4R4X4_UNORM, NONE, B, G, R, xx, UNORM, A4B4G4R4, T),
F3(A, R9G9B9E5_FLOAT, NONE, R, G, B, xx, FLOAT, E5B9G9R9_SHAREDEXP, T),
C4(A, R10G10B10A2_UNORM, RGB10_A2_UNORM, R, G, B, A, UNORM, A2B10G10R10, ID),
F3(A, R10G10B10X2_UNORM, RGB10_A2_UNORM, R, G, B, xx, UNORM, A2B10G10R10, T),
C4(A, B10G10R10A2_UNORM, BGR10_A2_UNORM, B, G, R, A, UNORM, A2B10G10R10, TB),
F3(A, B10G10R10X2_UNORM, BGR10_A2_UNORM, B, G, R, xx, UNORM, A2B10G10R10, T),
C4(A, R10G10B10A2_SNORM, NONE, R, G, B, A, SNORM, A2B10G10R10, T),
C4(A, B10G10R10A2_SNORM, NONE, B, G, R, A, SNORM, A2B10G10R10, T),
C4(A, R10G10B10A2_UINT, RGB10_A2_UINT, R, G, B, A, UINT, A2B10G10R10, TR),
C4(A, B10G10R10A2_UINT, RGB10_A2_UINT, B, G, R, A, UINT, A2B10G10R10, T),
F3(A, R11G11B10_FLOAT, R11G11B10_FLOAT, R, G, B, xx, FLOAT, BF10GF11RF11, IB),
F3(A, L8_UNORM, R8_UNORM, R, R, R, xx, UNORM, R8, TB),
F3(A, L8_SRGB, NONE, R, R, R, xx, UNORM, R8, T),
F3(A, L8_SNORM, R8_SNORM, R, R, R, xx, SNORM, R8, TC),
I3(A, L8_SINT, R8_SINT, R, R, R, xx, SINT, R8, TR),
I3(A, L8_UINT, R8_UINT, R, R, R, xx, UINT, R8, TR),
F3(A, L16_UNORM, R16_UNORM, R, R, R, xx, UNORM, R16, TC),
F3(A, L16_SNORM, R16_SNORM, R, R, R, xx, SNORM, R16, TC),
F3(A, L16_FLOAT, R16_FLOAT, R, R, R, xx, FLOAT, R16, TB),
I3(A, L16_SINT, R16_SINT, R, R, R, xx, SINT, R16, TR),
I3(A, L16_UINT, R16_UINT, R, R, R, xx, UINT, R16, TR),
F3(A, L32_FLOAT, R32_FLOAT, R, R, R, xx, FLOAT, R32, TB),
I3(A, L32_SINT, R32_SINT, R, R, R, xx, SINT, R32, TR),
I3(A, L32_UINT, R32_UINT, R, R, R, xx, UINT, R32, TR),
C4(A, I8_UNORM, R8_UNORM, R, R, R, R, UNORM, R8, TR),
C4(A, I8_SNORM, R8_SNORM, R, R, R, R, SNORM, R8, TR),
C4(A, I8_SINT, R8_SINT, R, R, R, R, SINT, R8, TR),
C4(A, I8_UINT, R8_UINT, R, R, R, R, UINT, R8, TR),
C4(A, I16_UNORM, R16_UNORM, R, R, R, R, UNORM, R16, TR),
C4(A, I16_SNORM, R16_SNORM, R, R, R, R, SNORM, R16, TR),
C4(A, I16_FLOAT, R16_FLOAT, R, R, R, R, FLOAT, R16, TR),
C4(A, I16_SINT, R16_SINT, R, R, R, R, SINT, R16, TR),
C4(A, I16_UINT, R16_UINT, R, R, R, R, UINT, R16, TR),
C4(A, I32_FLOAT, R32_FLOAT, R, R, R, R, FLOAT, R32, TR),
C4(A, I32_SINT, R32_SINT, R, R, R, R, SINT, R32, TR),
C4(A, I32_UINT, R32_UINT, R, R, R, R, UINT, R32, TR),
A1(A, A8_UNORM, A8_UNORM, xx, xx, xx, R, UNORM, R8, TB),
A1(A, A8_SNORM, R8_SNORM, xx, xx, xx, R, SNORM, R8, T),
A1(A, A8_SINT, R8_SINT, xx, xx, xx, R, SINT, R8, T),
A1(A, A8_UINT, R8_UINT, xx, xx, xx, R, UINT, R8, T),
A1(A, A16_UNORM, R16_UNORM, xx, xx, xx, R, UNORM, R16, T),
A1(A, A16_SNORM, R16_SNORM, xx, xx, xx, R, SNORM, R16, T),
A1(A, A16_FLOAT, R16_FLOAT, xx, xx, xx, R, FLOAT, R16, T),
A1(A, A16_SINT, R16_SINT, xx, xx, xx, R, SINT, R16, T),
A1(A, A16_UINT, R16_UINT, xx, xx, xx, R, UINT, R16, T),
A1(A, A32_FLOAT, R32_FLOAT, xx, xx, xx, R, FLOAT, R32, T),
A1(A, A32_SINT, R32_SINT, xx, xx, xx, R, SINT, R32, T),
A1(A, A32_UINT, R32_UINT, xx, xx, xx, R, UINT, R32, T),
C4(A, L4A4_UNORM, NONE, R, R, R, G, UNORM, G4R4, T),
C4(A, L8A8_UNORM, RG8_UNORM, R, R, R, G, UNORM, G8R8, T),
C4(A, L8A8_SNORM, RG8_SNORM, R, R, R, G, SNORM, G8R8, T),
C4(A, L8A8_SRGB, NONE, R, R, R, G, UNORM, G8R8, T),
C4(A, L8A8_SINT, RG8_SINT, R, R, R, G, SINT, G8R8, T),
C4(A, L8A8_UINT, RG8_UINT, R, R, R, G, UINT, G8R8, T),
C4(A, L16A16_UNORM, RG16_UNORM, R, R, R, G, UNORM, R16_G16, T),
C4(A, L16A16_SNORM, RG16_SNORM, R, R, R, G, SNORM, R16_G16, T),
C4(A, L16A16_FLOAT, RG16_FLOAT, R, R, R, G, FLOAT, R16_G16, T),
C4(A, L16A16_SINT, RG16_SINT, R, R, R, G, SINT, R16_G16, T),
C4(A, L16A16_UINT, RG16_UINT, R, R, R, G, UINT, R16_G16, T),
C4(A, L32A32_FLOAT, RG32_FLOAT, R, R, R, G, FLOAT, R32_G32, T),
C4(A, L32A32_SINT, RG32_SINT, R, R, R, G, SINT, R32_G32, T),
C4(A, L32A32_UINT, RG32_UINT, R, R, R, G, UINT, R32_G32, T),
F3(A, DXT1_RGB, NONE, R, G, B, xx, UNORM, DXT1, T),
F3(A, DXT1_SRGB, NONE, R, G, B, xx, UNORM, DXT1, T),
C4(A, DXT1_RGBA, NONE, R, G, B, A, UNORM, DXT1, T),
C4(A, DXT1_SRGBA, NONE, R, G, B, A, UNORM, DXT1, T),
C4(A, DXT3_RGBA, NONE, R, G, B, A, UNORM, DXT23, T),
C4(A, DXT3_SRGBA, NONE, R, G, B, A, UNORM, DXT23, T),
C4(A, DXT5_RGBA, NONE, R, G, B, A, UNORM, DXT45, T),
C4(A, DXT5_SRGBA, NONE, R, G, B, A, UNORM, DXT45, T),
F1(A, RGTC1_UNORM, NONE, R, xx, xx, xx, UNORM, DXN1, T),
F1(A, RGTC1_SNORM, NONE, R, xx, xx, xx, SNORM, DXN1, T),
F2(A, RGTC2_UNORM, NONE, R, G, xx, xx, UNORM, DXN2, T),
F2(A, RGTC2_SNORM, NONE, R, G, xx, xx, SNORM, DXN2, T),
F3(A, LATC1_UNORM, NONE, R, R, R, xx, UNORM, DXN1, T),
F3(A, LATC1_SNORM, NONE, R, R, R, xx, SNORM, DXN1, T),
C4(A, LATC2_UNORM, NONE, R, R, R, G, UNORM, DXN2, T),
C4(A, LATC2_SNORM, NONE, R, R, R, G, SNORM, DXN2, T),
C4(C, BPTC_RGBA_UNORM, NONE, R, G, B, A, UNORM, BC7U, t),
C4(C, BPTC_SRGBA, NONE, R, G, B, A, UNORM, BC7U, t),
F3(C, BPTC_RGB_FLOAT, NONE, R, G, B, xx, FLOAT, BC6H_SF16, t),
F3(C, BPTC_RGB_UFLOAT, NONE, R, G, B, xx, FLOAT, BC6H_UF16, t),
#if NOUVEAU_DRIVER == 0xc0
F3(D, ETC1_RGB8, NONE, R, G, B, xx, UNORM, ETC2_RGB, t),
F3(D, ETC2_RGB8, NONE, R, G, B, xx, UNORM, ETC2_RGB, t),
F3(D, ETC2_SRGB8, NONE, R, G, B, xx, UNORM, ETC2_RGB, t),
C4(D, ETC2_RGB8A1, NONE, R, G, B, A, UNORM, ETC2_RGB_PTA, t),
C4(D, ETC2_SRGB8A1, NONE, R, G, B, A, UNORM, ETC2_RGB_PTA, t),
C4(D, ETC2_RGBA8, NONE, R, G, B, A, UNORM, ETC2_RGBA, t),
C4(D, ETC2_SRGBA8, NONE, R, G, B, A, UNORM, ETC2_RGBA, t),
F1(D, ETC2_R11_UNORM, NONE, R, xx, xx, xx, UNORM, EAC, t),
F1(D, ETC2_R11_SNORM, NONE, R, xx, xx, xx, SNORM, EAC, t),
F2(D, ETC2_RG11_UNORM, NONE, R, G, xx, xx, UNORM, EACX2, t),
F2(D, ETC2_RG11_SNORM, NONE, R, G, xx, xx, SNORM, EACX2, t),
C4(D, ASTC_4x4, NONE, R, G, B, A, UNORM, ASTC_2D_4X4, t),
C4(D, ASTC_5x4, NONE, R, G, B, A, UNORM, ASTC_2D_5X4, t),
C4(D, ASTC_5x5, NONE, R, G, B, A, UNORM, ASTC_2D_5X5, t),
C4(D, ASTC_6x5, NONE, R, G, B, A, UNORM, ASTC_2D_6X5, t),
C4(D, ASTC_6x6, NONE, R, G, B, A, UNORM, ASTC_2D_6X6, t),
C4(D, ASTC_8x5, NONE, R, G, B, A, UNORM, ASTC_2D_8X5, t),
C4(D, ASTC_8x6, NONE, R, G, B, A, UNORM, ASTC_2D_8X6, t),
C4(D, ASTC_8x8, NONE, R, G, B, A, UNORM, ASTC_2D_8X8, t),
C4(D, ASTC_10x5, NONE, R, G, B, A, UNORM, ASTC_2D_10X5, t),
C4(D, ASTC_10x6, NONE, R, G, B, A, UNORM, ASTC_2D_10X6, t),
C4(D, ASTC_10x8, NONE, R, G, B, A, UNORM, ASTC_2D_10X8, t),
C4(D, ASTC_10x10, NONE, R, G, B, A, UNORM, ASTC_2D_10X10, t),
C4(D, ASTC_12x10, NONE, R, G, B, A, UNORM, ASTC_2D_12X10, t),
C4(D, ASTC_12x12, NONE, R, G, B, A, UNORM, ASTC_2D_12X12, t),
C4(D, ASTC_4x4_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_4X4, t),
C4(D, ASTC_5x4_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_5X4, t),
C4(D, ASTC_5x5_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_5X5, t),
C4(D, ASTC_6x5_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_6X5, t),
C4(D, ASTC_6x6_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_6X6, t),
C4(D, ASTC_8x5_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_8X5, t),
C4(D, ASTC_8x6_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_8X6, t),
C4(D, ASTC_8x8_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_8X8, t),
C4(D, ASTC_10x5_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_10X5, t),
C4(D, ASTC_10x6_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_10X6, t),
C4(D, ASTC_10x8_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_10X8, t),
C4(D, ASTC_10x10_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_10X10, t),
C4(D, ASTC_12x10_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_12X10, t),
C4(D, ASTC_12x12_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_12X12, t),
#endif
C4(A, R32G32B32A32_FLOAT, RGBA32_FLOAT, R, G, B, A, FLOAT, R32_G32_B32_A32, IB),
C4(A, R32G32B32A32_UNORM, NONE, R, G, B, A, UNORM, R32_G32_B32_A32, T),
C4(A, R32G32B32A32_SNORM, NONE, R, G, B, A, SNORM, R32_G32_B32_A32, T),
C4(A, R32G32B32A32_SINT, RGBA32_SINT, R, G, B, A, SINT, R32_G32_B32_A32, IR),
C4(A, R32G32B32A32_UINT, RGBA32_UINT, R, G, B, A, UINT, R32_G32_B32_A32, IR),
F3(A, R32G32B32X32_FLOAT, RGBX32_FLOAT, R, G, B, xx, FLOAT, R32_G32_B32_A32, TB),
I3(A, R32G32B32X32_SINT, RGBX32_SINT, R, G, B, xx, SINT, R32_G32_B32_A32, TR),
I3(A, R32G32B32X32_UINT, RGBX32_UINT, R, G, B, xx, UINT, R32_G32_B32_A32, TR),
F3(C, R32G32B32_FLOAT, NONE, R, G, B, xx, FLOAT, R32_G32_B32, t),
I3(C, R32G32B32_SINT, NONE, R, G, B, xx, SINT, R32_G32_B32, t),
I3(C, R32G32B32_UINT, NONE, R, G, B, xx, UINT, R32_G32_B32, t),
F2(A, R32G32_FLOAT, RG32_FLOAT, R, G, xx, xx, FLOAT, R32_G32, IB),
F2(A, R32G32_UNORM, NONE, R, G, xx, xx, UNORM, R32_G32, T),
F2(A, R32G32_SNORM, NONE, R, G, xx, xx, SNORM, R32_G32, T),
I2(A, R32G32_SINT, RG32_SINT, R, G, xx, xx, SINT, R32_G32, IR),
I2(A, R32G32_UINT, RG32_UINT, R, G, xx, xx, UINT, R32_G32, IR),
F1(A, R32_FLOAT, R32_FLOAT, R, xx, xx, xx, FLOAT, R32, IB),
F1(A, R32_UNORM, NONE, R, xx, xx, xx, UNORM, R32, T),
F1(A, R32_SNORM, NONE, R, xx, xx, xx, SNORM, R32, T),
I1(A, R32_SINT, R32_SINT, R, xx, xx, xx, SINT, R32, IR),
I1(A, R32_UINT, R32_UINT, R, xx, xx, xx, UINT, R32, IR),
C4(A, R16G16B16A16_FLOAT, RGBA16_FLOAT, R, G, B, A, FLOAT, R16_G16_B16_A16, IB),
C4(A, R16G16B16A16_UNORM, RGBA16_UNORM, R, G, B, A, UNORM, R16_G16_B16_A16, IC),
C4(A, R16G16B16A16_SNORM, RGBA16_SNORM, R, G, B, A, SNORM, R16_G16_B16_A16, IC),
C4(A, R16G16B16A16_SINT, RGBA16_SINT, R, G, B, A, SINT, R16_G16_B16_A16, IR),
C4(A, R16G16B16A16_UINT, RGBA16_UINT, R, G, B, A, UINT, R16_G16_B16_A16, IR),
F3(A, R16G16B16X16_FLOAT, RGBX16_FLOAT, R, G, B, xx, FLOAT, R16_G16_B16_A16, TB),
F3(A, R16G16B16X16_UNORM, RGBA16_UNORM, R, G, B, xx, UNORM, R16_G16_B16_A16, T),
F3(A, R16G16B16X16_SNORM, RGBA16_SNORM, R, G, B, xx, SNORM, R16_G16_B16_A16, T),
I3(A, R16G16B16X16_SINT, RGBA16_SINT, R, G, B, xx, SINT, R16_G16_B16_A16, TR),
I3(A, R16G16B16X16_UINT, RGBA16_UINT, R, G, B, xx, UINT, R16_G16_B16_A16, TR),
F2(A, R16G16_FLOAT, RG16_FLOAT, R, G, xx, xx, FLOAT, R16_G16, IB),
F2(A, R16G16_UNORM, RG16_UNORM, R, G, xx, xx, UNORM, R16_G16, IC),
F2(A, R16G16_SNORM, RG16_SNORM, R, G, xx, xx, SNORM, R16_G16, IC),
I2(A, R16G16_SINT, RG16_SINT, R, G, xx, xx, SINT, R16_G16, IR),
I2(A, R16G16_UINT, RG16_UINT, R, G, xx, xx, UINT, R16_G16, IR),
F1(A, R16_FLOAT, R16_FLOAT, R, xx, xx, xx, FLOAT, R16, IB),
F1(A, R16_UNORM, R16_UNORM, R, xx, xx, xx, UNORM, R16, IC),
F1(A, R16_SNORM, R16_SNORM, R, xx, xx, xx, SNORM, R16, IC),
I1(A, R16_SINT, R16_SINT, R, xx, xx, xx, SINT, R16, IR),
I1(A, R16_UINT, R16_UINT, R, xx, xx, xx, UINT, R16, IR),
C4(A, R8G8B8A8_SNORM, RGBA8_SNORM, R, G, B, A, SNORM, A8B8G8R8, IC),
C4(A, R8G8B8A8_SINT, RGBA8_SINT, R, G, B, A, SINT, A8B8G8R8, IR),
C4(A, R8G8B8A8_UINT, RGBA8_UINT, R, G, B, A, UINT, A8B8G8R8, IR),
F3(A, R8G8B8X8_SNORM, RGBA8_SNORM, R, G, B, xx, SNORM, A8B8G8R8, T),
I3(A, R8G8B8X8_SINT, RGBA8_SINT, R, G, B, xx, SINT, A8B8G8R8, TR),
I3(A, R8G8B8X8_UINT, RGBA8_UINT, R, G, B, xx, UINT, A8B8G8R8, TR),
F2(A, R8G8_UNORM, RG8_UNORM, R, G, xx, xx, UNORM, G8R8, IB),
F2(A, R8G8_SNORM, RG8_SNORM, R, G, xx, xx, SNORM, G8R8, IC),
I2(A, R8G8_SINT, RG8_SINT, R, G, xx, xx, SINT, G8R8, IR),
I2(A, R8G8_UINT, RG8_UINT, R, G, xx, xx, UINT, G8R8, IR),
#if NOUVEAU_DRIVER < 0xc0
/* On Fermi+, the green component doesn't get decoded? */
F2(A, R8G8_SRGB, NONE, R, G, xx, xx, UNORM, G8R8, T),
#endif
F1(A, R8_UNORM, R8_UNORM, R, xx, xx, xx, UNORM, R8, IB),
F1(A, R8_SNORM, R8_SNORM, R, xx, xx, xx, SNORM, R8, IC),
I1(A, R8_SINT, R8_SINT, R, xx, xx, xx, SINT, R8, IR),
I1(A, R8_UINT, R8_UINT, R, xx, xx, xx, UINT, R8, IR),
F1(A, R8_SRGB, NONE, R, xx, xx, xx, UNORM, R8, T),
F3(A, R8G8_B8G8_UNORM, NONE, R, G, B, xx, UNORM, G8B8G8R8, T),
F3(A, G8R8_B8R8_UNORM, NONE, G, R, B, xx, UNORM, G8B8G8R8, T),
F3(A, G8R8_G8B8_UNORM, NONE, R, G, B, xx, UNORM, B8G8R8G8, T),
F3(A, R8G8_R8B8_UNORM, NONE, G, R, B, xx, UNORM, B8G8R8G8, T),
F1(A, R1_UNORM, BITMAP, R, xx, xx, xx, UNORM, R1, T),
C4(A, R4A4_UNORM, NONE, R, ZERO, ZERO, G, UNORM, G4R4, T),
C4(A, R8A8_UNORM, NONE, R, ZERO, ZERO, G, UNORM, G8R8, T),
C4(A, A4R4_UNORM, NONE, G, ZERO, ZERO, R, UNORM, G4R4, T),
C4(A, A8R8_UNORM, NONE, G, ZERO, ZERO, R, UNORM, G8R8, T),
SF(A, R8SG8SB8UX8U_NORM, 0, R, G, B, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, A8B8G8R8, T),
SF(A, R5SG5SB6U_NORM, 0, R, G, B, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, B6G5R5, T),
};

View File

@@ -12,18 +12,4 @@ struct nvk_format {
const struct nvk_format *nvk_get_format(VkFormat vk_format);
/* Packed per-format texture header description: one entry of the
 * pipe_to_nvk_tic_format table. gm107_tic2_0_format() shifts these fields
 * into their GM107_TIC2_0_* bit positions.
 */
struct nvk_tic_format {
/* Component-sizes enum value (a *_COMPONENTS_SIZES_* constant). */
unsigned comp_sizes:8;
/* Data type of each channel (a G80_TIC_TYPE_* constant). */
unsigned type_r:3;
unsigned type_g:3;
unsigned type_b:3;
unsigned type_a:3;
/* Channel source (a G80_TIC_SOURCE_* constant) that tic_swizzle() returns
 * for VK_COMPONENT_SWIZZLE_R, _G, _B and _A respectively.
 */
unsigned src_x:3;
unsigned src_y:3;
unsigned src_z:3;
unsigned src_w:3;
};
extern const struct nvk_tic_format pipe_to_nvk_tic_format[];
#endif

View File

@@ -7,86 +7,15 @@
#include "nvk_format.h"
#include "nvk_physical_device.h"
/* calculates optimal tiling for a given CreateInfo
*
* This ends up being quite wasteful, but it's more or less a plain copy of what gallium does
*/
static struct nvk_tile
nvk_image_tile_from_create_info(
VkExtent3D extent,
const VkImageCreateInfo *pCreateInfo,
uint64_t modifier)
static enum nil_image_dim
vk_image_type_to_nil_dim(VkImageType type)
{
VkImageTiling tiling = pCreateInfo->tiling;
struct nvk_tile tile = {};
switch (tiling) {
case VK_IMAGE_TILING_LINEAR:
tile.is_tiled = false;
return tile;
case VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT:
tile.is_fermi = true;
tile.is_tiled = true;
tile.x = 0;
tile.y = modifier & 0xf;
tile.z = 0;
return tile;
case VK_IMAGE_TILING_OPTIMAL:
/* code is below */
break;
switch (type) {
case VK_IMAGE_TYPE_1D: return NIL_IMAGE_DIM_1D;
case VK_IMAGE_TYPE_2D: return NIL_IMAGE_DIM_2D;
case VK_IMAGE_TYPE_3D: return NIL_IMAGE_DIM_3D;
default:
assert(!"unknown image tiling");
break;
}
uint32_t height = extent.height;
uint32_t depth = extent.depth;
// fermi is the baseline anyway (for now)
tile.is_fermi = true;
tile.is_tiled = true;
// always 0 for now
tile.x = 0;
if (height >= 256) tile.y = 5;
else if (height >= 128) tile.y = 4;
else if (height >= 64) tile.y = 3;
else if (height >= 32) tile.y = 2;
else if (height >= 16) tile.y = 1;
else tile.y = 0;
// not quite sure why, but gallium does the same
if (pCreateInfo->imageType == VK_IMAGE_TYPE_3D)
tile.y = MIN2(tile.y, 2);
if (pCreateInfo->flags & VK_IMAGE_CREATE_2D_VIEW_COMPATIBLE_BIT_EXT)
return tile;
if (depth >= 32) tile.z = 5;
else if (depth >= 16) tile.z = 4;
else if (depth >= 8) tile.z = 3;
else if (depth >= 4) tile.z = 2;
else if (depth >= 2) tile.z = 1;
else tile.z = 0;
return tile;
}
static VkExtent3D
nvk_image_tile_to_blocks(struct nvk_tile tile)
{
if (!tile.is_tiled) {
return (VkExtent3D){1, 1, 1};
} else {
uint32_t height = tile.is_fermi ? 8 : 4;
return (VkExtent3D){
.width = 64 << tile.x,
.height = height << tile.y,
.depth = 1 << tile.z,
};
unreachable("Invalid image type");
}
}
@@ -94,37 +23,24 @@ static VkResult nvk_image_init(struct nvk_device *device,
struct nvk_image *image,
const VkImageCreateInfo *pCreateInfo)
{
uint64_t block_size = vk_format_get_blocksizebits(pCreateInfo->format) / 8;
vk_image_init(&device->vk, &image->vk, pCreateInfo);
image->format = nvk_get_format(pCreateInfo->format);
assert(image->format);
struct nil_image_init_info nil_info = {
.dim = vk_image_type_to_nil_dim(pCreateInfo->imageType),
.format = vk_format_to_pipe_format(pCreateInfo->format),
.extent_px = {
.w = pCreateInfo->extent.width,
.h = pCreateInfo->extent.height,
.d = pCreateInfo->extent.depth,
.a = pCreateInfo->arrayLayers,
},
.levels = pCreateInfo->mipLevels,
.samples = pCreateInfo->samples,
};
for (uint32_t l = 0; l < pCreateInfo->mipLevels; l++) {
struct nvk_image_level *level = &image->level[l];
VkExtent3D extent = vk_image_mip_level_extent(&image->vk, l);
struct nvk_tile tile = nvk_image_tile_from_create_info(
extent,
pCreateInfo,
0
);
VkExtent3D block = nvk_image_tile_to_blocks(tile);
/* need to apply a minimum alignment */
image->min_size = align(image->min_size, 0x80);
level->offset = image->min_size;
level->tile = tile;
level->extent = extent;
level->row_stride = align(extent.width * block_size, block.width);
/* for untiled images we need to align the row_stride to 0x80 */
if (!tile.is_tiled)
level->row_stride = align(level->row_stride, 0x80);
level->layer_stride = level->row_stride * align(extent.height, block.height);
image->min_size += level->layer_stride * align(extent.depth * image->vk.array_layers, block.depth);
}
ASSERTED bool ok = nil_image_init(nvk_device_physical(device)->dev,
&image->nil, &nil_info);
assert(ok);
return VK_SUCCESS;
}
@@ -186,7 +102,7 @@ VKAPI_ATTR void VKAPI_CALL nvk_GetImageMemoryRequirements2(
// TODO hope for the best?
pMemoryRequirements->memoryRequirements.memoryTypeBits = memory_types;
pMemoryRequirements->memoryRequirements.alignment = 0x1000;
pMemoryRequirements->memoryRequirements.size = image->min_size;
pMemoryRequirements->memoryRequirements.size = image->nil.size_B;
vk_foreach_struct_const(ext, pInfo->pNext) {
switch (ext->sType) {

View File

@@ -4,50 +4,17 @@
#include "nvk_private.h"
#include "nvk_device_memory.h"
#include "nil_image.h"
#include "nouveau_bo.h"
#include "nouveau_push.h"
#include "vulkan/runtime/vk_image.h"
#define NVK_MAX_MIP_LEVELS 7
/* Tiling parameters for one image level.
 *
 * x, y and z are shift counts rather than sizes; see
 * nvk_image_tile_to_blocks() for how they expand into a block extent.
 *
 * x can either be 0x0 or 0xe
 * 0x0: 64 blocks
 * 0xe: 16 blocks (not quite sure how that's even used, so we don't use it)
 *
 * tile size on y depends on the is_fermi flag:
 * !is_fermi: 4 << y
 * is_fermi: 8 << y (required Pascal+)
 *
 * tile size on z is 1 << z
 *
 * is_tiled is false for VK_IMAGE_TILING_LINEAR images; in that case
 * nvk_image_tile_from_create_info() leaves every other field zeroed.
 */
struct nvk_tile {
uint8_t z:4;
uint8_t y:4;
uint8_t x:4;
bool is_fermi:1;
bool is_tiled:1;
};
struct nvk_format;
/* Layout of a single mip level, filled in by nvk_image_init(). */
struct nvk_image_level {
/* Offset of this level from the start of the image; nvk_image_base_address()
 * adds it to the BO offset and the image's offset.
 */
VkDeviceSize offset;
/* Extent of this level as returned by vk_image_mip_level_extent(). */
VkExtent3D extent;
/* Stride between rows: width times the format block size, aligned to the
 * tile block width (and to 0x80 for untiled levels).
 */
uint32_t row_stride;
/* row_stride times the level height aligned to the tile block height. */
uint32_t layer_stride;
/* Tiling parameters chosen for this level. */
struct nvk_tile tile;
};
struct nvk_image {
struct vk_image vk;
struct nvk_device_memory *mem;
VkDeviceSize offset;
VkDeviceSize min_size;
const struct nvk_format *format;
struct nvk_image_level level[NVK_MAX_MIP_LEVELS];
struct nil_image nil;
};
VK_DEFINE_HANDLE_CASTS(nvk_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE)
@@ -61,9 +28,9 @@ nvk_push_image_ref(struct nouveau_ws_push *push,
}
static inline uint64_t
nvk_image_base_address(struct nvk_image *image, uint32_t level)
nvk_image_base_address(struct nvk_image *image)
{
return image->mem->bo->offset + image->offset + image->level[level].offset;
return image->mem->bo->offset + image->offset;
}
#endif

View File

@@ -1,97 +1,39 @@
#include "nvk_image_view.h"
#include "nvk_device.h"
#include "nvk_physical_device.h"
#include "nvk_format.h"
#include "nvk_image.h"
#include "vulkan/util/vk_format.h"
#include "gallium/drivers/nouveau/nv50/g80_defs.xml.h"
#include "gallium/drivers/nouveau/nv50/g80_texture.xml.h"
#include "gallium/drivers/nouveau/nvc0/gm107_texture.xml.h"
/* Picks the TIC source selector for one component of a swizzle.
 *
 * R/G/B/A select the format entry's src_x/src_y/src_z/src_w, ONE selects the
 * integer or float "one" source depending on is_int, and ZERO selects
 * G80_TIC_SOURCE_ZERO.  Any other value hits unreachable().
 *
 * NOTE(review): VK_COMPONENT_SWIZZLE_IDENTITY has no case here; presumably the
 * swizzle is already resolved before this is called -- confirm.
 */
static inline uint32_t
tic_swizzle(const struct nvk_tic_format *fmt,
VkComponentSwizzle swz, bool is_int)
static enum nil_view_type
vk_image_view_type_to_nil_view_type(VkImageViewType view_type)
{
switch (swz) {
case VK_COMPONENT_SWIZZLE_R: return fmt->src_x;
case VK_COMPONENT_SWIZZLE_G: return fmt->src_y;
case VK_COMPONENT_SWIZZLE_B: return fmt->src_z;
case VK_COMPONENT_SWIZZLE_A: return fmt->src_w;
case VK_COMPONENT_SWIZZLE_ONE:
return is_int ? G80_TIC_SOURCE_ONE_INT : G80_TIC_SOURCE_ONE_FLOAT;
case VK_COMPONENT_SWIZZLE_ZERO:
return G80_TIC_SOURCE_ZERO;
default:
unreachable("Invalid component swizzle");
}
}
/* Builds the first TIC dword for a format and component mapping.
 *
 * The format's entry in pipe_to_nvk_tic_format (indexed by its pipe_format)
 * supplies the component sizes and the per-channel data types; the X/Y/Z/W
 * source selectors are chosen by tic_swizzle() from the r/g/b/a members of
 * the component mapping.
 */
static uint32_t
gm107_tic2_0_format(VkFormat format, VkComponentMapping swizzle)
{
   const enum pipe_format pfmt = vk_format_to_pipe_format(format);
   const struct nvk_tic_format *entry = &pipe_to_nvk_tic_format[pfmt];
   const bool pure_int = util_format_is_pure_integer(pfmt);

   /* Resolve all four selectors up front, in r, g, b, a order. */
   const uint32_t src_x = tic_swizzle(entry, swizzle.r, pure_int);
   const uint32_t src_y = tic_swizzle(entry, swizzle.g, pure_int);
   const uint32_t src_z = tic_swizzle(entry, swizzle.b, pure_int);
   const uint32_t src_w = tic_swizzle(entry, swizzle.a, pure_int);

   uint32_t dw = 0;

   /* Component layout and per-channel data types from the format table. */
   dw |= entry->comp_sizes << GM107_TIC2_0_COMPONENTS_SIZES__SHIFT;
   dw |= entry->type_r << GM107_TIC2_0_R_DATA_TYPE__SHIFT;
   dw |= entry->type_g << GM107_TIC2_0_G_DATA_TYPE__SHIFT;
   dw |= entry->type_b << GM107_TIC2_0_B_DATA_TYPE__SHIFT;
   dw |= entry->type_a << GM107_TIC2_0_A_DATA_TYPE__SHIFT;

   /* Source selectors for the X/Y/Z/W outputs. */
   dw |= src_x << GM107_TIC2_0_X_SOURCE__SHIFT;
   dw |= src_y << GM107_TIC2_0_Y_SOURCE__SHIFT;
   dw |= src_z << GM107_TIC2_0_Z_SOURCE__SHIFT;
   dw |= src_w << GM107_TIC2_0_W_SOURCE__SHIFT;

   return dw;
}
/* Maps a VkImageViewType to the matching GM107_TIC2_4_TEXTURE_TYPE_* value. */
static uint32_t
gm107_tic2_4_view_type(VkImageViewType vk_type)
{
switch (vk_type) {
case VK_IMAGE_VIEW_TYPE_1D:
return GM107_TIC2_4_TEXTURE_TYPE_ONE_D;
case VK_IMAGE_VIEW_TYPE_2D:
return GM107_TIC2_4_TEXTURE_TYPE_TWO_D;
case VK_IMAGE_VIEW_TYPE_3D:
return GM107_TIC2_4_TEXTURE_TYPE_THREE_D;
case VK_IMAGE_VIEW_TYPE_CUBE:
return GM107_TIC2_4_TEXTURE_TYPE_CUBEMAP;
case VK_IMAGE_VIEW_TYPE_1D_ARRAY:
return GM107_TIC2_4_TEXTURE_TYPE_ONE_D_ARRAY;
case VK_IMAGE_VIEW_TYPE_2D_ARRAY:
return GM107_TIC2_4_TEXTURE_TYPE_TWO_D_ARRAY;
case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY:
return GM107_TIC2_4_TEXTURE_TYPE_CUBE_ARRAY;
/* One-to-one mapping from each VkImageViewType to its NIL_VIEW_TYPE_*
 * counterpart; any other value hits unreachable().
 */
switch (view_type) {
case VK_IMAGE_VIEW_TYPE_1D: return NIL_VIEW_TYPE_1D;
case VK_IMAGE_VIEW_TYPE_2D: return NIL_VIEW_TYPE_2D;
case VK_IMAGE_VIEW_TYPE_3D: return NIL_VIEW_TYPE_3D;
case VK_IMAGE_VIEW_TYPE_CUBE: return NIL_VIEW_TYPE_CUBE;
case VK_IMAGE_VIEW_TYPE_1D_ARRAY: return NIL_VIEW_TYPE_1D_ARRAY;
case VK_IMAGE_VIEW_TYPE_2D_ARRAY: return NIL_VIEW_TYPE_2D_ARRAY;
case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: return NIL_VIEW_TYPE_CUBE_ARRAY;
default:
unreachable("Invalid image view type");
}
}
/* Maps a Vulkan sample count to the GM107_TIC2_7_MULTI_SAMPLE_COUNT_* value:
 * 1 -> 1X1, 2 -> 2X1, 4 -> 2X2, 8 -> 4X2, 16 -> 4X4.
 */
static uint32_t
gm107_tic7_4_multi_sample_count(VkSampleCountFlagBits samples)
/* Maps a VkComponentSwizzle to a pipe_swizzle: R/G/B/A become
 * PIPE_SWIZZLE_X/Y/Z/W, ONE becomes PIPE_SWIZZLE_1 and ZERO becomes
 * PIPE_SWIZZLE_0.  Any other value hits unreachable().
 *
 * NOTE(review): VK_COMPONENT_SWIZZLE_IDENTITY has no case; presumably the
 * swizzle is already resolved before this is called -- confirm.
 */
static enum pipe_swizzle
vk_swizzle_to_pipe(VkComponentSwizzle swizzle)
{
switch (samples) {
case VK_SAMPLE_COUNT_1_BIT:
return GM107_TIC2_7_MULTI_SAMPLE_COUNT_1X1;
case VK_SAMPLE_COUNT_2_BIT:
return GM107_TIC2_7_MULTI_SAMPLE_COUNT_2X1;
case VK_SAMPLE_COUNT_4_BIT:
return GM107_TIC2_7_MULTI_SAMPLE_COUNT_2X2;
case VK_SAMPLE_COUNT_8_BIT:
return GM107_TIC2_7_MULTI_SAMPLE_COUNT_4X2;
case VK_SAMPLE_COUNT_16_BIT:
return GM107_TIC2_7_MULTI_SAMPLE_COUNT_4X4;
switch (swizzle) {
case VK_COMPONENT_SWIZZLE_R: return PIPE_SWIZZLE_X;
case VK_COMPONENT_SWIZZLE_G: return PIPE_SWIZZLE_Y;
case VK_COMPONENT_SWIZZLE_B: return PIPE_SWIZZLE_Z;
case VK_COMPONENT_SWIZZLE_A: return PIPE_SWIZZLE_W;
case VK_COMPONENT_SWIZZLE_ONE: return PIPE_SWIZZLE_1;
case VK_COMPONENT_SWIZZLE_ZERO: return PIPE_SWIZZLE_0;
default:
unreachable("Unsupported sample count");
unreachable("Invalid component swizzle");
}
}
@@ -118,61 +60,25 @@ nvk_CreateImageView(VkDevice _device,
"Failed to allocate image descriptor");
}
uint32_t tic[8] = { 0, };
struct nil_view nil_view = {
.type = vk_image_view_type_to_nil_view_type(view->vk.view_type),
.format = vk_format_to_pipe_format(view->vk.format),
.base_level = view->vk.base_mip_level,
.num_levels = view->vk.level_count,
.base_array_layer = view->vk.base_array_layer,
.array_len = view->vk.layer_count,
.swizzle = {
vk_swizzle_to_pipe(view->vk.swizzle.r),
vk_swizzle_to_pipe(view->vk.swizzle.g),
vk_swizzle_to_pipe(view->vk.swizzle.b),
vk_swizzle_to_pipe(view->vk.swizzle.a),
},
};
tic[0] = gm107_tic2_0_format(view->vk.view_format, view->vk.swizzle);
tic[3] |= GM107_TIC2_3_LOD_ANISO_QUALITY_2;
tic[4] |= GM107_TIC2_4_SECTOR_PROMOTION_PROMOTE_TO_2_V;
tic[4] |= GM107_TIC2_4_BORDER_SIZE_SAMPLER_COLOR;
if (vk_format_is_srgb(view->vk.view_format))
tic[4] |= GM107_TIC2_4_SRGB_CONVERSION;
/* TODO: Unnormalized? */
tic[5] |= GM107_TIC2_5_NORMALIZED_COORDS;
/* TODO: What about GOBS_PER_BLOCK_WIDTH? */
tic[2] |= GM107_TIC2_2_HEADER_VERSION_BLOCKLINEAR;
tic[3] |= image->level[0].tile.y << GM107_TIC2_3_GOBS_PER_BLOCK_HEIGHT__SHIFT;
tic[3] |= image->level[0].tile.z << GM107_TIC2_3_GOBS_PER_BLOCK_DEPTH__SHIFT;
uint64_t address = nvk_image_base_address(image, 0);
tic[1] = address;
tic[2] |= address >> 32;
tic[4] |= gm107_tic2_4_view_type(view->vk.view_type);
/* TODO: NV50_TEXVIEW_FILTER_MSAA8 */
tic[3] |= GM107_TIC2_3_LOD_ANISO_QUALITY_HIGH |
GM107_TIC2_3_LOD_ISO_QUALITY_HIGH;
uint32_t depth;
if (view->vk.view_type == VK_IMAGE_VIEW_TYPE_3D) {
depth = view->vk.extent.depth;
} else if (view->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE ||
view->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) {
depth = view->vk.layer_count / 6;
} else {
depth = view->vk.layer_count;
}
tic[4] |= view->vk.extent.width - 1;
tic[5] |= view->vk.extent.height - 1;
tic[5] |= (depth - 1) << 16;
const uint32_t last_level = view->vk.base_mip_level +
view->vk.level_count - 1;
tic[3] |= last_level << GM107_TIC2_3_MAX_MIP_LEVEL__SHIFT;
tic[6] |= GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC_TWO;
tic[6] |= GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC_ONE;
tic[7] |= (last_level << 4) | view->vk.base_mip_level;
tic[7] |= gm107_tic7_4_multi_sample_count(image->vk.samples);
assert(sizeof(tic) == device->images.desc_size);
memcpy(desc_map, tic, sizeof(tic));
nil_image_fill_tic(nvk_device_physical(device)->dev,
&image->nil, &nil_view,
nvk_image_base_address(image),
desc_map);
*pView = nvk_image_view_to_handle(view);

View File

@@ -479,7 +479,7 @@ nvk_GetPhysicalDeviceImageFormatProperties2(VkPhysicalDevice physicalDevice,
else
return VK_ERROR_FORMAT_NOT_SUPPORTED;
base_props->imageFormatProperties.maxMipLevels = NVK_MAX_MIP_LEVELS;
base_props->imageFormatProperties.maxMipLevels = 15;
base_props->imageFormatProperties.maxArrayLayers = 2048;
base_props->imageFormatProperties.sampleCounts = 0;
base_props->imageFormatProperties.maxResourceSize = 0xffffffff; // TODO proper value