turnip: new clear/blit implementation with shader path fallback

The shader path is used to implement the following cases:
* stencil aspect mask on D24S8 (for image_to_buffer, buffer_to_image)
* clear/copy msaa destination (2D engine can't have msaa dest)

Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3783>
This commit is contained in:
Jonathan Marek
2020-03-13 11:57:23 -04:00
committed by Marge Bot
parent de6967488a
commit 2e084c2cb3
15 changed files with 2581 additions and 1993 deletions

View File

@@ -2383,24 +2383,27 @@ to upconvert to 32b float internally?
<bitfield name="COLOR_FORMAT" low="0" high="7" type="a6xx_format"/>
<bitfield name="TILE_MODE" low="8" high="9" type="a6xx_tile_mode"/>
<bitfield name="COLOR_SWAP" low="10" high="11" type="a3xx_color_swap"/>
<!-- b12 seems to be set when UBWC "FLAGS" buffer enabled -->
<bitfield name="FLAGS" pos="12" type="boolean"/>
<bitfield name="SRGB" pos="13" type="boolean"/>
<!-- the rest is only for src -->
<bitfield name="SAMPLES" low="14" high="15" type="a3xx_msaa_samples"/>
<bitfield name="FILTER" pos="16" type="boolean"/>
<bitfield name="SAMPLES_AVERAGE" pos="18" type="boolean"/>
<bitfield name="UNK20" pos="20" type="boolean"/>
<bitfield name="UNK22" pos="22" type="boolean"/>
</bitset>
<reg32 offset="0x8c17" name="RB_2D_DST_INFO" type="a6xx_2d_surf_info"/>
<reg32 offset="0x8c18" name="RB_2D_DST_LO"/>
<reg32 offset="0x8c19" name="RB_2D_DST_HI"/>
<reg64 offset="0x8c18" name="RB_2D_DST" type="waddress"/>
<reg32 offset="0x8c1a" name="RB_2D_DST_SIZE">
<bitfield name="PITCH" low="0" high="15" shr="6" type="uint"/>
</reg32>
<reg32 offset="0x8c20" name="RB_2D_DST_FLAGS_LO"/>
<reg32 offset="0x8c21" name="RB_2D_DST_FLAGS_HI"/>
<reg64 offset="0x8c20" name="RB_2D_DST_FLAGS" type="waddress"/>
<reg32 offset="0x8c22" name="RB_2D_DST_FLAGS_PITCH">
<bitfield name="PITCH" low="0" high="10" shr="6" type="uint"/>
<bitfield name="ARRAY_PITCH" low="11" high="21" shr="7" type="uint"/>
@@ -3120,12 +3123,14 @@ to upconvert to 32b float internally?
</reg32>
<reg32 offset="0xb4c2" name="SP_PS_2D_SRC_LO"/>
<reg32 offset="0xb4c3" name="SP_PS_2D_SRC_HI"/>
<reg64 offset="0xb4c2" name="SP_PS_2D_SRC" type="waddress"/>
<reg32 offset="0xb4c4" name="SP_PS_2D_SRC_PITCH">
<bitfield name="PITCH" low="9" high="24" shr="6" type="uint"/>
</reg32>
<reg32 offset="0xb4ca" name="SP_PS_2D_SRC_FLAGS_LO"/>
<reg32 offset="0xb4cb" name="SP_PS_2D_SRC_FLAGS_HI"/>
<reg64 offset="0xb4ca" name="SP_PS_2D_SRC_FLAGS" type="waddress"/>
<reg32 offset="0xb4cc" name="SP_PS_2D_SRC_FLAGS_PITCH">
<bitfield name="PITCH" low="0" high="10" shr="6" type="uint"/>
<bitfield name="ARRAY_PITCH" low="11" high="21" shr="7" type="uint"/>

View File

@@ -40,8 +40,7 @@ tu_extensions_c = custom_target(
)
libtu_files = files(
'tu_blit.c',
'tu_blit.h',
'tu_clear_blit.c',
'tu_cmd_buffer.c',
'tu_cs.c',
'tu_cs.h',
@@ -52,11 +51,6 @@ libtu_files = files(
'tu_fence.c',
'tu_formats.c',
'tu_image.c',
'tu_meta_blit.c',
'tu_meta_buffer.c',
'tu_meta_clear.c',
'tu_meta_copy.c',
'tu_meta_resolve.c',
'tu_pass.c',
'tu_pipeline.c',
'tu_pipeline_cache.c',

View File

@@ -1,372 +0,0 @@
/*
* Copyright © 2019 Valve Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Jonathan Marek <jonathan@marek.ca>
*
*/
#include "tu_blit.h"
#include "a6xx.xml.h"
#include "adreno_common.xml.h"
#include "adreno_pm4.xml.h"
#include "vk_format.h"
#include "tu_cs.h"
/* TODO:
* - Avoid disabling tiling for swapped formats
* (image_to_image copy doesn't deal with it)
* - Fix d24_unorm_s8_uint support & aspects
* - UBWC
*/
/* Pick the *_UINT format whose block size in bits equals that of `format`.
 * Only the sizes 8, 16, 32, 64, 96 and 128 are handled; any other size is
 * treated as unreachable.
 */
static VkFormat
blit_copy_format(VkFormat format)
{
   const unsigned bits = vk_format_get_blocksizebits(format);

   if (bits == 8)
      return VK_FORMAT_R8_UINT;
   if (bits == 16)
      return VK_FORMAT_R16_UINT;
   if (bits == 32)
      return VK_FORMAT_R32_UINT;
   if (bits == 64)
      return VK_FORMAT_R32G32_UINT;
   if (bits == 96)
      return VK_FORMAT_R32G32B32_UINT;
   if (bits == 128)
      return VK_FORMAT_R32G32B32A32_UINT;

   unreachable("unhandled format size");
}
/* Build the surface-info dword for `img` with the A6XX_SP_PS_2D_SRC_INFO_*
 * field encoders. emit_blit_step() uses the result for both the source
 * info register and RB_2D_DST_INFO.
 *
 * `fmt` is passed by value, so the adjustments below stay local.
 */
static uint32_t
blit_image_info(const struct tu_blit_surf *img, struct tu_native_format fmt, bool stencil_read)
{
   uint32_t info;

   /* D24S8 is programmed through its R8G8B8A8 alias format */
   if (fmt.fmt == FMT6_Z24_UNORM_S8_UINT)
      fmt.fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;

   /* a stencil read overrides whatever swap the format came with */
   if (stencil_read)
      fmt.swap = XYZW;

   info = A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(fmt.fmt);
   info |= A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(img->tile_mode);
   info |= A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(fmt.swap);

   if (vk_format_is_srgb(img->fmt))
      info |= A6XX_SP_PS_2D_SRC_INFO_SRGB;

   /* a non-zero UBWC size means the surface has a flags buffer */
   if (img->ubwc_size)
      info |= A6XX_SP_PS_2D_SRC_INFO_FLAGS;

   return info;
}
/* Emit the register state and the CP_BLIT packet for a single 2D-engine
 * blit of the rectangle described by blt.
 *
 * For TU_BLIT_CLEAR the source is the solid color in blt->clear_value;
 * for every other type it is the blt->src surface. The destination is
 * always blt->dst. blt itself is not modified.
 */
static void
emit_blit_step(struct tu_cmd_buffer *cmdbuf, struct tu_cs *cs,
const struct tu_blit *blt)
{
struct tu_physical_device *phys_dev = cmdbuf->device->physical_device;
struct tu_native_format dfmt = tu6_format_color(blt->dst.fmt, blt->dst.image_tile_mode);
struct tu_native_format sfmt = tu6_format_texture(blt->src.fmt, blt->src.image_tile_mode);
/* D24S8 destinations are written through the R8G8B8A8 alias format */
if (dfmt.fmt == FMT6_Z24_UNORM_S8_UINT)
dfmt.fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
enum a6xx_2d_ifmt ifmt = tu6_fmt_to_ifmt(dfmt.fmt);
/* sRGB destinations are expected to map to UNORM8 and use the sRGB ifmt */
if (vk_format_is_srgb(blt->dst.fmt)) {
assert(ifmt == R2D_UNORM8);
ifmt = R2D_UNORM8_SRGB;
}
/* the same control word is written to both the RB and the GRAS register */
uint32_t blit_cntl = A6XX_RB_2D_BLIT_CNTL_ROTATE(blt->rotation) |
COND(blt->type == TU_BLIT_CLEAR, A6XX_RB_2D_BLIT_CNTL_SOLID_COLOR) |
A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(dfmt.fmt) | /* not required? */
COND(dfmt.fmt == FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8,
A6XX_RB_2D_BLIT_CNTL_D24S8) |
A6XX_RB_2D_BLIT_CNTL_MASK(0xf) |
A6XX_RB_2D_BLIT_CNTL_IFMT(ifmt);
tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
tu_cs_emit(cs, blit_cntl);
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
tu_cs_emit(cs, blit_cntl);
/*
* Emit source:
*/
if (blt->type == TU_BLIT_CLEAR) {
/* clears have no source surface, only the four solid-color dwords */
tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_SRC_SOLID_C0, 4);
tu_cs_emit(cs, blt->clear_value[0]);
tu_cs_emit(cs, blt->clear_value[1]);
tu_cs_emit(cs, blt->clear_value[2]);
tu_cs_emit(cs, blt->clear_value[3]);
} else {
/* 10 dwords: info, size, 64-bit address, pitch, then five zero dwords */
tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_INFO, 10);
tu_cs_emit(cs, blit_image_info(&blt->src, sfmt, blt->stencil_read) |
A6XX_SP_PS_2D_SRC_INFO_SAMPLES(tu_msaa_samples(blt->src.samples)) |
/* TODO: should disable this bit for integer formats ? */
COND(blt->src.samples > 1, A6XX_SP_PS_2D_SRC_INFO_SAMPLES_AVERAGE) |
COND(blt->filter, A6XX_SP_PS_2D_SRC_INFO_FILTER) |
/* 0x500000 is bits 20 and 22 (UNK20 | UNK22 in the surf-info bitset) */
0x500000);
/* the source size is programmed as the far edge (offset + extent) */
tu_cs_emit(cs, A6XX_SP_PS_2D_SRC_SIZE_WIDTH(blt->src.x + blt->src.width) |
A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(blt->src.y + blt->src.height));
tu_cs_emit_qw(cs, blt->src.va);
tu_cs_emit(cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(blt->src.pitch));
tu_cs_emit(cs, 0x00000000);
tu_cs_emit(cs, 0x00000000);
tu_cs_emit(cs, 0x00000000);
tu_cs_emit(cs, 0x00000000);
tu_cs_emit(cs, 0x00000000);
/* the flags-buffer registers are only written for UBWC sources */
if (blt->src.ubwc_size) {
tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_FLAGS_LO, 6);
tu_cs_emit_qw(cs, blt->src.ubwc_va);
tu_cs_emit(cs, A6XX_SP_PS_2D_SRC_FLAGS_PITCH_PITCH(blt->src.ubwc_pitch) |
A6XX_SP_PS_2D_SRC_FLAGS_PITCH_ARRAY_PITCH(blt->src.ubwc_size >> 2));
tu_cs_emit(cs, 0x00000000);
tu_cs_emit(cs, 0x00000000);
tu_cs_emit(cs, 0x00000000);
}
}
/*
* Emit destination:
*/
/* 9 dwords: info, 64-bit address, pitch, then five zero dwords */
tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 9);
tu_cs_emit(cs, blit_image_info(&blt->dst, dfmt, false));
tu_cs_emit_qw(cs, blt->dst.va);
tu_cs_emit(cs, A6XX_RB_2D_DST_SIZE_PITCH(blt->dst.pitch));
tu_cs_emit(cs, 0x00000000);
tu_cs_emit(cs, 0x00000000);
tu_cs_emit(cs, 0x00000000);
tu_cs_emit(cs, 0x00000000);
tu_cs_emit(cs, 0x00000000);
/* the flags-buffer registers are only written for UBWC destinations */
if (blt->dst.ubwc_size) {
tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_FLAGS_LO, 6);
tu_cs_emit_qw(cs, blt->dst.ubwc_va);
tu_cs_emit(cs, A6XX_RB_2D_DST_FLAGS_PITCH_PITCH(blt->dst.ubwc_pitch) |
A6XX_RB_2D_DST_FLAGS_PITCH_ARRAY_PITCH(blt->dst.ubwc_size >> 2));
tu_cs_emit(cs, 0x00000000);
tu_cs_emit(cs, 0x00000000);
tu_cs_emit(cs, 0x00000000);
}
/* source and destination rectangles; bottom-right corners are inclusive
* (offset + extent - 1). The source rectangle is written even for clears.
*/
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
tu_cs_emit(cs, A6XX_GRAS_2D_SRC_TL_X_X(blt->src.x));
tu_cs_emit(cs, A6XX_GRAS_2D_SRC_BR_X_X(blt->src.x + blt->src.width - 1));
tu_cs_emit(cs, A6XX_GRAS_2D_SRC_TL_Y_Y(blt->src.y));
tu_cs_emit(cs, A6XX_GRAS_2D_SRC_BR_Y_Y(blt->src.y + blt->src.height - 1));
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_2D_DST_TL, 2);
tu_cs_emit(cs, A6XX_GRAS_2D_DST_TL_X(blt->dst.x) |
A6XX_GRAS_2D_DST_TL_Y(blt->dst.y));
tu_cs_emit(cs, A6XX_GRAS_2D_DST_BR_X(blt->dst.x + blt->dst.width - 1) |
A6XX_GRAS_2D_DST_BR_Y(blt->dst.y + blt->dst.height - 1));
/* NOTE(review): event 0x3f is written as a raw number; its meaning is not
* visible here - confirm against the event enum.
*/
tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, 1);
tu_cs_emit(cs, 0x3f);
tu_cs_emit_wfi(cs);
tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
tu_cs_emit(cs, 0);
/* SP_2D_SRC_FORMAT gets 16_16_16_16_FLOAT in place of 10_10_10_2_UNORM_DEST */
if (dfmt.fmt == FMT6_10_10_10_2_UNORM_DEST)
dfmt.fmt = FMT6_16_16_16_16_FLOAT;
/* the SINT/UINT bits come from the source format, the color format from
* the (adjusted) destination format
*/
tu_cs_emit_pkt4(cs, REG_A6XX_SP_2D_SRC_FORMAT, 1);
tu_cs_emit(cs, COND(vk_format_is_sint(blt->src.fmt), A6XX_SP_2D_SRC_FORMAT_SINT) |
COND(vk_format_is_uint(blt->src.fmt), A6XX_SP_2D_SRC_FORMAT_UINT) |
A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT(dfmt.fmt) |
COND(ifmt == R2D_UNORM8_SRGB, A6XX_SP_2D_SRC_FORMAT_SRGB) |
A6XX_SP_2D_SRC_FORMAT_MASK(0xf));
/* RB_UNKNOWN_8E04 holds the per-device blit value around CP_BLIT and is
* set back to 0 afterwards
*/
tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
tu_cs_emit(cs, phys_dev->magic.RB_UNKNOWN_8E04_blit);
tu_cs_emit_pkt7(cs, CP_BLIT, 1);
tu_cs_emit(cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
tu_cs_emit_wfi(cs);
tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
tu_cs_emit(cs, 0);
}
/*
 * Run the blit described by `blt` on the 2D engine.
 *
 * The surfaces are first adjusted according to blt->type, then one blit (or
 * a sequence of partial blits, see the per-layer loop) is emitted for each
 * of the blt->layers layers, bracketed by CCU/cache flush and invalidate
 * events.
 *
 * `blt` is modified in place: formats, offsets, sizes and sample counts may
 * be rewritten, blt->stencil_read is overwritten for copies, blt->src is
 * overwritten for clears, and the base addresses end up advanced past the
 * last layer.
 */
void tu_blit(struct tu_cmd_buffer *cmdbuf, struct tu_cs *cs,
             struct tu_blit *blt)
{
   struct tu_physical_device *phys_dev = cmdbuf->device->physical_device;

   switch (blt->type) {
   case TU_BLIT_COPY:
      blt->stencil_read =
         blt->dst.fmt == VK_FORMAT_R8_UNORM &&
         blt->src.fmt == VK_FORMAT_D24_UNORM_S8_UINT;

      assert(vk_format_get_blocksize(blt->dst.fmt) ==
             vk_format_get_blocksize(blt->src.fmt) || blt->stencil_read);
      assert(blt->src.samples == blt->dst.samples);

      /* compressed surfaces are copied block by block: coordinates are
       * converted to block units and the format to a same-size UINT format
       */
      if (vk_format_is_compressed(blt->src.fmt)) {
         unsigned block_width = vk_format_get_blockwidth(blt->src.fmt);
         unsigned block_height = vk_format_get_blockheight(blt->src.fmt);

         blt->src.pitch /= block_width;
         blt->src.x /= block_width;
         blt->src.y /= block_height;
         blt->src.fmt = blit_copy_format(blt->src.fmt);

         /* for image_to_image copy, width/height is on the src format */
         blt->dst.width = blt->src.width = DIV_ROUND_UP(blt->src.width, block_width);
         blt->dst.height = blt->src.height = DIV_ROUND_UP(blt->src.height, block_height);
      }

      if (vk_format_is_compressed(blt->dst.fmt)) {
         unsigned block_width = vk_format_get_blockwidth(blt->dst.fmt);
         unsigned block_height = vk_format_get_blockheight(blt->dst.fmt);

         blt->dst.pitch /= block_width;
         blt->dst.x /= block_width;
         blt->dst.y /= block_height;
         blt->dst.fmt = blit_copy_format(blt->dst.fmt);
      }

      if (blt->dst.fmt == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
         blt->dst.fmt = blit_copy_format(blt->dst.fmt);

      if (blt->src.fmt == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
         blt->src.fmt = blit_copy_format(blt->src.fmt);

      /* TODO: multisample image copy does not work correctly with tiling/UBWC */
      blt->src.x *= blt->src.samples;
      blt->dst.x *= blt->dst.samples;
      blt->src.width *= blt->src.samples;
      blt->dst.width *= blt->dst.samples;
      blt->src.samples = 1;
      blt->dst.samples = 1;
      break;
   case TU_BLIT_CLEAR:
      /* unsupported format cleared as UINT32 */
      if (blt->dst.fmt == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
         blt->dst.fmt = VK_FORMAT_R32_UINT;
      /* TODO: multisample image clearing also seems not to work with certain
       * formats. The blob uses a shader-based clear in these cases.
       */
      blt->dst.x *= blt->dst.samples;
      blt->dst.width *= blt->dst.samples;
      blt->dst.samples = 1;
      blt->src = blt->dst;
      break;
   default:
      assert(blt->dst.samples == 1);
   }

   tu6_emit_event_write(cmdbuf, cs, LRZ_FLUSH, false);
   tu6_emit_event_write(cmdbuf, cs, PC_CCU_FLUSH_COLOR_TS, true);
   tu6_emit_event_write(cmdbuf, cs, PC_CCU_FLUSH_DEPTH_TS, true);
   tu6_emit_event_write(cmdbuf, cs, PC_CCU_INVALIDATE_COLOR, false);
   tu6_emit_event_write(cmdbuf, cs, PC_CCU_INVALIDATE_DEPTH, false);

   tu_cs_emit_wfi(cs);
   tu_cs_emit_regs(cs,
                   A6XX_RB_CCU_CNTL(.offset = phys_dev->ccu_offset_bypass));

   /* buffer copy setup */
   tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
   tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));

   for (unsigned layer = 0; layer < blt->layers; layer++) {
      if (blt->buffer) {
         /* 1D path: split into steps whose base addresses are 64-byte
          * aligned (the misalignment becomes an x offset) and whose
          * x offset + width does not exceed 0x4000 elements.
          */
         struct tu_blit line_blt = *blt;
         uint64_t dst_va = line_blt.dst.va, src_va = line_blt.src.va;
         unsigned blocksize = vk_format_get_blocksize(blt->src.fmt);
         uint32_t size = line_blt.src.width, tmp;

         while (size) {
            line_blt.src.x = (src_va & 63) / blocksize;
            line_blt.src.va = src_va & ~63;
            tmp = MIN2(size, 0x4000 - line_blt.src.x);

            line_blt.dst.x = (dst_va & 63) / blocksize;
            line_blt.dst.va = dst_va & ~63;
            tmp = MIN2(tmp, 0x4000 - line_blt.dst.x);

            line_blt.src.width = line_blt.dst.width = tmp;

            emit_blit_step(cmdbuf, cs, &line_blt);

            src_va += tmp * blocksize;
            dst_va += tmp * blocksize;
            size -= tmp;
         }
      } else if ((blt->src.va & 63) || (blt->src.pitch & 63)) {
         /* per line copy path (buffer_to_image) */
         assert(blt->type == TU_BLIT_COPY && !blt->src.image_tile_mode);
         struct tu_blit line_blt = *blt;
         /* widen before multiplying: pitch and y are both 32-bit, so the
          * product could otherwise wrap before being added to the address
          */
         uint64_t src_va = line_blt.src.va + (uint64_t) blt->src.pitch * blt->src.y;

         line_blt.src.y = 0;
         line_blt.src.pitch = 0;
         line_blt.src.height = 1;
         line_blt.dst.height = 1;

         for (unsigned y = 0; y < blt->src.height; y++) {
            line_blt.src.x = blt->src.x + (src_va & 63) / vk_format_get_blocksize(blt->src.fmt);
            line_blt.src.va = src_va & ~63;

            emit_blit_step(cmdbuf, cs, &line_blt);

            line_blt.dst.y++;
            src_va += blt->src.pitch;
         }
      } else if ((blt->dst.va & 63) || (blt->dst.pitch & 63)) {
         /* per line copy path (image_to_buffer) */
         assert(blt->type == TU_BLIT_COPY && !blt->dst.image_tile_mode);
         struct tu_blit line_blt = *blt;
         /* widen before multiplying, same reason as the source path above */
         uint64_t dst_va = line_blt.dst.va + (uint64_t) blt->dst.pitch * blt->dst.y;

         line_blt.dst.y = 0;
         line_blt.dst.pitch = 0;
         line_blt.src.height = 1;
         line_blt.dst.height = 1;

         for (unsigned y = 0; y < blt->src.height; y++) {
            line_blt.dst.x = blt->dst.x + (dst_va & 63) / vk_format_get_blocksize(blt->dst.fmt);
            line_blt.dst.va = dst_va & ~63;

            emit_blit_step(cmdbuf, cs, &line_blt);

            line_blt.src.y++;
            dst_va += blt->dst.pitch;
         }
      } else {
         emit_blit_step(cmdbuf, cs, blt);
      }

      /* advance both surfaces (and their UBWC flag buffers) to the next layer */
      blt->dst.va += blt->dst.layer_size;
      blt->src.va += blt->src.layer_size;
      blt->dst.ubwc_va += blt->dst.ubwc_size;
      blt->src.ubwc_va += blt->src.ubwc_size;
   }

   tu6_emit_event_write(cmdbuf, cs, PC_CCU_FLUSH_COLOR_TS, true);
   tu6_emit_event_write(cmdbuf, cs, PC_CCU_FLUSH_DEPTH_TS, true);
   tu6_emit_event_write(cmdbuf, cs, CACHE_FLUSH_TS, true);
   tu6_emit_event_write(cmdbuf, cs, CACHE_INVALIDATE, false);
}

View File

@@ -1,145 +0,0 @@
/*
* Copyright © 2019 Valve Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Jonathan Marek <jonathan@marek.ca>
*
*/
#ifndef TU_BLIT_H
#define TU_BLIT_H
#include "tu_private.h"
#include "vk_format.h"
/* Description of one side (source or destination) of a blit.
 * The per-field notes describe how tu_blit_surf() fills the structure.
 */
struct tu_blit_surf {
/* image format (image->vk_format) */
VkFormat fmt;
/* tile mode of the selected mip level (tu6_get_image_tile_mode) */
enum a6xx_tile_mode tile_mode;
/* tile mode of the image layout as a whole (image->layout.tile_mode) */
enum a6xx_tile_mode image_tile_mode;
/* base address of the selected mip level and layer (tu_image_base) */
uint64_t va;
/* pitch is tu_image_stride(), layer_size is tu_layer_size(); tu_blit()
 * adds layer_size to va after each layer
 */
uint32_t pitch, layer_size;
/* lower corner of the rectangle (minimum of the two corner offsets) */
uint32_t x, y;
/* rectangle size (absolute difference of the two corner offsets) */
uint32_t width, height;
/* sample count of the image (image->samples) */
unsigned samples;
/* UBWC flags buffer: base address, pitch and size (tu_image_ubwc_*);
 * a non-zero ubwc_size is what marks the surface as UBWC-enabled, and
 * tu_blit() adds ubwc_size to ubwc_va after each layer
 */
uint64_t ubwc_va;
uint32_t ubwc_pitch;
uint32_t ubwc_size;
};
/* Describe one mip level / layer of `image` as a blit surface covering the
 * rectangle spanned by offsets[0] and offsets[1]. The two corners may be
 * given in either order: the origin is their minimum and the size their
 * absolute difference.
 */
static inline struct tu_blit_surf
tu_blit_surf(struct tu_image *image,
             VkImageSubresourceLayers subres,
             const VkOffset3D *offsets)
{
   const VkOffset3D *first = &offsets[0];
   const VkOffset3D *second = &offsets[1];
   unsigned layer = subres.baseArrayLayer;

   /* for 3D images the base array layer must be 0 and the lower z offset
    * is used as the layer instead
    */
   if (image->type == VK_IMAGE_TYPE_3D) {
      assert(layer == 0);
      layer = MIN2(first->z, second->z);
   }

   struct tu_blit_surf surf = {
      .fmt = image->vk_format,
      .tile_mode = tu6_get_image_tile_mode(image, subres.mipLevel),
      .image_tile_mode = image->layout.tile_mode,
      .va = tu_image_base(image, subres.mipLevel, layer),
      .pitch = tu_image_stride(image, subres.mipLevel),
      .layer_size = tu_layer_size(image, subres.mipLevel),
      .x = MIN2(first->x, second->x),
      .y = MIN2(first->y, second->y),
      .width = abs(second->x - first->x),
      .height = abs(second->y - first->y),
      .samples = image->samples,
      .ubwc_va = tu_image_ubwc_base(image, subres.mipLevel, layer),
      .ubwc_pitch = tu_image_ubwc_pitch(image, subres.mipLevel),
      .ubwc_size = tu_image_ubwc_size(image, subres.mipLevel),
   };

   return surf;
}
/* Variant of tu_blit_surf() that takes the rectangle as offset + extent.
 * The extent's depth is not used: both corners share offset.z.
 */
static inline struct tu_blit_surf
tu_blit_surf_ext(struct tu_image *image,
                 VkImageSubresourceLayers subres,
                 VkOffset3D offset,
                 VkExtent3D extent)
{
   VkOffset3D corners[2];

   corners[0] = offset;
   corners[1].x = offset.x + extent.width;
   corners[1].y = offset.y + extent.height;
   corners[1].z = offset.z;

   return tu_blit_surf(image, subres, corners);
}
static inline struct tu_blit_surf
tu_blit_surf_whole(struct tu_image *image, int level, int layer)
{
return tu_blit_surf(image, (VkImageSubresourceLayers){
.mipLevel = level,
.baseArrayLayer = layer,
}, (VkOffset3D[]) {
{}, {
u_minify(image->extent.width, level),
u_minify(image->extent.height, level),
}
});
}
static inline struct tu_blit_surf
sysmem_attachment_surf(const struct tu_image_view *view, uint32_t base_layer,
const VkRect2D *rect)
{
return tu_blit_surf_ext(view->image, (VkImageSubresourceLayers) {
.mipLevel = view->base_mip,
.baseArrayLayer = base_layer,
}, (VkOffset3D) {
.x = rect->offset.x,
.y = rect->offset.y,
.z = 0,
}, (VkExtent3D) {
.width = rect->extent.width,
.height = rect->extent.height,
.depth = 1,
});
}
/* Selects how tu_blit() adjusts the surfaces before emitting the blit. */
enum tu_blit_type {
/* no adjustment; tu_blit() only asserts that dst.samples == 1 */
TU_BLIT_DEFAULT,
/* copy: compressed and E5B9G9R9 formats are replaced by same-size UINT
 * formats, and multisampled surfaces are treated as wider single-sampled
 * ones
 */
TU_BLIT_COPY,
/* fill dst with clear_value; src is ignored and overwritten with dst */
TU_BLIT_CLEAR,
};
/* Parameters of one tu_blit() call. Note that tu_blit() modifies the
 * structure it is given.
 */
struct tu_blit {
/* destination surface */
struct tu_blit_surf dst;
/* source surface (overwritten with dst for TU_BLIT_CLEAR) */
struct tu_blit_surf src;
/* number of layers to process; both surfaces advance by their
 * layer_size after each one
 */
uint32_t layers;
/* sets the FILTER bit of the source info register */
bool filter;
/* computed by tu_blit() for TU_BLIT_COPY (D24_UNORM_S8_UINT source with
 * R8_UNORM destination); any value set by the caller is overwritten there
 */
bool stencil_read;
bool buffer; /* 1d copy/clear */
/* value for the ROTATE field of the blit control register */
enum a6xx_rotation rotation;
/* the four solid-color dwords used for TU_BLIT_CLEAR */
uint32_t clear_value[4];
enum tu_blit_type type;
};
/* Emit the blit described by blt into cs. blt is deliberately not const:
 * the implementation rewrites parts of it (formats, offsets, addresses).
 */
void tu_blit(struct tu_cmd_buffer *cmdbuf, struct tu_cs *cs,
struct tu_blit *blt);
#endif /* TU_BLIT_H */

File diff suppressed because it is too large Load Diff

View File

@@ -33,7 +33,6 @@
#include "vk_format.h"
#include "tu_cs.h"
#include "tu_blit.h"
#define OVERFLOW_FLAG_REG REG_A6XX_CP_SCRATCH_REG(0)
@@ -111,69 +110,6 @@ tu_bo_list_merge(struct tu_bo_list *list, const struct tu_bo_list *other)
return VK_SUCCESS;
}
/* True when the view's image uses the linear tile mode and the view's base
 * mip level is not the last level of the image.
 */
static bool
is_linear_mipmapped(const struct tu_image_view *iview)
{
   if (iview->image->layout.tile_mode != TILE6_LINEAR)
      return false;

   return iview->base_mip != iview->image->level_count - 1;
}
/* Decide whether the current render pass has to use sysmem rendering.
 *
 * Returns true for layered framebuffers, and when some stored attachment is
 * a linear, non-last mip level while the render area, aligned up to
 * GMEM_ALIGN_H, extends past the framebuffer height.
 */
static bool
force_sysmem(const struct tu_cmd_buffer *cmd,
             const struct VkRect2D *render_area)
{
   const struct tu_framebuffer *fb = cmd->state.framebuffer;
   const struct tu_render_pass *pass = cmd->state.pass;
   bool linear_mip_store = false;

   /* Layered rendering requires sysmem. */
   if (fb->layers > 1)
      return true;

   /* Iterate over all the places we call tu6_emit_store_attachment() */

   /* stored resolve attachments of every subpass */
   for (unsigned s = 0; s < pass->subpass_count && !linear_mip_store; s++) {
      const struct tu_subpass *subpass = &pass->subpasses[s];

      if (!subpass->resolve_attachments)
         continue;

      for (unsigned c = 0; c < subpass->color_count; c++) {
         const uint32_t a = subpass->resolve_attachments[c].attachment;

         if (a == VK_ATTACHMENT_UNUSED)
            continue;
         if (pass->attachments[a].store_op != VK_ATTACHMENT_STORE_OP_STORE)
            continue;

         if (is_linear_mipmapped(fb->attachments[a].attachment)) {
            linear_mip_store = true;
            break;
         }
      }
   }

   /* stored attachments that live in GMEM */
   for (unsigned a = 0; a < pass->attachment_count && !linear_mip_store; a++) {
      if (pass->attachments[a].gmem_offset < 0)
         continue;
      if (pass->attachments[a].store_op != VK_ATTACHMENT_STORE_OP_STORE)
         continue;

      if (is_linear_mipmapped(fb->attachments[a].attachment))
         linear_mip_store = true;
   }

   if (!linear_mip_store)
      return false;

   /* Linear textures cannot have any padding between mipmap levels and their
    * height isn't padded, while at the same time the GMEM->MEM resolve does
    * not have per-pixel granularity, so if the image height isn't aligned to
    * the resolve granularity and the render area is tall enough, we may wind
    * up writing past the bottom of the image into the next miplevel or even
    * past the end of the image. For the last miplevel, the layout code should
    * insert enough padding so that the overdraw writes to the padding. To
    * work around this, we force-enable sysmem rendering.
    */
   const uint32_t y2 = render_area->offset.y + render_area->extent.height;
   const uint32_t aligned_y2 = ALIGN_POT(y2, GMEM_ALIGN_H);

   return aligned_y2 > fb->height;
}
static void
tu_tiling_config_update_tile_layout(struct tu_tiling_config *tiling,
const struct tu_device *dev,
@@ -421,10 +357,6 @@ tu6_emit_wfi(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
}
}
/* Expands to the `.pitch` and `.array_pitch` designated-initializer fields
 * for iview's base mip level: the UBWC pitch, and the UBWC size shifted
 * right by 2. The expansion has no enclosing braces, so it is only usable
 * inside an initializer or argument list that accepts those two fields.
 */
#define tu_image_view_ubwc_pitches(iview) \
.pitch = tu_image_ubwc_pitch(iview->image, iview->base_mip), \
.array_pitch = tu_image_ubwc_size(iview->image, iview->base_mip) >> 2
static void
tu6_emit_zs(struct tu_cmd_buffer *cmd,
const struct tu_subpass *subpass,
@@ -497,20 +429,18 @@ tu6_emit_mrt(struct tu_cmd_buffer *cmd,
continue;
const struct tu_image_view *iview = fb->attachments[a].attachment;
const enum a6xx_tile_mode tile_mode =
tu6_get_image_tile_mode(iview->image, iview->base_mip);
mrt_comp[i] = 0xf;
if (vk_format_is_srgb(iview->vk_format))
srgb_cntl |= (1 << i);
const struct tu_native_format format =
tu6_format_color(iview->vk_format, iview->image->layout.tile_mode);
struct tu_native_format format =
tu6_format_image(iview->image, iview->vk_format, iview->base_mip);
tu_cs_emit_regs(cs,
A6XX_RB_MRT_BUF_INFO(i,
.color_tile_mode = tile_mode,
.color_tile_mode = format.tile_mode,
.color_format = format.fmt,
.color_swap = format.swap),
A6XX_RB_MRT_PITCH(i, tu_image_stride(iview->image, iview->base_mip)),
@@ -563,12 +493,10 @@ tu6_emit_mrt(struct tu_cmd_buffer *cmd,
.type = LAYER_2D_ARRAY));
}
static void
tu6_emit_msaa(struct tu_cmd_buffer *cmd,
const struct tu_subpass *subpass,
struct tu_cs *cs)
void
tu6_emit_msaa(struct tu_cs *cs, VkSampleCountFlagBits vk_samples)
{
const enum a3xx_msaa_samples samples = tu_msaa_samples(subpass->samples);
const enum a3xx_msaa_samples samples = tu_msaa_samples(vk_samples);
bool msaa_disable = samples == MSAA_ONE;
tu_cs_emit_regs(cs,
@@ -681,51 +609,8 @@ tu6_emit_blit_scissor(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool align)
A6XX_RB_BLIT_SCISSOR_BR(.x = x2, .y = y2));
}
/* Program the RB_BLIT_* registers for a blit between the image behind
 * `iview` and GMEM at `gmem_offset`. The unk0 and gmem bits of RB_BLIT_INFO
 * are set exactly when `resolve` is false.
 */
static void
tu6_emit_blit_info(struct tu_cmd_buffer *cmd,
                   struct tu_cs *cs,
                   const struct tu_image_view *iview,
                   uint32_t gmem_offset,
                   bool resolve)
{
   /* a non-zero UBWC layer size means the image has a flags buffer */
   const bool has_ubwc = iview->image->layout.ubwc_layer_size != 0;
   const struct tu_native_format format =
      tu6_format_color(iview->vk_format, iview->image->layout.tile_mode);
   const enum a6xx_tile_mode tile_mode =
      tu6_get_image_tile_mode(iview->image, iview->base_mip);

   tu_cs_emit_regs(cs,
                   A6XX_RB_BLIT_INFO(.unk0 = !resolve, .gmem = !resolve));

   tu_cs_emit_regs(cs,
                   A6XX_RB_BLIT_DST_INFO(
                      .tile_mode = tile_mode,
                      .samples = tu_msaa_samples(iview->image->samples),
                      .color_format = format.fmt,
                      .color_swap = format.swap,
                      .flags = has_ubwc),
                   A6XX_RB_BLIT_DST(tu_image_view_base_ref(iview)),
                   A6XX_RB_BLIT_DST_PITCH(tu_image_stride(iview->image, iview->base_mip)),
                   A6XX_RB_BLIT_DST_ARRAY_PITCH(iview->image->layout.layer_size));

   if (has_ubwc) {
      tu_cs_emit_regs(cs,
                      A6XX_RB_BLIT_FLAG_DST(tu_image_view_ubwc_base_ref(iview)),
                      A6XX_RB_BLIT_FLAG_DST_PITCH(
                         .pitch = tu_image_ubwc_pitch(iview->image, iview->base_mip),
                         .array_pitch = tu_image_ubwc_size(iview->image, iview->base_mip) >> 2));
   }

   tu_cs_emit_regs(cs,
                   A6XX_RB_BLIT_BASE_GMEM(gmem_offset));
}
/* Emit a BLIT event into cs through tu6_emit_event_write(), passing false
 * as its final argument. The blit it triggers uses whatever RB_BLIT_*
 * state was programmed beforehand (see tu6_emit_blit_info()).
 */
static void
tu6_emit_blit(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
tu6_emit_event_write(cmd, cs, BLIT, false);
}
static void
tu6_emit_window_scissor(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
void
tu6_emit_window_scissor(struct tu_cs *cs,
uint32_t x1,
uint32_t y1,
uint32_t x2,
@@ -740,11 +625,8 @@ tu6_emit_window_scissor(struct tu_cmd_buffer *cmd,
A6XX_GRAS_RESOLVE_CNTL_2(.x = x2, .y = y2));
}
static void
tu6_emit_window_offset(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
uint32_t x1,
uint32_t y1)
void
tu6_emit_window_offset(struct tu_cs *cs, uint32_t x1, uint32_t y1)
{
tu_cs_emit_regs(cs,
A6XX_RB_WINDOW_OFFSET(.x = x1, .y = y1));
@@ -783,6 +665,9 @@ use_sysmem_rendering(struct tu_cmd_buffer *cmd)
if (!cmd->state.pass->gmem_pixels)
return true;
if (cmd->state.framebuffer->layers > 1)
return true;
return cmd->state.tiling_config.force_sysmem;
}
@@ -801,8 +686,8 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
const uint32_t y1 = tile->begin.y;
const uint32_t x2 = tile->end.x - 1;
const uint32_t y2 = tile->end.y - 1;
tu6_emit_window_scissor(cmd, cs, x1, y1, x2, y2);
tu6_emit_window_offset(cmd, cs, x1, y1);
tu6_emit_window_scissor(cs, x1, y1, x2, y2);
tu6_emit_window_offset(cs, x1, y1);
tu_cs_emit_regs(cs,
A6XX_VPC_SO_OVERRIDE(.so_disable = false));
@@ -861,93 +746,6 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
}
}
/* Emit a blit for attachment `a` (RB_BLIT_INFO programmed with
 * resolve = false) when its contents are needed: the render area does not
 * match the tile grid, or one of its load ops is LOAD. Attachments without
 * a GMEM allocation are skipped.
 */
static void
tu6_emit_load_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, uint32_t a)
{
   const struct tu_render_pass_attachment *attachment =
      &cmd->state.pass->attachments[a];

   if (attachment->gmem_offset < 0)
      return;

   const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
   const struct tu_framebuffer *fb = cmd->state.framebuffer;
   const struct tu_image_view *iview = fb->attachments[a].attachment;

   /* render area corners (x2/y2 exclusive) */
   const uint32_t x1 = tiling->render_area.offset.x;
   const uint32_t y1 = tiling->render_area.offset.y;
   const uint32_t x2 = x1 + tiling->render_area.extent.width;
   const uint32_t y2 = y1 + tiling->render_area.extent.height;

   /* far edge of the area covered by the tile grid */
   const uint32_t tile_x2 =
      tiling->tile0.offset.x + tiling->tile0.extent.width * tiling->tile_count.width;
   const uint32_t tile_y2 =
      tiling->tile0.offset.y + tiling->tile0.extent.height * tiling->tile_count.height;

   const bool unaligned =
      x1 != tiling->tile0.offset.x || x2 != MIN2(fb->width, tile_x2) ||
      y1 != tiling->tile0.offset.y || y2 != MIN2(fb->height, tile_y2);

   if (unaligned)
      tu_finishme("improve handling of unaligned render area");

   const bool load_main = attachment->load_op == VK_ATTACHMENT_LOAD_OP_LOAD;
   const bool load_stencil =
      vk_format_has_stencil(iview->vk_format) &&
      attachment->stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD;

   if (!unaligned && !load_main && !load_stencil)
      return;

   tu6_emit_blit_info(cmd, cs, iview, attachment->gmem_offset, false);
   tu6_emit_blit(cmd, cs);
}
/* Clear attachment `a` in GMEM according to its load ops.
 *
 * The mask handed to tu_clear_gmem_attachment() is 0xf for a format without
 * stencil whose load op is CLEAR; for a format with stencil, bit 0 follows
 * the load op and bit 1 the stencil load op. Nothing is emitted when the
 * mask is empty or the attachment has no GMEM allocation.
 */
static void
tu6_emit_clear_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
                          uint32_t a,
                          const VkRenderPassBeginInfo *info)
{
   const struct tu_render_pass_attachment *attachment =
      &cmd->state.pass->attachments[a];

   /* note: this means it isn't used by any subpass and shouldn't be cleared anyway */
   if (attachment->gmem_offset < 0)
      return;

   const struct tu_image_view *iview =
      cmd->state.framebuffer->attachments[a].attachment;
   const bool clear_main = attachment->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR;
   unsigned clear_mask;

   if (vk_format_has_stencil(iview->vk_format)) {
      clear_mask = clear_main ? 0x1 : 0;
      if (attachment->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)
         clear_mask |= 0x2;
   } else {
      clear_mask = clear_main ? 0xf : 0;
   }

   if (clear_mask == 0)
      return;

   tu_clear_gmem_attachment(cmd, cs, a, clear_mask,
                            &info->pClearValues[a]);
}
/* Emit a blit between framebuffer attachment `a` and the GMEM allocation of
 * attachment `gmem_a`, wrapped in a conditional-execution section for
 * CP_COND_EXEC_0_RENDER_MODE_GMEM.
 */
static void
tu6_emit_predicated_blit(struct tu_cmd_buffer *cmd,
                         struct tu_cs *cs,
                         uint32_t a,
                         uint32_t gmem_a,
                         bool resolve)
{
   const struct tu_image_view *iview =
      cmd->state.framebuffer->attachments[a].attachment;
   const uint32_t gmem_offset =
      cmd->state.pass->attachments[gmem_a].gmem_offset;

   tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM);

   tu6_emit_blit_info(cmd, cs, iview, gmem_offset, resolve);
   tu6_emit_blit(cmd, cs);

   tu_cond_exec_end(cs);
}
static void
tu6_emit_sysmem_resolve(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
@@ -955,48 +753,10 @@ tu6_emit_sysmem_resolve(struct tu_cmd_buffer *cmd,
uint32_t gmem_a)
{
const struct tu_framebuffer *fb = cmd->state.framebuffer;
const struct tu_image_view *dst = fb->attachments[a].attachment;
const struct tu_image_view *src = fb->attachments[gmem_a].attachment;
struct tu_image_view *dst = fb->attachments[a].attachment;
struct tu_image_view *src = fb->attachments[gmem_a].attachment;
tu_blit(cmd, cs, &(struct tu_blit) {
.dst = sysmem_attachment_surf(dst, dst->base_layer,
&cmd->state.tiling_config.render_area),
.src = sysmem_attachment_surf(src, src->base_layer,
&cmd->state.tiling_config.render_area),
.layers = fb->layers,
});
}
/* Emit a MSAA resolve operation, with both gmem and sysmem paths. */
static void tu6_emit_resolve(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
uint32_t a,
uint32_t gmem_a)
{
if (cmd->state.pass->attachments[a].store_op == VK_ATTACHMENT_STORE_OP_DONT_CARE)
return;
tu6_emit_predicated_blit(cmd, cs, a, gmem_a, true);
tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_SYSMEM);
tu6_emit_sysmem_resolve(cmd, cs, a, gmem_a);
tu_cond_exec_end(cs);
}
static void
tu6_emit_store_attachment(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
uint32_t a,
uint32_t gmem_a)
{
if (cmd->state.pass->attachments[a].store_op == VK_ATTACHMENT_STORE_OP_DONT_CARE)
return;
tu6_emit_blit_info(cmd, cs,
cmd->state.framebuffer->attachments[a].attachment,
cmd->state.pass->attachments[gmem_a].gmem_offset, true);
tu6_emit_blit(cmd, cs);
tu_resolve_sysmem(cmd, cs, src, dst, fb->layers, &cmd->state.tiling_config.render_area);
}
static void
@@ -1018,19 +778,20 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE));
tu6_emit_blit_scissor(cmd, cs, true);
/* blit scissor may have been changed by CmdClearAttachments */
tu6_emit_blit_scissor(cmd, cs, false);
for (uint32_t a = 0; a < pass->attachment_count; ++a) {
if (pass->attachments[a].gmem_offset >= 0)
tu6_emit_store_attachment(cmd, cs, a, a);
tu_store_gmem_attachment(cmd, cs, a, a);
}
if (subpass->resolve_attachments) {
for (unsigned i = 0; i < subpass->color_count; i++) {
uint32_t a = subpass->resolve_attachments[i].attachment;
if (a != VK_ATTACHMENT_UNUSED)
tu6_emit_store_attachment(cmd, cs, a,
subpass->color_attachments[i].attachment);
tu_store_gmem_attachment(cmd, cs, a,
subpass->color_attachments[i].attachment);
}
}
}
@@ -1331,7 +1092,7 @@ tu6_emit_binning_pass(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
uint32_t x2 = tiling->render_area.offset.x + tiling->render_area.extent.width - 1;
uint32_t y2 = tiling->render_area.offset.y + tiling->render_area.extent.height - 1;
tu6_emit_window_scissor(cmd, cs, x1, y1, x2, y2);
tu6_emit_window_scissor(cs, x1, y1, x2, y2);
tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BINNING));
@@ -1395,44 +1156,6 @@ tu6_emit_binning_pass(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
cmd->wait_for_idle = false;
}
/* Clear attachment `a` on the sysmem path according to its load ops.
 *
 * Nothing is emitted when the attachment has a negative gmem_offset or when
 * neither load op requests a clear. The whole render area and every layer of
 * the attachment's image view are cleared with info->pClearValues[a].
 */
static void
tu_emit_sysmem_clear_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
                                uint32_t a,
                                const VkRenderPassBeginInfo *info)
{
   const struct tu_framebuffer *fb = cmd->state.framebuffer;
   const struct tu_image_view *iview = fb->attachments[a].attachment;
   const struct tu_render_pass_attachment *attachment =
      &cmd->state.pass->attachments[a];
   unsigned clear_mask = 0;

   /* note: this means it isn't used by any subpass and shouldn't be cleared anyway */
   if (attachment->gmem_offset < 0)
      return;

   if (attachment->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)
      clear_mask = 0xf;

   if (vk_format_has_stencil(iview->vk_format)) {
      /* bit 0 follows load_op, bit 1 follows stencil_load_op */
      clear_mask &= 0x1;
      if (attachment->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)
         clear_mask |= 0x2;

      /* Only a partial clear (exactly one of the two bits) is unimplemented;
       * a mask of 0 means nothing is cleared and must not trigger the warning.
       */
      if (clear_mask != 0 && clear_mask != 0x3)
         tu_finishme("depth/stencil only load op");
   }

   if (!clear_mask)
      return;

   tu_clear_sysmem_attachment(cmd, cs, a,
                              &info->pClearValues[a], &(struct VkClearRect) {
                                 .rect = info->renderArea,
                                 .baseArrayLayer = iview->base_layer,
                                 .layerCount = iview->layer_count,
                              });
}
static void
tu_emit_load_clear(struct tu_cmd_buffer *cmd,
const VkRenderPassBeginInfo *info)
@@ -1444,26 +1167,19 @@ tu_emit_load_clear(struct tu_cmd_buffer *cmd,
tu6_emit_blit_scissor(cmd, cs, true);
for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i)
tu6_emit_load_attachment(cmd, cs, i);
tu_load_gmem_attachment(cmd, cs, i);
tu6_emit_blit_scissor(cmd, cs, false);
for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i)
tu6_emit_clear_attachment(cmd, cs, i, info);
tu_clear_gmem_attachment(cmd, cs, i, info);
tu_cond_exec_end(cs);
/* invalidate because reading input attachments will cache GMEM and
* the cache isn't updated when GMEM is written
* TODO: is there a no-cache bit for textures?
*/
if (cmd->state.subpass->input_count)
tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE, false);
tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_SYSMEM);
for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i)
tu_emit_sysmem_clear_attachment(cmd, cs, i, info);
tu_clear_sysmem_attachment(cmd, cs, i, info);
tu_cond_exec_end(cs);
}
@@ -1476,8 +1192,8 @@ tu6_sysmem_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
const struct tu_framebuffer *fb = cmd->state.framebuffer;
assert(fb->width > 0 && fb->height > 0);
tu6_emit_window_scissor(cmd, cs, 0, 0, fb->width - 1, fb->height - 1);
tu6_emit_window_offset(cmd, cs, 0, 0);
tu6_emit_window_scissor(cs, 0, 0, fb->width - 1, fb->height - 1);
tu6_emit_window_offset(cs, 0, 0);
tu6_emit_bin_size(cs, 0, 0, 0xc00000); /* 0xc00000 = BYPASS? */
@@ -1516,7 +1232,6 @@ tu6_sysmem_render_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
/* Do any resolves of the last subpass. These are handled in the
* tile_store_ib in the gmem path.
*/
const struct tu_subpass *subpass = cmd->state.subpass;
if (subpass->resolve_attachments) {
for (unsigned i = 0; i < subpass->color_count; i++) {
@@ -1555,7 +1270,13 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
tu_cs_emit_pkt7(cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
tu_cs_emit(cs, 0x0);
tu6_emit_wfi(cmd, cs);
/* TODO: flushing with barriers instead of blindly always flushing */
tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS, true);
tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_DEPTH_TS, true);
tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_COLOR, false);
tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_DEPTH, false);
tu_cs_emit_wfi(cs);
tu_cs_emit_regs(cs,
A6XX_RB_CCU_CNTL(.offset = phys_dev->ccu_offset_gmem, .gmem = 1));
@@ -1684,7 +1405,7 @@ tu_cmd_render_sysmem(struct tu_cmd_buffer *cmd)
static void
tu_cmd_prepare_tile_store_ib(struct tu_cmd_buffer *cmd)
{
const uint32_t tile_store_space = 32 + 23 * cmd->state.pass->attachment_count;
const uint32_t tile_store_space = 11 + (35 * 2) * cmd->state.pass->attachment_count;
struct tu_cs sub_cs;
VkResult result =
@@ -1708,7 +1429,7 @@ tu_cmd_update_tiling_config(struct tu_cmd_buffer *cmd,
struct tu_tiling_config *tiling = &cmd->state.tiling_config;
tiling->render_area = *render_area;
tiling->force_sysmem = force_sysmem(cmd, render_area);
tiling->force_sysmem = false;
tu_tiling_config_update_tile_layout(tiling, dev, cmd->state.pass->gmem_pixels);
tu_tiling_config_update_pipe_layout(tiling, dev);
@@ -2583,7 +2304,7 @@ tu_CmdBeginRenderPass(VkCommandBuffer commandBuffer,
tu6_emit_zs(cmd, cmd->state.subpass, &cmd->draw_cs);
tu6_emit_mrt(cmd, cmd->state.subpass, &cmd->draw_cs);
tu6_emit_msaa(cmd, cmd->state.subpass, &cmd->draw_cs);
tu6_emit_msaa(&cmd->draw_cs, cmd->state.subpass->samples);
tu6_emit_render_cntl(cmd, cmd->state.subpass, &cmd->draw_cs, false);
/* note: use_hw_binning only checks tiling config */
@@ -2614,53 +2335,66 @@ tu_CmdNextSubpass(VkCommandBuffer commandBuffer, VkSubpassContents contents)
struct tu_cs *cs = &cmd->draw_cs;
const struct tu_subpass *subpass = cmd->state.subpass++;
/* TODO:
* if msaa samples change between subpasses,
* attachment store is broken for some attachments
*/
tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM);
if (subpass->resolve_attachments) {
tu6_emit_blit_scissor(cmd, cs, true);
for (unsigned i = 0; i < subpass->color_count; i++) {
uint32_t a = subpass->resolve_attachments[i].attachment;
if (a != VK_ATTACHMENT_UNUSED) {
tu6_emit_resolve(cmd, cs, a,
subpass->color_attachments[i].attachment);
}
if (a == VK_ATTACHMENT_UNUSED)
continue;
tu_store_gmem_attachment(cmd, cs, a,
subpass->color_attachments[i].attachment);
if (pass->attachments[a].gmem_offset < 0)
continue;
/* TODO:
* check if the resolved attachment is needed by later subpasses,
* if it is, should be doing a GMEM->GMEM resolve instead of GMEM->MEM->GMEM..
*/
tu_finishme("missing GMEM->GMEM resolve path\n");
tu_emit_load_gmem_attachment(cmd, cs, a);
}
}
/* invalidate because reading input attachments will cache GMEM and
* the cache isn't updated when GMEM is written
* TODO: is there a no-cache bit for textures?
tu_cond_exec_end(cs);
tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_SYSMEM);
/* Emit flushes so that input attachments will read the correct value.
* TODO: use subpass dependencies to flush or not
*/
tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS, true);
tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_DEPTH_TS, true);
if (subpass->resolve_attachments) {
tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE, false);
for (unsigned i = 0; i < subpass->color_count; i++) {
uint32_t a = subpass->resolve_attachments[i].attachment;
if (a == VK_ATTACHMENT_UNUSED)
continue;
tu6_emit_sysmem_resolve(cmd, cs, a,
subpass->color_attachments[i].attachment);
}
tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS, true);
}
tu_cond_exec_end(cs);
/* subpass->input_count > 0 then texture cache invalidate is likely to be needed */
if (cmd->state.subpass->input_count)
tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE, false);
/* emit mrt/zs/msaa/ubwc state for the subpass that is starting */
tu6_emit_zs(cmd, cmd->state.subpass, cs);
tu6_emit_mrt(cmd, cmd->state.subpass, cs);
tu6_emit_msaa(cmd, cmd->state.subpass, cs);
tu6_emit_msaa(cs, cmd->state.subpass->samples);
tu6_emit_render_cntl(cmd, cmd->state.subpass, cs, false);
/* Emit flushes so that input attachments will read the correct value. This
* is for sysmem only, although it shouldn't do much harm on gmem.
*/
tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS, true);
tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_DEPTH_TS, true);
/* TODO:
* since we don't know how to do GMEM->GMEM resolve,
* resolve attachments are resolved to memory then loaded to GMEM again if needed
*/
if (subpass->resolve_attachments) {
for (unsigned i = 0; i < subpass->color_count; i++) {
uint32_t a = subpass->resolve_attachments[i].attachment;
if (a != VK_ATTACHMENT_UNUSED && pass->attachments[a].gmem_offset >= 0) {
tu_finishme("missing GMEM->GMEM resolve, performance will suffer\n");
tu6_emit_predicated_blit(cmd, cs, a, a, false);
}
}
}
}
void
@@ -4137,7 +3871,7 @@ struct tu_barrier_info
};
static void
tu_barrier(struct tu_cmd_buffer *cmd_buffer,
tu_barrier(struct tu_cmd_buffer *cmd,
uint32_t memoryBarrierCount,
const VkMemoryBarrier *pMemoryBarriers,
uint32_t bufferMemoryBarrierCount,
@@ -4146,13 +3880,24 @@ tu_barrier(struct tu_cmd_buffer *cmd_buffer,
const VkImageMemoryBarrier *pImageMemoryBarriers,
const struct tu_barrier_info *info)
{
/* renderpass case is only for subpass self-dependencies
* which means syncing the render output with texture cache
* note: only the CACHE_INVALIDATE is needed in GMEM mode
* and in sysmem mode we might not need either color/depth flush
*/
if (cmd->state.pass) {
tu6_emit_event_write(cmd, &cmd->draw_cs, PC_CCU_FLUSH_COLOR_TS, true);
tu6_emit_event_write(cmd, &cmd->draw_cs, PC_CCU_FLUSH_DEPTH_TS, true);
tu6_emit_event_write(cmd, &cmd->draw_cs, CACHE_INVALIDATE, false);
return;
}
}
void
tu_CmdPipelineBarrier(VkCommandBuffer commandBuffer,
VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags destStageMask,
VkBool32 byRegion,
VkPipelineStageFlags dstStageMask,
VkDependencyFlags dependencyFlags,
uint32_t memoryBarrierCount,
const VkMemoryBarrier *pMemoryBarriers,
uint32_t bufferMemoryBarrierCount,

View File

@@ -28,10 +28,6 @@
#include "registers/adreno_common.xml.h"
#include "registers/a6xx.xml.h"
#include "util/format_r11g11b10f.h"
#include "util/format_rgb9e5.h"
#include "util/format_srgb.h"
#include "util/u_half.h"
#include "vk_format.h"
#include "vk_util.h"
#include "drm-uapi/drm_fourcc.h"
@@ -222,13 +218,13 @@ static const struct tu_native_format tu6_format_table[] = {
TU6_xTx(E5B9G9R9_UFLOAT_PACK32, 9_9_9_E5_FLOAT, WZYX), /* 123 */
/* depth/stencil */
TU6_xTC(D16_UNORM, 16_UNORM, WZYX), /* 124 */
TU6_xTC(X8_D24_UNORM_PACK32, Z24_UNORM_S8_UINT, WZYX), /* 125 */
TU6_xTC(D32_SFLOAT, 32_FLOAT, WZYX), /* 126 */
TU6_xTC(S8_UINT, 8_UINT, WZYX), /* 127 */
TU6_xxx(D16_UNORM_S8_UINT, X8Z16_UNORM, WZYX), /* 128 */
TU6_xTC(D24_UNORM_S8_UINT, Z24_UNORM_S8_UINT, WZYX), /* 129 */
TU6_xxx(D32_SFLOAT_S8_UINT, x, WZYX), /* 130 */
TU6_xTC(D16_UNORM, 16_UNORM, WZYX), /* 124 */
TU6_xTC(X8_D24_UNORM_PACK32, Z24_UNORM_S8_UINT_AS_R8G8B8A8, WZYX), /* 125 */
TU6_xTC(D32_SFLOAT, 32_FLOAT, WZYX), /* 126 */
TU6_xTC(S8_UINT, 8_UINT, WZYX), /* 127 */
TU6_xxx(D16_UNORM_S8_UINT, X8Z16_UNORM, WZYX), /* 128 */
TU6_xTC(D24_UNORM_S8_UINT, Z24_UNORM_S8_UINT_AS_R8G8B8A8, WZYX), /* 129 */
TU6_xxx(D32_SFLOAT_S8_UINT, x, WZYX), /* 130 */
/* compressed */
TU6_xTx(BC1_RGB_UNORM_BLOCK, DXT1, WZYX), /* 131 */
@@ -348,75 +344,6 @@ tu6_format_texture(VkFormat format, enum a6xx_tile_mode tile_mode)
return fmt;
}
enum a6xx_2d_ifmt
tu6_fmt_to_ifmt(enum a6xx_format fmt)
{
switch (fmt) {
case FMT6_A8_UNORM:
case FMT6_8_UNORM:
case FMT6_8_SNORM:
case FMT6_8_8_UNORM:
case FMT6_8_8_SNORM:
case FMT6_8_8_8_8_UNORM:
case FMT6_8_8_8_X8_UNORM:
case FMT6_8_8_8_8_SNORM:
case FMT6_4_4_4_4_UNORM:
case FMT6_5_5_5_1_UNORM:
case FMT6_5_6_5_UNORM:
case FMT6_Z24_UNORM_S8_UINT:
case FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8:
return R2D_UNORM8;
case FMT6_32_UINT:
case FMT6_32_SINT:
case FMT6_32_32_UINT:
case FMT6_32_32_SINT:
case FMT6_32_32_32_32_UINT:
case FMT6_32_32_32_32_SINT:
return R2D_INT32;
case FMT6_16_UINT:
case FMT6_16_SINT:
case FMT6_16_16_UINT:
case FMT6_16_16_SINT:
case FMT6_16_16_16_16_UINT:
case FMT6_16_16_16_16_SINT:
case FMT6_10_10_10_2_UINT:
return R2D_INT16;
case FMT6_8_UINT:
case FMT6_8_SINT:
case FMT6_8_8_UINT:
case FMT6_8_8_SINT:
case FMT6_8_8_8_8_UINT:
case FMT6_8_8_8_8_SINT:
return R2D_INT8;
case FMT6_16_UNORM:
case FMT6_16_SNORM:
case FMT6_16_16_UNORM:
case FMT6_16_16_SNORM:
case FMT6_16_16_16_16_UNORM:
case FMT6_16_16_16_16_SNORM:
case FMT6_32_FLOAT:
case FMT6_32_32_FLOAT:
case FMT6_32_32_32_32_FLOAT:
return R2D_FLOAT32;
case FMT6_16_FLOAT:
case FMT6_16_16_FLOAT:
case FMT6_16_16_16_16_FLOAT:
case FMT6_11_11_10_FLOAT:
case FMT6_10_10_10_2_UNORM:
case FMT6_10_10_10_2_UNORM_DEST:
return R2D_FLOAT16;
default:
unreachable("bad format");
return 0;
}
}
enum a6xx_depth_format
tu6_pipe2depth(VkFormat format)
{
@@ -433,306 +360,6 @@ tu6_pipe2depth(VkFormat format)
}
}
/* Return a mask with the low `bits` bits set.
 *
 * `bits` must be in [0, 32]. The shift is done in 64 bits so that bits == 32
 * yields 0xffffffff instead of shifting a 32-bit value by its full width.
 */
static uint32_t
tu_pack_mask(int bits)
{
   /* a negative shift count is undefined behavior; callers pass `bits - 1` */
   assert(bits >= 0 && bits <= 32);
   return (uint32_t) ((UINT64_C(1) << bits) - 1);
}
/* Convert a float to an unsigned normalized integer of `bits` bits:
 * values below 0 give 0, values above 1 give the all-ones value, anything
 * else is scaled by the all-ones value and rounded with _mesa_lroundevenf().
 */
static uint32_t
tu_pack_float32_for_unorm(float val, int bits)
{
   const uint32_t full_scale = tu_pack_mask(bits);

   if (val < 0.0f)
      return 0;
   if (val > 1.0f)
      return full_scale;

   return _mesa_lroundevenf(val * (float) full_scale);
}
/* Convert a float to a signed normalized integer of `bits` bits, returned as
 * the low `bits` bits of the result. The input is clamped to [-1, 1] and the
 * output range is symmetric: [-limit, limit] with limit = 2^(bits-1) - 1.
 */
static uint32_t
tu_pack_float32_for_snorm(float val, int bits)
{
   const int32_t limit = tu_pack_mask(bits - 1);
   int32_t quantized;

   if (val > 1.0f)
      quantized = limit;
   else if (val < -1.0f)
      quantized = -limit;
   else
      quantized = _mesa_lroundevenf(val * (float) limit);

   /* keep only the low `bits` bits of the two's complement value */
   return quantized & tu_pack_mask(bits);
}
/* Convert a float to an unsigned scaled integer of `bits` bits: the value is
 * clamped to [0, 2^bits - 1] and truncated toward zero. NaN maps to 0.
 */
static uint32_t
tu_pack_float32_for_uscaled(float val, int bits)
{
   const uint32_t max = tu_pack_mask(bits);

   /* Negated comparison so that NaN is caught here too; converting NaN to an
    * integer is undefined.
    */
   if (!(val > 0.0f))
      return 0;

   /* Use >= rather than >: for bits > 24, (float) max rounds up to 2^bits,
    * so a value equal to it must be clamped instead of being cast (which
    * would exceed max, and is undefined for bits == 32).
    */
   if (val >= (float) max)
      return max;

   return (uint32_t) val;
}
/* Convert a float to a signed scaled integer of `bits` bits, returned as the
 * low `bits` bits of the two's complement result. The value is clamped to
 * [-2^(bits-1), 2^(bits-1) - 1] and truncated toward zero. NaN maps to 0.
 */
static uint32_t
tu_pack_float32_for_sscaled(float val, int bits)
{
   const int32_t max = tu_pack_mask(bits - 1);
   const int32_t min = -max - 1;
   int32_t tmp;

   if (val != val) {
      /* NaN: converting it to an integer is undefined */
      tmp = 0;
   } else if (val <= (float) min) {
      tmp = min;
   } else if (val >= (float) max) {
      /* >= rather than >: when max is not exactly representable as a float,
       * (float) max rounds up to 2^(bits-1); a value equal to it must clamp
       * instead of being cast (out of range, undefined for bits == 32).
       */
      tmp = max;
   } else {
      tmp = (int32_t) val;
   }

   return tmp & tu_pack_mask(bits);
}
/* Truncate an unsigned integer to its low `bits` bits. */
static uint32_t
tu_pack_uint32_for_uint(uint32_t val, int bits)
{
   const uint32_t keep = tu_pack_mask(bits);

   return val & keep;
}
/* Truncate a signed integer to the low `bits` bits of its two's complement
 * representation.
 */
static uint32_t
tu_pack_int32_for_sint(int32_t val, int bits)
{
   const uint32_t keep = tu_pack_mask(bits);

   return (uint32_t) val & keep;
}
/* Return the bit pattern of `val` as a 16-bit half float or as a 32-bit
 * float; only those two widths are accepted.
 */
static uint32_t
tu_pack_float32_for_sfloat(float val, int bits)
{
   assert(bits == 16 || bits == 32);

   if (bits == 16)
      return util_float_to_half(val);

   return fui(val);
}
/* A single clear-value component, viewable as a float, a signed integer or
 * an unsigned integer sharing the same 32 bits of storage.
 */
union tu_clear_component_value {
float float32;
int32_t int32;
uint32_t uint32;
};
/* Pack one clear-value component according to the channel description:
 * the channel type plus its normalized / pure_integer flags select which
 * member of `val` is read and which conversion is applied. The result always
 * fits in ch->size bits.
 */
static uint32_t
tu_pack_clear_component_value(union tu_clear_component_value val,
                              const struct util_format_channel_description *ch)
{
   uint32_t bits_out;

   if (ch->type == UTIL_FORMAT_TYPE_FLOAT) {
      bits_out = tu_pack_float32_for_sfloat(val.float32, ch->size);
   } else if (ch->type == UTIL_FORMAT_TYPE_UNSIGNED) {
      /* normalized, pure integer, or scaled */
      if (ch->normalized)
         bits_out = tu_pack_float32_for_unorm(val.float32, ch->size);
      else if (ch->pure_integer)
         bits_out = tu_pack_uint32_for_uint(val.uint32, ch->size);
      else
         bits_out = tu_pack_float32_for_uscaled(val.float32, ch->size);
   } else if (ch->type == UTIL_FORMAT_TYPE_SIGNED) {
      /* normalized, pure integer, or scaled */
      if (ch->normalized)
         bits_out = tu_pack_float32_for_snorm(val.float32, ch->size);
      else if (ch->pure_integer)
         bits_out = tu_pack_int32_for_sint(val.int32, ch->size);
      else
         bits_out = tu_pack_float32_for_sscaled(val.float32, ch->size);
   } else {
      unreachable("unexpected channel type");
      bits_out = 0;
   }

   assert((bits_out & tu_pack_mask(ch->size)) == bits_out);
   return bits_out;
}
/* Return the channel description selected by the swizzle of component
 * `comp`, or NULL when the swizzle is not one of X/Y/Z/W.
 */
static const struct util_format_channel_description *
tu_get_format_channel_description(const struct util_format_description *desc,
                                  int comp)
{
   int channel;

   switch (desc->swizzle[comp]) {
   case PIPE_SWIZZLE_X:
      channel = 0;
      break;
   case PIPE_SWIZZLE_Y:
      channel = 1;
      break;
   case PIPE_SWIZZLE_Z:
      channel = 2;
      break;
   case PIPE_SWIZZLE_W:
      channel = 3;
      break;
   default:
      return NULL;
   }

   return &desc->channel[channel];
}
static union tu_clear_component_value
tu_get_clear_component_value(const VkClearValue *val, int comp,
enum util_format_colorspace colorspace)
{
assert(comp < 4);
union tu_clear_component_value tmp;
switch (colorspace) {
case UTIL_FORMAT_COLORSPACE_ZS:
assert(comp < 2);
if (comp == 0)
tmp.float32 = val->depthStencil.depth;
else
tmp.uint32 = val->depthStencil.stencil;
break;
case UTIL_FORMAT_COLORSPACE_SRGB:
if (comp < 3) {
tmp.float32 = util_format_linear_to_srgb_float(val->color.float32[comp]);
break;
}
default:
assert(comp < 4);
tmp.uint32 = val->color.uint32[comp];
break;
}
return tmp;
}
/**
* Pack a VkClearValue into a buffer of up to four uint32_t's according to
* \a format.
*
* B10G11R11_UFLOAT and E5B9G9R9_UFLOAT are packed into buf[0] by dedicated
* helpers. Every other format must have a plain layout: components are
* visited in index order and each one is placed directly above the previous
* one, so component 0 lands in the least significant bits of buf[0]. A
* component that does not fit in the remaining bits of the current word
* starts the next word.
*
* Nothing is returned; only the words actually needed are written, so the
* caller should zero \a buf beforehand if it reads all four.
*/
void
tu_pack_clear_value(const VkClearValue *val, VkFormat format, uint32_t buf[4])
{
const struct util_format_description *desc = vk_format_description(format);
/* packed float formats have dedicated converters */
switch (format) {
case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
buf[0] = float3_to_r11g11b10f(val->color.float32);
return;
case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
buf[0] = float3_to_rgb9e5(val->color.float32);
return;
default:
break;
}
assert(desc && desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
/* S8_UINT is special and has no depth */
const int max_components =
format == VK_FORMAT_S8_UINT ? 2 : desc->nr_channels;
/* buf_offset: word being filled; bit_shift: next free bit in that word */
int buf_offset = 0;
int bit_shift = 0;
for (int comp = 0; comp < max_components; comp++) {
const struct util_format_channel_description *ch =
tu_get_format_channel_description(desc, comp);
/* a missing channel is only expected for the absent depth of S8_UINT
 * and the absent stencil of X8_D24_UNORM_PACK32
 */
if (!ch) {
assert((format == VK_FORMAT_S8_UINT && comp == 0) ||
(format == VK_FORMAT_X8_D24_UNORM_PACK32 && comp == 1));
continue;
}
union tu_clear_component_value v = tu_get_clear_component_value(
val, comp, desc->colorspace);
/* move to the next uint32_t when there is not enough space */
assert(ch->size <= 32);
if (bit_shift + ch->size > 32) {
buf_offset++;
bit_shift = 0;
}
/* first component of a word initializes it, later ones are OR'ed in */
if (bit_shift == 0)
buf[buf_offset] = 0;
buf[buf_offset] |= tu_pack_clear_component_value(v, ch) << bit_shift;
bit_shift += ch->size;
}
}
/* Convert a color clear value into per-channel words, one buf[] entry per
 * channel, in the representation selected by the format's 2D intermediate
 * format (see tu6_fmt_to_ifmt()).
 *
 * E5B9G9R9 is the exception: it is packed into buf[0] as a single word.
 */
void
tu_2d_clear_color(const VkClearColorValue *val, VkFormat format, uint32_t buf[4])
{
   const struct util_format_description *desc = vk_format_description(format);

   /* not supported by 2D engine, cleared as U32 */
   if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
      buf[0] = float3_to_rgb9e5(val->float32);
      return;
   }

   enum a6xx_2d_ifmt ifmt = tu6_fmt_to_ifmt(tu6_get_native_format(format).fmt);

   assert(desc && (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN ||
                   format == VK_FORMAT_B10G11R11_UFLOAT_PACK32));

   for (unsigned c = 0; c < desc->nr_channels; c++) {
      if (ifmt == R2D_FLOAT16) {
         buf[c] = util_float_to_half(val->float32[c]);
      } else if (ifmt == R2D_UNORM8) {
         const struct util_format_channel_description *chan = &desc->channel[c];
         float component = val->float32[c];

         /* only the first three channels of sRGB formats are encoded */
         if (c < 3 && desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
            component = util_format_linear_to_srgb_float(val->float32[c]);

         if (chan->type == UTIL_FORMAT_TYPE_SIGNED)
            buf[c] = tu_pack_float32_for_snorm(component, 8);
         else
            buf[c] = tu_pack_float32_for_unorm(component, 8);
      } else if (ifmt == R2D_INT32 || ifmt == R2D_INT16 ||
                 ifmt == R2D_INT8 || ifmt == R2D_FLOAT32) {
         /* raw 32-bit copy */
         buf[c] = val->uint32[c];
      } else {
         unreachable("unexpected ifmt");
      }
   }
}
/* Convert a depth/stencil clear value into words for the supported
 * depth/stencil formats:
 *  - 24-bit depth formats: buf[0] is the 24-bit unorm depth, buf[1] and
 *    buf[2] are that value shifted right by 8 and 16, buf[3] is the stencil;
 *  - D16_UNORM / D32_SFLOAT: buf[0] is the float bit pattern of the depth;
 *  - S8_UINT: buf[0] is the stencil.
 */
void
tu_2d_clear_zs(const VkClearDepthStencilValue *val, VkFormat format, uint32_t buf[4])
{
   if (format == VK_FORMAT_X8_D24_UNORM_PACK32 ||
       format == VK_FORMAT_D24_UNORM_S8_UINT) {
      const uint32_t z24 = tu_pack_float32_for_unorm(val->depth, 24);

      buf[0] = z24;
      buf[1] = z24 >> 8;
      buf[2] = z24 >> 16;
      buf[3] = val->stencil;
   } else if (format == VK_FORMAT_D16_UNORM ||
              format == VK_FORMAT_D32_SFLOAT) {
      buf[0] = fui(val->depth);
   } else if (format == VK_FORMAT_S8_UINT) {
      buf[0] = val->stencil;
   } else {
      unreachable("unexpected zs format");
   }
}
static void
tu_physical_device_get_format_properties(
struct tu_physical_device *physical_device,

View File

@@ -111,13 +111,6 @@ tu_image_create(VkDevice _device,
ubwc_enabled = false;
}
/* using UBWC with D24S8 breaks the "stencil read" copy path (why?)
* (causes any deqp tests that need to check stencil to fail)
* disable UBWC for this format until we properly support copy aspect masks
*/
if (image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT)
ubwc_enabled = false;
/* UBWC can't be used with E5B9G9R9 */
if (image->vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
ubwc_enabled = false;
@@ -166,7 +159,7 @@ tu_image_create(VkDevice _device,
return VK_SUCCESS;
}
static enum a6xx_tex_fetchsize
enum a6xx_tex_fetchsize
tu6_fetchsize(VkFormat format)
{
if (vk_format_description(format)->layout == UTIL_FORMAT_LAYOUT_ASTC)
@@ -277,24 +270,27 @@ tu_image_view_init(struct tu_image_view *iview,
memset(iview->descriptor, 0, sizeof(iview->descriptor));
struct tu_native_format fmt =
tu6_format_texture(iview->vk_format, image->layout.tile_mode);
tu6_format_image_src(image, iview->vk_format, iview->base_mip);
uint64_t base_addr = tu_image_base(image, iview->base_mip, iview->base_layer);
uint64_t ubwc_addr = tu_image_ubwc_base(image, iview->base_mip, iview->base_layer);
uint32_t pitch = tu_image_stride(image, iview->base_mip) / vk_format_get_blockwidth(iview->vk_format);
enum a6xx_tile_mode tile_mode = tu6_get_image_tile_mode(image, iview->base_mip);
uint32_t pitch = tu_image_pitch(image, iview->base_mip);
uint32_t width = iview->extent.width;
uint32_t height = iview->extent.height;
uint32_t depth = pCreateInfo->viewType == VK_IMAGE_VIEW_TYPE_3D ?
iview->extent.depth : iview->layer_count;
unsigned fmt_tex = fmt.fmt;
if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT &&
iview->vk_format == VK_FORMAT_D24_UNORM_S8_UINT)
fmt_tex = FMT6_S8Z24_UINT;
if (fmt_tex == FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8) {
if (iview->aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT)
fmt_tex = FMT6_Z24_UNORM_S8_UINT;
if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
fmt_tex = FMT6_S8Z24_UINT;
/* TODO: also use this format with storage descriptor ? */
}
iview->descriptor[0] =
A6XX_TEX_CONST_0_TILE_MODE(tile_mode) |
A6XX_TEX_CONST_0_TILE_MODE(fmt.tile_mode) |
COND(vk_format_is_srgb(iview->vk_format), A6XX_TEX_CONST_0_SRGB) |
A6XX_TEX_CONST_0_FMT(fmt_tex) |
A6XX_TEX_CONST_0_SAMPLES(tu_msaa_samples(image->samples)) |
@@ -335,7 +331,7 @@ tu_image_view_init(struct tu_image_view *iview,
iview->storage_descriptor[0] =
A6XX_IBO_0_FMT(fmt.fmt) |
A6XX_IBO_0_TILE_MODE(tile_mode);
A6XX_IBO_0_TILE_MODE(fmt.tile_mode);
iview->storage_descriptor[1] =
A6XX_IBO_1_WIDTH(width) |
A6XX_IBO_1_HEIGHT(height);

View File

@@ -1,91 +0,0 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "tu_private.h"
#include "tu_blit.h"
/* Record one VkImageBlit region.
 *
 * Mirroring along X and/or Y is expressed through the blit rotation field.
 * Mirroring along Z and differing source/destination depths are not
 * implemented: a finishme is reported and the region is dropped.
 */
static void
tu_blit_image(struct tu_cmd_buffer *cmdbuf,
              struct tu_image *src_image,
              struct tu_image *dst_image,
              const VkImageBlit *info,
              VkFilter filter)
{
   /* indexed as [flip_y][flip_x] */
   static const enum a6xx_rotation rotation_for_flip[2][2] = {
      { ROTATE_0, ROTATE_HFLIP },
      { ROTATE_VFLIP, ROTATE_180 },
   };
   const VkOffset3D *src = info->srcOffsets;
   const VkOffset3D *dst = info->dstOffsets;

   /* an axis is flipped when exactly one of src/dst runs backwards on it */
   const bool flip_x = (src[1].x < src[0].x) != (dst[1].x < dst[0].x);
   const bool flip_y = (src[1].y < src[0].y) != (dst[1].y < dst[0].y);
   const bool flip_z = (src[1].z < src[0].z) != (dst[1].z < dst[0].z);

   if (flip_z) {
      tu_finishme("blit z mirror\n");
      return;
   }

   if (src[1].z - src[0].z != dst[1].z - dst[0].z) {
      tu_finishme("blit z filter\n");
      return;
   }

   assert(info->dstSubresource.layerCount == info->srcSubresource.layerCount);

   struct tu_blit blt = {
      .dst = tu_blit_surf(dst_image, info->dstSubresource, dst),
      .src = tu_blit_surf(src_image, info->srcSubresource, src),
      .layers = MAX2(src[1].z - src[0].z,
                     info->dstSubresource.layerCount),
      .filter = filter == VK_FILTER_LINEAR,
      .rotation = rotation_for_flip[flip_y][flip_x],
   };

   tu_blit(cmdbuf, &cmdbuf->cs, &blt);
}
/* vkCmdBlitImage entry point: adds both images' BOs to the command buffer's
 * BO list (source for read, destination for write) and records each region
 * in turn.
 */
void
tu_CmdBlitImage(VkCommandBuffer commandBuffer,
                VkImage srcImage,
                VkImageLayout srcImageLayout,
                VkImage destImage,
                VkImageLayout destImageLayout,
                uint32_t regionCount,
                const VkImageBlit *pRegions,
                VkFilter filter)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
   TU_FROM_HANDLE(tu_image, src_image, srcImage);
   TU_FROM_HANDLE(tu_image, dst_image, destImage);

   tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
   tu_bo_list_add(&cmdbuf->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);

   const VkImageBlit *region = pRegions;
   for (uint32_t remaining = regionCount; remaining > 0; remaining--)
      tu_blit_image(cmdbuf, src_image, dst_image, region++, filter);
}

View File

@@ -1,75 +0,0 @@
#include "tu_private.h"
#include "tu_blit.h"
#include "tu_cs.h"
/* vkCmdFillBuffer entry point: the range is described as a one-row
 * R32_UINT surface of fillSize / 4 texels and cleared to `data`.
 * VK_WHOLE_SIZE means "from dstOffset to the end of the buffer".
 */
void
tu_CmdFillBuffer(VkCommandBuffer commandBuffer,
                 VkBuffer dstBuffer,
                 VkDeviceSize dstOffset,
                 VkDeviceSize fillSize,
                 uint32_t data)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_buffer, buffer, dstBuffer);

   const VkDeviceSize size =
      fillSize == VK_WHOLE_SIZE ? buffer->size - dstOffset : fillSize;

   tu_bo_list_add(&cmd->bo_list, buffer->bo, MSM_SUBMIT_BO_WRITE);

   struct tu_blit fill = {
      .dst = {
         .fmt = VK_FORMAT_R32_UINT,
         .va = tu_buffer_iova(buffer) + dstOffset,
         .width = size / 4,
         .height = 1,
         .samples = 1,
      },
      .layers = 1,
      .clear_value[0] = data,
      .type = TU_BLIT_CLEAR,
      .buffer = true,
   };

   tu_blit(cmd, &cmd->cs, &fill);
}
/* vkCmdUpdateBuffer entry point: the data is first copied into memory
 * allocated from the command buffer's sub_cs, then a buffer-to-buffer copy
 * (as R32_UINT, dataSize / 4 texels) moves it to the destination.
 *
 * On allocation failure the error is stored in cmd->record_result and
 * nothing is recorded.
 */
void
tu_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
                   VkBuffer dstBuffer,
                   VkDeviceSize dstOffset,
                   VkDeviceSize dataSize,
                   const void *pData)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_buffer, buffer, dstBuffer);

   tu_bo_list_add(&cmd->bo_list, buffer->bo, MSM_SUBMIT_BO_WRITE);

   struct ts_cs_memory staging;
   VkResult result =
      tu_cs_alloc(&cmd->sub_cs, DIV_ROUND_UP(dataSize, 64), 64, &staging);
   if (result != VK_SUCCESS) {
      cmd->record_result = result;
      return;
   }

   memcpy(staging.map, pData, dataSize);

   struct tu_blit copy = {
      .dst = {
         .fmt = VK_FORMAT_R32_UINT,
         .va = tu_buffer_iova(buffer) + dstOffset,
         .width = dataSize / 4,
         .height = 1,
         .samples = 1,
      },
      .src = {
         .fmt = VK_FORMAT_R32_UINT,
         .va = staging.iova,
         .width = dataSize / 4,
         .height = 1,
         .samples = 1,
      },
      .layers = 1,
      .type = TU_BLIT_COPY,
      .buffer = true,
   };

   tu_blit(cmd, &cmd->cs, &copy);
}

View File

@@ -1,238 +0,0 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "tu_private.h"
#include "tu_blit.h"
#include "tu_cs.h"
static void
clear_image(struct tu_cmd_buffer *cmdbuf,
struct tu_image *image,
uint32_t clear_value[4],
const VkImageSubresourceRange *range)
{
uint32_t level_count = tu_get_levelCount(image, range);
uint32_t layer_count = tu_get_layerCount(image, range);
if (image->type == VK_IMAGE_TYPE_3D) {
assert(layer_count == 1);
assert(range->baseArrayLayer == 0);
}
for (unsigned j = 0; j < level_count; j++) {
if (image->type == VK_IMAGE_TYPE_3D)
layer_count = u_minify(image->extent.depth, range->baseMipLevel + j);
tu_blit(cmdbuf, &cmdbuf->cs, &(struct tu_blit) {
.dst = tu_blit_surf_whole(image, range->baseMipLevel + j, range->baseArrayLayer),
.layers = layer_count,
.clear_value = {clear_value[0], clear_value[1], clear_value[2], clear_value[3]},
.type = TU_BLIT_CLEAR,
});
}
}
/* vkCmdClearColorImage entry point: packs the color once for the image
 * format, then clears each subresource range.
 */
void
tu_CmdClearColorImage(VkCommandBuffer commandBuffer,
                      VkImage image_h,
                      VkImageLayout imageLayout,
                      const VkClearColorValue *pColor,
                      uint32_t rangeCount,
                      const VkImageSubresourceRange *pRanges)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
   TU_FROM_HANDLE(tu_image, image, image_h);
   uint32_t packed[4] = { 0 };

   tu_2d_clear_color(pColor, image->vk_format, packed);

   tu_bo_list_add(&cmdbuf->bo_list, image->bo, MSM_SUBMIT_BO_WRITE);

   const VkImageSubresourceRange *range = pRanges;
   for (uint32_t remaining = rangeCount; remaining > 0; remaining--)
      clear_image(cmdbuf, image, packed, range++);
}
/* vkCmdClearDepthStencilImage entry point: packs the depth/stencil value
 * once for the image format, then clears each subresource range.
 */
void
tu_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
                             VkImage image_h,
                             VkImageLayout imageLayout,
                             const VkClearDepthStencilValue *pDepthStencil,
                             uint32_t rangeCount,
                             const VkImageSubresourceRange *pRanges)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
   TU_FROM_HANDLE(tu_image, image, image_h);
   uint32_t packed[4] = { 0 };

   tu_2d_clear_zs(pDepthStencil, image->vk_format, packed);

   tu_bo_list_add(&cmdbuf->bo_list, image->bo, MSM_SUBMIT_BO_WRITE);

   const VkImageSubresourceRange *range = pRanges;
   for (uint32_t remaining = rangeCount; remaining > 0; remaining--)
      clear_image(cmdbuf, image, packed, range++);
}
void
tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
uint32_t attachment,
const VkClearValue *value,
const VkClearRect *rect)
{
if (!cmd->state.framebuffer) {
tu_finishme("sysmem CmdClearAttachments in secondary command buffer");
return;
}
const struct tu_image_view *iview =
cmd->state.framebuffer->attachments[attachment].attachment;
uint32_t clear_vals[4] = { 0 };
if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT |
VK_IMAGE_ASPECT_STENCIL_BIT)) {
tu_2d_clear_zs(&value->depthStencil, iview->vk_format,
clear_vals);
} else {
tu_2d_clear_color(&value->color, iview->vk_format,
clear_vals);
}
tu_blit(cmd, cs, &(struct tu_blit) {
.dst = sysmem_attachment_surf(iview, rect->baseArrayLayer, &rect->rect),
.layers = rect->layerCount,
.clear_value = { clear_vals[0], clear_vals[1], clear_vals[2], clear_vals[3] },
.type = TU_BLIT_CLEAR,
});
}
/* Emit a clear of a render pass attachment at its GMEM offset.
 *
 * component_mask is written to the CLEAR_MASK field of RB_BLIT_INFO and the
 * clear value is packed for the attachment's format with
 * tu_pack_clear_value(). The clear is kicked off with a BLIT event.
 * NOTE(review): the blit scissor is not programmed here; callers appear to
 * be responsible for it.
 */
void
tu_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
uint32_t attachment,
uint8_t component_mask,
const VkClearValue *value)
{
VkFormat fmt = cmd->state.pass->attachments[attachment].format;
/* destination format */
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 1);
tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(tu6_base_format(fmt)));
/* GMEM flag plus the clear mask */
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_INFO, 1);
tu_cs_emit(cs, A6XX_RB_BLIT_INFO_GMEM | A6XX_RB_BLIT_INFO_CLEAR_MASK(component_mask));
/* where the attachment lives in GMEM */
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
tu_cs_emit(cs, cmd->state.pass->attachments[attachment].gmem_offset);
/* register of unknown purpose, written as 0 */
tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_88D0, 1);
tu_cs_emit(cs, 0);
/* zero-initialized because tu_pack_clear_value() may write fewer than 4 words */
uint32_t clear_vals[4] = { 0 };
tu_pack_clear_value(value, fmt, clear_vals);
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
tu_cs_emit(cs, clear_vals[0]);
tu_cs_emit(cs, clear_vals[1]);
tu_cs_emit(cs, clear_vals[2]);
tu_cs_emit(cs, clear_vals[3]);
tu6_emit_event_write(cmd, cs, BLIT, false);
}
/* vkCmdClearAttachments entry point.
 *
 * The clears are recorded twice into the draw command stream, each copy
 * wrapped in a conditional-execution section: once for GMEM render mode
 * (blit scissor + tu_clear_gmem_attachment per rect) and once for SYSMEM
 * render mode (tu_clear_sysmem_attachment per rect).
 */
void
tu_CmdClearAttachments(VkCommandBuffer commandBuffer,
uint32_t attachmentCount,
const VkClearAttachment *pAttachments,
uint32_t rectCount,
const VkClearRect *pRects)
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
const struct tu_subpass *subpass = cmd->state.subpass;
struct tu_cs *cs = &cmd->draw_cs;
/* GMEM path */
tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM);
for (unsigned i = 0; i < rectCount; i++) {
/* inclusive corner coordinates of the rect */
unsigned x1 = pRects[i].rect.offset.x;
unsigned y1 = pRects[i].rect.offset.y;
unsigned x2 = x1 + pRects[i].rect.extent.width - 1;
unsigned y2 = y1 + pRects[i].rect.extent.height - 1;
/* restrict the blit to this rect; TL and BR are written as two consecutive registers */
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_SCISSOR_TL, 2);
tu_cs_emit(cs, A6XX_RB_BLIT_SCISSOR_TL_X(x1) | A6XX_RB_BLIT_SCISSOR_TL_Y(y1));
tu_cs_emit(cs, A6XX_RB_BLIT_SCISSOR_BR_X(x2) | A6XX_RB_BLIT_SCISSOR_BR_Y(y2));
for (unsigned j = 0; j < attachmentCount; j++) {
uint32_t a;
unsigned clear_mask = 0;
/* color: all four components; depth/stencil: bit 0 = depth, bit 1 = stencil */
if (pAttachments[j].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
clear_mask = 0xf;
a = subpass->color_attachments[pAttachments[j].colorAttachment].attachment;
} else {
a = subpass->depth_stencil_attachment.attachment;
if (pAttachments[j].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
clear_mask |= 1;
if (pAttachments[j].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
clear_mask |= 2;
}
if (a == VK_ATTACHMENT_UNUSED)
continue;
tu_clear_gmem_attachment(cmd, cs, a, clear_mask,
&pAttachments[j].clearValue);
}
}
tu_cond_exec_end(cs);
/* SYSMEM path */
tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_SYSMEM);
for (unsigned i = 0; i < rectCount; i++) {
for (unsigned j = 0; j < attachmentCount; j++) {
uint32_t a;
/* on this path clear_mask is only used to detect partial depth/stencil clears */
unsigned clear_mask = 0;
if (pAttachments[j].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
clear_mask = 0xf;
a = subpass->color_attachments[pAttachments[j].colorAttachment].attachment;
} else {
a = subpass->depth_stencil_attachment.attachment;
if (pAttachments[j].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
clear_mask |= 1;
if (pAttachments[j].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
clear_mask |= 2;
/* the sysmem clear always clears both aspects, so a partial request is only reported */
if (clear_mask != 3)
tu_finishme("sysmem depth/stencil only clears");
}
if (a == VK_ATTACHMENT_UNUSED)
continue;
tu_clear_sysmem_attachment(cmd, cs, a,
&pAttachments[j].clearValue,
&pRects[i]);
}
}
tu_cond_exec_end(cs);
}

View File

@@ -1,215 +0,0 @@
/*
* Copyright © 2016 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "tu_private.h"
#include "a6xx.xml.h"
#include "adreno_common.xml.h"
#include "adreno_pm4.xml.h"
#include "vk_format.h"
#include "tu_cs.h"
#include "tu_blit.h"
static void
tu_copy_buffer(struct tu_cmd_buffer *cmd,
struct tu_buffer *src,
struct tu_buffer *dst,
const VkBufferCopy *region)
{
tu_bo_list_add(&cmd->bo_list, src->bo, MSM_SUBMIT_BO_READ);
tu_bo_list_add(&cmd->bo_list, dst->bo, MSM_SUBMIT_BO_WRITE);
tu_blit(cmd, &cmd->cs, &(struct tu_blit) {
.dst = {
.fmt = VK_FORMAT_R8_UNORM,
.va = tu_buffer_iova(dst) + region->dstOffset,
.width = region->size,
.height = 1,
.samples = 1,
},
.src = {
.fmt = VK_FORMAT_R8_UNORM,
.va = tu_buffer_iova(src) + region->srcOffset,
.width = region->size,
.height = 1,
.samples = 1,
},
.layers = 1,
.type = TU_BLIT_COPY,
.buffer = true,
});
}
/* Describe the buffer side of a buffer<->image copy as a linear blit surface.
 *
 * format is the format of the image on the other side of the copy; it is
 * replaced by VK_FORMAT_R8_UNORM when the copy selects only the stencil
 * aspect.
 *
 * A bufferRowLength / bufferImageHeight of zero falls back to the
 * corresponding imageExtent dimension.
 */
static struct tu_blit_surf
tu_blit_buffer(struct tu_buffer *buffer,
               VkFormat format,
               const VkBufferImageCopy *info)
{
   if (info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT)
      format = VK_FORMAT_R8_UNORM;

   uint32_t row_length = info->bufferRowLength;
   if (!row_length)
      row_length = info->imageExtent.width;

   uint32_t image_height = info->bufferImageHeight;
   if (!image_height)
      image_height = info->imageExtent.height;

   unsigned pitch = row_length * vk_format_get_blocksize(format);

   struct tu_blit_surf surf = {
      .fmt = format,
      .tile_mode = TILE6_LINEAR,
      .va = tu_buffer_iova(buffer) + info->bufferOffset,
      .pitch = pitch,
      /* pitch is row texels times bytes per block, so divide by both block
       * dimensions when scaling it up to a whole layer
       */
      .layer_size = image_height * pitch /
                    vk_format_get_blockwidth(format) /
                    vk_format_get_blockheight(format),
      .width = info->imageExtent.width,
      .height = info->imageExtent.height,
      .samples = 1,
   };

   return surf;
}
static void
tu_copy_buffer_to_image(struct tu_cmd_buffer *cmdbuf,
struct tu_buffer *src_buffer,
struct tu_image *dst_image,
const VkBufferImageCopy *info)
{
if (info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT &&
vk_format_get_blocksize(dst_image->vk_format) == 4) {
tu_finishme("aspect mask\n");
return;
}
tu_blit(cmdbuf, &cmdbuf->cs, &(struct tu_blit) {
.dst = tu_blit_surf_ext(dst_image, info->imageSubresource, info->imageOffset, info->imageExtent),
.src = tu_blit_buffer(src_buffer, dst_image->vk_format, info),
.layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount),
.type = TU_BLIT_COPY,
});
}
static void
tu_copy_image_to_buffer(struct tu_cmd_buffer *cmdbuf,
struct tu_image *src_image,
struct tu_buffer *dst_buffer,
const VkBufferImageCopy *info)
{
tu_blit(cmdbuf, &cmdbuf->cs, &(struct tu_blit) {
.dst = tu_blit_buffer(dst_buffer, src_image->vk_format, info),
.src = tu_blit_surf_ext(src_image, info->imageSubresource, info->imageOffset, info->imageExtent),
.layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount),
.type = TU_BLIT_COPY,
});
}
/* Record one VkImageCopy region as a TU_BLIT_COPY.
 *
 * A stencil-only copy where the affected image (source or destination) has a
 * format with a 4-byte block is not implemented: it is reported through
 * tu_finishme() and nothing is emitted.
 *
 * The number of layers to copy is the larger of extent.depth and the source
 * layerCount, matching the buffer<->image copy and resolve helpers. Using
 * extent.depth alone copies only the first layer when both images are 2D
 * arrays (depth is 1, layerCount is N).
 */
static void
tu_copy_image_to_image(struct tu_cmd_buffer *cmdbuf,
                       struct tu_image *src_image,
                       struct tu_image *dst_image,
                       const VkImageCopy *info)
{
   if ((info->dstSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT &&
        vk_format_get_blocksize(dst_image->vk_format) == 4) ||
       (info->srcSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT &&
        vk_format_get_blocksize(src_image->vk_format) == 4)) {
      tu_finishme("aspect mask\n");
      return;
   }

   tu_blit(cmdbuf, &cmdbuf->cs, &(struct tu_blit) {
      .dst = tu_blit_surf_ext(dst_image, info->dstSubresource, info->dstOffset, info->extent),
      .src = tu_blit_surf_ext(src_image, info->srcSubresource, info->srcOffset, info->extent),
      /* 3D slices are counted by extent.depth, array layers by layerCount */
      .layers = MAX2(info->extent.depth, info->srcSubresource.layerCount),
      .type = TU_BLIT_COPY,
   });
}
/* vkCmdCopyBuffer entry point: records each region in order through
 * tu_copy_buffer().
 */
void
tu_CmdCopyBuffer(VkCommandBuffer commandBuffer,
                 VkBuffer srcBuffer,
                 VkBuffer destBuffer,
                 uint32_t regionCount,
                 const VkBufferCopy *pRegions)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_buffer, src, srcBuffer);
   TU_FROM_HANDLE(tu_buffer, dst, destBuffer);

   const VkBufferCopy *region = pRegions;
   for (unsigned n = 0; n < regionCount; n++, region++)
      tu_copy_buffer(cmd, src, dst, region);
}
/* vkCmdCopyBufferToImage entry point.
 *
 * Adds the source buffer (read) and destination image (write) BOs to the
 * command buffer's BO list once, then records every region in order.
 * destImageLayout is not used.
 */
void
tu_CmdCopyBufferToImage(VkCommandBuffer commandBuffer,
                        VkBuffer srcBuffer,
                        VkImage destImage,
                        VkImageLayout destImageLayout,
                        uint32_t regionCount,
                        const VkBufferImageCopy *pRegions)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_image, image, destImage);
   TU_FROM_HANDLE(tu_buffer, buffer, srcBuffer);

   tu_bo_list_add(&cmd->bo_list, buffer->bo, MSM_SUBMIT_BO_READ);
   tu_bo_list_add(&cmd->bo_list, image->bo, MSM_SUBMIT_BO_WRITE);

   for (unsigned n = 0; n < regionCount; n++)
      tu_copy_buffer_to_image(cmd, buffer, image, &pRegions[n]);
}
/* vkCmdCopyImageToBuffer entry point.
 *
 * Adds the source image (read) and destination buffer (write) BOs to the
 * command buffer's BO list once, then records every region in order.
 * srcImageLayout is not used.
 */
void
tu_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer,
                        VkImage srcImage,
                        VkImageLayout srcImageLayout,
                        VkBuffer destBuffer,
                        uint32_t regionCount,
                        const VkBufferImageCopy *pRegions)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_image, image, srcImage);
   TU_FROM_HANDLE(tu_buffer, buffer, destBuffer);

   tu_bo_list_add(&cmd->bo_list, image->bo, MSM_SUBMIT_BO_READ);
   tu_bo_list_add(&cmd->bo_list, buffer->bo, MSM_SUBMIT_BO_WRITE);

   for (unsigned n = 0; n < regionCount; n++)
      tu_copy_image_to_buffer(cmd, image, buffer, &pRegions[n]);
}
/* vkCmdCopyImage entry point.
 *
 * Adds the source (read) and destination (write) image BOs to the command
 * buffer's BO list once, then records every region in order. Neither image
 * layout argument is used.
 */
void
tu_CmdCopyImage(VkCommandBuffer commandBuffer,
                VkImage srcImage,
                VkImageLayout srcImageLayout,
                VkImage destImage,
                VkImageLayout destImageLayout,
                uint32_t regionCount,
                const VkImageCopy *pRegions)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_image, src, srcImage);
   TU_FROM_HANDLE(tu_image, dst, destImage);

   tu_bo_list_add(&cmd->bo_list, src->bo, MSM_SUBMIT_BO_READ);
   tu_bo_list_add(&cmd->bo_list, dst->bo, MSM_SUBMIT_BO_WRITE);

   for (uint32_t n = 0; n < regionCount; n++)
      tu_copy_image_to_image(cmd, src, dst, &pRegions[n]);
}

View File

@@ -1,67 +0,0 @@
/*
* Copyright © 2016 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "tu_private.h"
#include <assert.h>
#include <stdbool.h>
#include "nir/nir_builder.h"
#include "vk_format.h"
#include "tu_blit.h"
static void
tu_resolve_image(struct tu_cmd_buffer *cmdbuf,
struct tu_image *src_image,
struct tu_image *dst_image,
const VkImageResolve *info)
{
assert(info->dstSubresource.layerCount == info->srcSubresource.layerCount);
tu_blit(cmdbuf, &cmdbuf->cs, &(struct tu_blit) {
.dst = tu_blit_surf_ext(dst_image, info->dstSubresource, info->dstOffset, info->extent),
.src = tu_blit_surf_ext(src_image, info->srcSubresource, info->srcOffset, info->extent),
.layers = MAX2(info->extent.depth, info->dstSubresource.layerCount)
});
}
/* vkCmdResolveImage entry point.
 *
 * Adds the source (read) and destination (write) image BOs to the command
 * buffer's BO list once, then records every region in order. Neither image
 * layout argument is used.
 */
void
tu_CmdResolveImage(VkCommandBuffer cmd_buffer_h,
                   VkImage src_image_h,
                   VkImageLayout src_image_layout,
                   VkImage dest_image_h,
                   VkImageLayout dest_image_layout,
                   uint32_t region_count,
                   const VkImageResolve *regions)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, cmd_buffer_h);
   TU_FROM_HANDLE(tu_image, src, src_image_h);
   TU_FROM_HANDLE(tu_image, dst, dest_image_h);

   tu_bo_list_add(&cmd->bo_list, src->bo, MSM_SUBMIT_BO_READ);
   tu_bo_list_add(&cmd->bo_list, dst->bo, MSM_SUBMIT_BO_WRITE);

   for (uint32_t n = 0; n < region_count; n++)
      tu_resolve_image(cmd, src, dst, &regions[n]);
}

View File

@@ -39,7 +39,8 @@ static void update_samples(struct tu_subpass *subpass,
#define GMEM_ALIGN 0x4000
static void
compute_gmem_offsets(struct tu_render_pass *pass, uint32_t gmem_size)
compute_gmem_offsets(struct tu_render_pass *pass,
const struct tu_physical_device *phys_dev)
{
/* calculate total bytes per pixel */
uint32_t cpp_total = 0;
@@ -56,12 +57,14 @@ compute_gmem_offsets(struct tu_render_pass *pass, uint32_t gmem_size)
return;
}
/* TODO: this algorithm isn't optimal
/* TODO: using ccu_offset_gmem so that BLIT_OP_SCALE resolve path
* doesn't break things. maybe there is a better solution?
* TODO: this algorithm isn't optimal
* for example, two attachments with cpp = {1, 4}
* result: nblocks = {12, 52}, pixels = 196608
* optimal: nblocks = {13, 51}, pixels = 208896
*/
uint32_t gmem_blocks = gmem_size / GMEM_ALIGN;
uint32_t gmem_blocks = phys_dev->ccu_offset_gmem / GMEM_ALIGN;
uint32_t offset = 0, pixels = ~0u;
for (uint32_t i = 0; i < pass->attachment_count; i++) {
struct tu_render_pass_attachment *att = &pass->attachments[i];
@@ -206,7 +209,7 @@ tu_CreateRenderPass(VkDevice _device,
*pRenderPass = tu_render_pass_to_handle(pass);
compute_gmem_offsets(pass, device->physical_device->gmem_size);
compute_gmem_offsets(pass, device->physical_device);
return VK_SUCCESS;
}
@@ -335,7 +338,7 @@ tu_CreateRenderPass2(VkDevice _device,
*pRenderPass = tu_render_pass_to_handle(pass);
compute_gmem_offsets(pass, device->physical_device->gmem_size);
compute_gmem_offsets(pass, device->physical_device);
return VK_SUCCESS;
}

View File

@@ -77,6 +77,8 @@ typedef uint32_t xcb_window_t;
#include "tu_entrypoints.h"
#include "vk_format.h"
#define MAX_VBS 32
#define MAX_VERTEX_ATTRIBS 32
#define MAX_RTS 8
@@ -1284,6 +1286,48 @@ tu6_emit_stencil_reference(struct tu_cs *cs, uint32_t front, uint32_t back);
void
tu6_emit_blend_constants(struct tu_cs *cs, const float constants[4]);
void tu6_emit_msaa(struct tu_cs *cs, VkSampleCountFlagBits samples);
void tu6_emit_window_scissor(struct tu_cs *cs, uint32_t x1, uint32_t y1, uint32_t x2, uint32_t y2);
void tu6_emit_window_offset(struct tu_cs *cs, uint32_t x1, uint32_t y1);
struct tu_image_view;
void
tu_resolve_sysmem(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
struct tu_image_view *src,
struct tu_image_view *dst,
uint32_t layers,
const VkRect2D *rect);
void
tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
uint32_t a,
const VkRenderPassBeginInfo *info);
void
tu_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
uint32_t a,
const VkRenderPassBeginInfo *info);
void
tu_load_gmem_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, uint32_t a);
/* expose this function to be able to emit load without checking LOAD_OP */
void
tu_emit_load_gmem_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, uint32_t a);
/* note: gmem store can also resolve */
void
tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
uint32_t a,
uint32_t gmem_a);
struct tu_userdata_info *
tu_lookup_user_sgpr(struct tu_pipeline *pipeline,
gl_shader_stage stage,
@@ -1330,18 +1374,6 @@ tu6_base_format(VkFormat format)
return tu6_format_color(format, TILE6_LINEAR).fmt;
}
void
tu_pack_clear_value(const VkClearValue *val,
VkFormat format,
uint32_t buf[4]);
void
tu_2d_clear_color(const VkClearColorValue *val, VkFormat format, uint32_t buf[4]);
void
tu_2d_clear_zs(const VkClearDepthStencilValue *val, VkFormat format, uint32_t buf[4]);
enum a6xx_2d_ifmt tu6_fmt_to_ifmt(enum a6xx_format fmt);
enum a6xx_depth_format tu6_pipe2depth(VkFormat format);
struct tu_image
@@ -1409,6 +1441,14 @@ tu_image_stride(struct tu_image *image, int level)
return image->layout.slices[level].pitch * image->layout.cpp;
}
/* Pitch of a mip level: the level's stride divided by the block width of the
 * image format, which gives the right value for compressed formats.
 */
static inline uint32_t
tu_image_pitch(struct tu_image *image, int level)
{
   const uint32_t level_stride = tu_image_stride(image, level);
   const uint32_t block_width = vk_format_get_blockwidth(image->vk_format);

   return level_stride / block_width;
}
static inline uint64_t
tu_image_base(struct tu_image *image, int level, int layer)
{
@@ -1458,10 +1498,16 @@ tu_image_ubwc_base(struct tu_image *image, int level, int layer)
#define tu_image_view_ubwc_base_ref(iview) \
tu_image_ubwc_base_ref(iview->image, iview->base_mip, iview->base_layer)
/* Expands to two designated initializers, .pitch and .array_pitch, computed
 * from the image view's image at its base mip level. Intended to be pasted
 * into an initializer list that has fields with those names.
 * NOTE(review): array_pitch is tu_image_ubwc_size() shifted right by 2; the
 * unit the consumer expects is not visible here - confirm against the user.
 */
#define tu_image_view_ubwc_pitches(iview) \
.pitch = tu_image_ubwc_pitch(iview->image, iview->base_mip), \
.array_pitch = tu_image_ubwc_size(iview->image, iview->base_mip) >> 2
enum a6xx_tile_mode
tu6_get_image_tile_mode(struct tu_image *image, int level);
enum a3xx_msaa_samples
tu_msaa_samples(uint32_t samples);
enum a6xx_tex_fetchsize
tu6_fetchsize(VkFormat format);
static inline struct tu_native_format
tu6_format_image(struct tu_image *image, VkFormat format, uint32_t level)
@@ -1705,21 +1751,6 @@ tu_gem_info_offset(const struct tu_device *dev, uint32_t gem_handle);
uint64_t
tu_gem_info_iova(const struct tu_device *dev, uint32_t gem_handle);
void
tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
uint32_t attachment,
const VkClearValue *value,
const VkClearRect *rect);
void
tu_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
uint32_t attachment,
uint8_t component_mask,
const VkClearValue *value);
#define TU_DEFINE_HANDLE_CASTS(__tu_type, __VkType) \
\
static inline struct __tu_type *__tu_type##_from_handle(__VkType _handle) \