turnip: new clear/blit implementation with shader path fallback
The shader path is used to implement the following cases:
* stencil aspect mask on D24S8 (for image_to_buffer, buffer_to_image)
* clear/copy msaa destination (2D engine can't have msaa dest)

Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3783>
This commit is contained in:

committed by
Marge Bot

parent
de6967488a
commit
2e084c2cb3
@@ -2383,24 +2383,27 @@ to upconvert to 32b float internally?
|
||||
<bitfield name="COLOR_FORMAT" low="0" high="7" type="a6xx_format"/>
|
||||
<bitfield name="TILE_MODE" low="8" high="9" type="a6xx_tile_mode"/>
|
||||
<bitfield name="COLOR_SWAP" low="10" high="11" type="a3xx_color_swap"/>
|
||||
<!-- b12 seems to be set when UBWC "FLAGS" buffer enabled -->
|
||||
<bitfield name="FLAGS" pos="12" type="boolean"/>
|
||||
<bitfield name="SRGB" pos="13" type="boolean"/>
|
||||
<!-- the rest is only for src -->
|
||||
<bitfield name="SAMPLES" low="14" high="15" type="a3xx_msaa_samples"/>
|
||||
<bitfield name="FILTER" pos="16" type="boolean"/>
|
||||
<bitfield name="SAMPLES_AVERAGE" pos="18" type="boolean"/>
|
||||
<bitfield name="UNK20" pos="20" type="boolean"/>
|
||||
<bitfield name="UNK22" pos="22" type="boolean"/>
|
||||
</bitset>
|
||||
|
||||
<reg32 offset="0x8c17" name="RB_2D_DST_INFO" type="a6xx_2d_surf_info"/>
|
||||
<reg32 offset="0x8c18" name="RB_2D_DST_LO"/>
|
||||
<reg32 offset="0x8c19" name="RB_2D_DST_HI"/>
|
||||
<reg64 offset="0x8c18" name="RB_2D_DST" type="waddress"/>
|
||||
<reg32 offset="0x8c1a" name="RB_2D_DST_SIZE">
|
||||
<bitfield name="PITCH" low="0" high="15" shr="6" type="uint"/>
|
||||
</reg32>
|
||||
|
||||
<reg32 offset="0x8c20" name="RB_2D_DST_FLAGS_LO"/>
|
||||
<reg32 offset="0x8c21" name="RB_2D_DST_FLAGS_HI"/>
|
||||
<reg64 offset="0x8c20" name="RB_2D_DST_FLAGS" type="waddress"/>
|
||||
<reg32 offset="0x8c22" name="RB_2D_DST_FLAGS_PITCH">
|
||||
<bitfield name="PITCH" low="0" high="10" shr="6" type="uint"/>
|
||||
<bitfield name="ARRAY_PITCH" low="11" high="21" shr="7" type="uint"/>
|
||||
@@ -3120,12 +3123,14 @@ to upconvert to 32b float internally?
|
||||
</reg32>
|
||||
<reg32 offset="0xb4c2" name="SP_PS_2D_SRC_LO"/>
|
||||
<reg32 offset="0xb4c3" name="SP_PS_2D_SRC_HI"/>
|
||||
<reg64 offset="0xb4c2" name="SP_PS_2D_SRC" type="waddress"/>
|
||||
<reg32 offset="0xb4c4" name="SP_PS_2D_SRC_PITCH">
|
||||
<bitfield name="PITCH" low="9" high="24" shr="6" type="uint"/>
|
||||
</reg32>
|
||||
|
||||
<reg32 offset="0xb4ca" name="SP_PS_2D_SRC_FLAGS_LO"/>
|
||||
<reg32 offset="0xb4cb" name="SP_PS_2D_SRC_FLAGS_HI"/>
|
||||
<reg64 offset="0xb4ca" name="SP_PS_2D_SRC_FLAGS" type="waddress"/>
|
||||
<reg32 offset="0xb4cc" name="SP_PS_2D_SRC_FLAGS_PITCH">
|
||||
<bitfield name="PITCH" low="0" high="10" shr="6" type="uint"/>
|
||||
<bitfield name="ARRAY_PITCH" low="11" high="21" shr="7" type="uint"/>
|
||||
|
@@ -40,8 +40,7 @@ tu_extensions_c = custom_target(
|
||||
)
|
||||
|
||||
libtu_files = files(
|
||||
'tu_blit.c',
|
||||
'tu_blit.h',
|
||||
'tu_clear_blit.c',
|
||||
'tu_cmd_buffer.c',
|
||||
'tu_cs.c',
|
||||
'tu_cs.h',
|
||||
@@ -52,11 +51,6 @@ libtu_files = files(
|
||||
'tu_fence.c',
|
||||
'tu_formats.c',
|
||||
'tu_image.c',
|
||||
'tu_meta_blit.c',
|
||||
'tu_meta_buffer.c',
|
||||
'tu_meta_clear.c',
|
||||
'tu_meta_copy.c',
|
||||
'tu_meta_resolve.c',
|
||||
'tu_pass.c',
|
||||
'tu_pipeline.c',
|
||||
'tu_pipeline_cache.c',
|
||||
|
@@ -1,372 +0,0 @@
|
||||
/*
|
||||
* Copyright © 2019 Valve Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Jonathan Marek <jonathan@marek.ca>
|
||||
*
|
||||
*/
|
||||
|
||||
#include "tu_blit.h"
|
||||
|
||||
#include "a6xx.xml.h"
|
||||
#include "adreno_common.xml.h"
|
||||
#include "adreno_pm4.xml.h"
|
||||
|
||||
#include "vk_format.h"
|
||||
|
||||
#include "tu_cs.h"
|
||||
|
||||
/* TODO:
|
||||
* - Avoid disabling tiling for swapped formats
|
||||
* (image_to_image copy doesn't deal with it)
|
||||
* - Fix d24_unorm_s8_uint support & aspects
|
||||
* - UBWC
|
||||
*/
|
||||
|
||||
static VkFormat
|
||||
blit_copy_format(VkFormat format)
|
||||
{
|
||||
switch (vk_format_get_blocksizebits(format)) {
|
||||
case 8: return VK_FORMAT_R8_UINT;
|
||||
case 16: return VK_FORMAT_R16_UINT;
|
||||
case 32: return VK_FORMAT_R32_UINT;
|
||||
case 64: return VK_FORMAT_R32G32_UINT;
|
||||
case 96: return VK_FORMAT_R32G32B32_UINT;
|
||||
case 128:return VK_FORMAT_R32G32B32A32_UINT;
|
||||
default:
|
||||
unreachable("unhandled format size");
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
blit_image_info(const struct tu_blit_surf *img, struct tu_native_format fmt, bool stencil_read)
|
||||
{
|
||||
if (fmt.fmt == FMT6_Z24_UNORM_S8_UINT)
|
||||
fmt.fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
|
||||
|
||||
if (stencil_read)
|
||||
fmt.swap = XYZW;
|
||||
|
||||
return A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(fmt.fmt) |
|
||||
A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(img->tile_mode) |
|
||||
A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(fmt.swap) |
|
||||
COND(vk_format_is_srgb(img->fmt), A6XX_SP_PS_2D_SRC_INFO_SRGB) |
|
||||
COND(img->ubwc_size, A6XX_SP_PS_2D_SRC_INFO_FLAGS);
|
||||
}
|
||||
|
||||
static void
|
||||
emit_blit_step(struct tu_cmd_buffer *cmdbuf, struct tu_cs *cs,
|
||||
const struct tu_blit *blt)
|
||||
{
|
||||
struct tu_physical_device *phys_dev = cmdbuf->device->physical_device;
|
||||
|
||||
struct tu_native_format dfmt = tu6_format_color(blt->dst.fmt, blt->dst.image_tile_mode);
|
||||
struct tu_native_format sfmt = tu6_format_texture(blt->src.fmt, blt->src.image_tile_mode);
|
||||
|
||||
if (dfmt.fmt == FMT6_Z24_UNORM_S8_UINT)
|
||||
dfmt.fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
|
||||
|
||||
enum a6xx_2d_ifmt ifmt = tu6_fmt_to_ifmt(dfmt.fmt);
|
||||
|
||||
if (vk_format_is_srgb(blt->dst.fmt)) {
|
||||
assert(ifmt == R2D_UNORM8);
|
||||
ifmt = R2D_UNORM8_SRGB;
|
||||
}
|
||||
|
||||
uint32_t blit_cntl = A6XX_RB_2D_BLIT_CNTL_ROTATE(blt->rotation) |
|
||||
COND(blt->type == TU_BLIT_CLEAR, A6XX_RB_2D_BLIT_CNTL_SOLID_COLOR) |
|
||||
A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(dfmt.fmt) | /* not required? */
|
||||
COND(dfmt.fmt == FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8,
|
||||
A6XX_RB_2D_BLIT_CNTL_D24S8) |
|
||||
A6XX_RB_2D_BLIT_CNTL_MASK(0xf) |
|
||||
A6XX_RB_2D_BLIT_CNTL_IFMT(ifmt);
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
|
||||
tu_cs_emit(cs, blit_cntl);
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
|
||||
tu_cs_emit(cs, blit_cntl);
|
||||
|
||||
/*
|
||||
* Emit source:
|
||||
*/
|
||||
if (blt->type == TU_BLIT_CLEAR) {
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_SRC_SOLID_C0, 4);
|
||||
tu_cs_emit(cs, blt->clear_value[0]);
|
||||
tu_cs_emit(cs, blt->clear_value[1]);
|
||||
tu_cs_emit(cs, blt->clear_value[2]);
|
||||
tu_cs_emit(cs, blt->clear_value[3]);
|
||||
} else {
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_INFO, 10);
|
||||
tu_cs_emit(cs, blit_image_info(&blt->src, sfmt, blt->stencil_read) |
|
||||
A6XX_SP_PS_2D_SRC_INFO_SAMPLES(tu_msaa_samples(blt->src.samples)) |
|
||||
/* TODO: should disable this bit for integer formats ? */
|
||||
COND(blt->src.samples > 1, A6XX_SP_PS_2D_SRC_INFO_SAMPLES_AVERAGE) |
|
||||
COND(blt->filter, A6XX_SP_PS_2D_SRC_INFO_FILTER) |
|
||||
0x500000);
|
||||
tu_cs_emit(cs, A6XX_SP_PS_2D_SRC_SIZE_WIDTH(blt->src.x + blt->src.width) |
|
||||
A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(blt->src.y + blt->src.height));
|
||||
tu_cs_emit_qw(cs, blt->src.va);
|
||||
tu_cs_emit(cs, A6XX_SP_PS_2D_SRC_PITCH_PITCH(blt->src.pitch));
|
||||
|
||||
tu_cs_emit(cs, 0x00000000);
|
||||
tu_cs_emit(cs, 0x00000000);
|
||||
tu_cs_emit(cs, 0x00000000);
|
||||
tu_cs_emit(cs, 0x00000000);
|
||||
tu_cs_emit(cs, 0x00000000);
|
||||
|
||||
if (blt->src.ubwc_size) {
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_FLAGS_LO, 6);
|
||||
tu_cs_emit_qw(cs, blt->src.ubwc_va);
|
||||
tu_cs_emit(cs, A6XX_SP_PS_2D_SRC_FLAGS_PITCH_PITCH(blt->src.ubwc_pitch) |
|
||||
A6XX_SP_PS_2D_SRC_FLAGS_PITCH_ARRAY_PITCH(blt->src.ubwc_size >> 2));
|
||||
tu_cs_emit(cs, 0x00000000);
|
||||
tu_cs_emit(cs, 0x00000000);
|
||||
tu_cs_emit(cs, 0x00000000);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Emit destination:
|
||||
*/
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 9);
|
||||
tu_cs_emit(cs, blit_image_info(&blt->dst, dfmt, false));
|
||||
tu_cs_emit_qw(cs, blt->dst.va);
|
||||
tu_cs_emit(cs, A6XX_RB_2D_DST_SIZE_PITCH(blt->dst.pitch));
|
||||
tu_cs_emit(cs, 0x00000000);
|
||||
tu_cs_emit(cs, 0x00000000);
|
||||
tu_cs_emit(cs, 0x00000000);
|
||||
tu_cs_emit(cs, 0x00000000);
|
||||
tu_cs_emit(cs, 0x00000000);
|
||||
|
||||
if (blt->dst.ubwc_size) {
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_FLAGS_LO, 6);
|
||||
tu_cs_emit_qw(cs, blt->dst.ubwc_va);
|
||||
tu_cs_emit(cs, A6XX_RB_2D_DST_FLAGS_PITCH_PITCH(blt->dst.ubwc_pitch) |
|
||||
A6XX_RB_2D_DST_FLAGS_PITCH_ARRAY_PITCH(blt->dst.ubwc_size >> 2));
|
||||
tu_cs_emit(cs, 0x00000000);
|
||||
tu_cs_emit(cs, 0x00000000);
|
||||
tu_cs_emit(cs, 0x00000000);
|
||||
}
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
|
||||
tu_cs_emit(cs, A6XX_GRAS_2D_SRC_TL_X_X(blt->src.x));
|
||||
tu_cs_emit(cs, A6XX_GRAS_2D_SRC_BR_X_X(blt->src.x + blt->src.width - 1));
|
||||
tu_cs_emit(cs, A6XX_GRAS_2D_SRC_TL_Y_Y(blt->src.y));
|
||||
tu_cs_emit(cs, A6XX_GRAS_2D_SRC_BR_Y_Y(blt->src.y + blt->src.height - 1));
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_2D_DST_TL, 2);
|
||||
tu_cs_emit(cs, A6XX_GRAS_2D_DST_TL_X(blt->dst.x) |
|
||||
A6XX_GRAS_2D_DST_TL_Y(blt->dst.y));
|
||||
tu_cs_emit(cs, A6XX_GRAS_2D_DST_BR_X(blt->dst.x + blt->dst.width - 1) |
|
||||
A6XX_GRAS_2D_DST_BR_Y(blt->dst.y + blt->dst.height - 1));
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, 1);
|
||||
tu_cs_emit(cs, 0x3f);
|
||||
tu_cs_emit_wfi(cs);
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_8C01, 1);
|
||||
tu_cs_emit(cs, 0);
|
||||
|
||||
if (dfmt.fmt == FMT6_10_10_10_2_UNORM_DEST)
|
||||
dfmt.fmt = FMT6_16_16_16_16_FLOAT;
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_SP_2D_SRC_FORMAT, 1);
|
||||
tu_cs_emit(cs, COND(vk_format_is_sint(blt->src.fmt), A6XX_SP_2D_SRC_FORMAT_SINT) |
|
||||
COND(vk_format_is_uint(blt->src.fmt), A6XX_SP_2D_SRC_FORMAT_UINT) |
|
||||
A6XX_SP_2D_SRC_FORMAT_COLOR_FORMAT(dfmt.fmt) |
|
||||
COND(ifmt == R2D_UNORM8_SRGB, A6XX_SP_2D_SRC_FORMAT_SRGB) |
|
||||
A6XX_SP_2D_SRC_FORMAT_MASK(0xf));
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
|
||||
tu_cs_emit(cs, phys_dev->magic.RB_UNKNOWN_8E04_blit);
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_BLIT, 1);
|
||||
tu_cs_emit(cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
|
||||
|
||||
tu_cs_emit_wfi(cs);
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_8E04, 1);
|
||||
tu_cs_emit(cs, 0);
|
||||
}
|
||||
|
||||
void tu_blit(struct tu_cmd_buffer *cmdbuf, struct tu_cs *cs,
|
||||
struct tu_blit *blt)
|
||||
{
|
||||
struct tu_physical_device *phys_dev = cmdbuf->device->physical_device;
|
||||
|
||||
switch (blt->type) {
|
||||
case TU_BLIT_COPY:
|
||||
blt->stencil_read =
|
||||
blt->dst.fmt == VK_FORMAT_R8_UNORM &&
|
||||
blt->src.fmt == VK_FORMAT_D24_UNORM_S8_UINT;
|
||||
|
||||
assert(vk_format_get_blocksize(blt->dst.fmt) ==
|
||||
vk_format_get_blocksize(blt->src.fmt) || blt->stencil_read);
|
||||
assert(blt->src.samples == blt->dst.samples);
|
||||
|
||||
if (vk_format_is_compressed(blt->src.fmt)) {
|
||||
unsigned block_width = vk_format_get_blockwidth(blt->src.fmt);
|
||||
unsigned block_height = vk_format_get_blockheight(blt->src.fmt);
|
||||
|
||||
blt->src.pitch /= block_width;
|
||||
blt->src.x /= block_width;
|
||||
blt->src.y /= block_height;
|
||||
blt->src.fmt = blit_copy_format(blt->src.fmt);
|
||||
|
||||
/* for image_to_image copy, width/height is on the src format */
|
||||
blt->dst.width = blt->src.width = DIV_ROUND_UP(blt->src.width, block_width);
|
||||
blt->dst.height = blt->src.height = DIV_ROUND_UP(blt->src.height, block_height);
|
||||
}
|
||||
|
||||
if (vk_format_is_compressed(blt->dst.fmt)) {
|
||||
unsigned block_width = vk_format_get_blockwidth(blt->dst.fmt);
|
||||
unsigned block_height = vk_format_get_blockheight(blt->dst.fmt);
|
||||
|
||||
blt->dst.pitch /= block_width;
|
||||
blt->dst.x /= block_width;
|
||||
blt->dst.y /= block_height;
|
||||
blt->dst.fmt = blit_copy_format(blt->dst.fmt);
|
||||
}
|
||||
|
||||
if (blt->dst.fmt == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
|
||||
blt->dst.fmt = blit_copy_format(blt->dst.fmt);
|
||||
|
||||
if (blt->src.fmt == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
|
||||
blt->src.fmt = blit_copy_format(blt->src.fmt);
|
||||
|
||||
/* TODO: multisample image copy does not work correctly with tiling/UBWC */
|
||||
blt->src.x *= blt->src.samples;
|
||||
blt->dst.x *= blt->dst.samples;
|
||||
blt->src.width *= blt->src.samples;
|
||||
blt->dst.width *= blt->dst.samples;
|
||||
blt->src.samples = 1;
|
||||
blt->dst.samples = 1;
|
||||
break;
|
||||
case TU_BLIT_CLEAR:
|
||||
/* unsupported format cleared as UINT32 */
|
||||
if (blt->dst.fmt == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
|
||||
blt->dst.fmt = VK_FORMAT_R32_UINT;
|
||||
/* TODO: multisample image clearing also seems not to work with certain
|
||||
* formats. The blob uses a shader-based clear in these cases.
|
||||
*/
|
||||
blt->dst.x *= blt->dst.samples;
|
||||
blt->dst.width *= blt->dst.samples;
|
||||
blt->dst.samples = 1;
|
||||
blt->src = blt->dst;
|
||||
break;
|
||||
default:
|
||||
assert(blt->dst.samples == 1);
|
||||
}
|
||||
|
||||
tu6_emit_event_write(cmdbuf, cs, LRZ_FLUSH, false);
|
||||
tu6_emit_event_write(cmdbuf, cs, PC_CCU_FLUSH_COLOR_TS, true);
|
||||
tu6_emit_event_write(cmdbuf, cs, PC_CCU_FLUSH_DEPTH_TS, true);
|
||||
tu6_emit_event_write(cmdbuf, cs, PC_CCU_INVALIDATE_COLOR, false);
|
||||
tu6_emit_event_write(cmdbuf, cs, PC_CCU_INVALIDATE_DEPTH, false);
|
||||
|
||||
tu_cs_emit_wfi(cs);
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_RB_CCU_CNTL(.offset = phys_dev->ccu_offset_bypass));
|
||||
|
||||
/* buffer copy setup */
|
||||
tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
|
||||
tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
|
||||
|
||||
for (unsigned layer = 0; layer < blt->layers; layer++) {
|
||||
if (blt->buffer) {
|
||||
struct tu_blit line_blt = *blt;
|
||||
uint64_t dst_va = line_blt.dst.va, src_va = line_blt.src.va;
|
||||
unsigned blocksize = vk_format_get_blocksize(blt->src.fmt);
|
||||
uint32_t size = line_blt.src.width, tmp;
|
||||
|
||||
while (size) {
|
||||
line_blt.src.x = (src_va & 63) / blocksize;
|
||||
line_blt.src.va = src_va & ~63;
|
||||
tmp = MIN2(size, 0x4000 - line_blt.src.x);
|
||||
|
||||
line_blt.dst.x = (dst_va & 63) / blocksize;
|
||||
line_blt.dst.va = dst_va & ~63;
|
||||
tmp = MIN2(tmp, 0x4000 - line_blt.dst.x);
|
||||
|
||||
line_blt.src.width = line_blt.dst.width = tmp;
|
||||
|
||||
emit_blit_step(cmdbuf, cs, &line_blt);
|
||||
|
||||
src_va += tmp * blocksize;
|
||||
dst_va += tmp * blocksize;
|
||||
size -= tmp;
|
||||
}
|
||||
} else if ((blt->src.va & 63) || (blt->src.pitch & 63)) {
|
||||
/* per line copy path (buffer_to_image) */
|
||||
assert(blt->type == TU_BLIT_COPY && !blt->src.image_tile_mode);
|
||||
struct tu_blit line_blt = *blt;
|
||||
uint64_t src_va = line_blt.src.va + blt->src.pitch * blt->src.y;
|
||||
|
||||
line_blt.src.y = 0;
|
||||
line_blt.src.pitch = 0;
|
||||
line_blt.src.height = 1;
|
||||
line_blt.dst.height = 1;
|
||||
|
||||
for (unsigned y = 0; y < blt->src.height; y++) {
|
||||
line_blt.src.x = blt->src.x + (src_va & 63) / vk_format_get_blocksize(blt->src.fmt);
|
||||
line_blt.src.va = src_va & ~63;
|
||||
|
||||
emit_blit_step(cmdbuf, cs, &line_blt);
|
||||
|
||||
line_blt.dst.y++;
|
||||
src_va += blt->src.pitch;
|
||||
}
|
||||
} else if ((blt->dst.va & 63) || (blt->dst.pitch & 63)) {
|
||||
/* per line copy path (image_to_buffer) */
|
||||
assert(blt->type == TU_BLIT_COPY && !blt->dst.image_tile_mode);
|
||||
struct tu_blit line_blt = *blt;
|
||||
uint64_t dst_va = line_blt.dst.va + blt->dst.pitch * blt->dst.y;
|
||||
|
||||
line_blt.dst.y = 0;
|
||||
line_blt.dst.pitch = 0;
|
||||
line_blt.src.height = 1;
|
||||
line_blt.dst.height = 1;
|
||||
|
||||
for (unsigned y = 0; y < blt->src.height; y++) {
|
||||
line_blt.dst.x = blt->dst.x + (dst_va & 63) / vk_format_get_blocksize(blt->dst.fmt);
|
||||
line_blt.dst.va = dst_va & ~63;
|
||||
|
||||
emit_blit_step(cmdbuf, cs, &line_blt);
|
||||
|
||||
line_blt.src.y++;
|
||||
dst_va += blt->dst.pitch;
|
||||
}
|
||||
} else {
|
||||
emit_blit_step(cmdbuf, cs, blt);
|
||||
}
|
||||
blt->dst.va += blt->dst.layer_size;
|
||||
blt->src.va += blt->src.layer_size;
|
||||
blt->dst.ubwc_va += blt->dst.ubwc_size;
|
||||
blt->src.ubwc_va += blt->src.ubwc_size;
|
||||
}
|
||||
|
||||
tu6_emit_event_write(cmdbuf, cs, PC_CCU_FLUSH_COLOR_TS, true);
|
||||
tu6_emit_event_write(cmdbuf, cs, PC_CCU_FLUSH_DEPTH_TS, true);
|
||||
tu6_emit_event_write(cmdbuf, cs, CACHE_FLUSH_TS, true);
|
||||
tu6_emit_event_write(cmdbuf, cs, CACHE_INVALIDATE, false);
|
||||
}
|
@@ -1,145 +0,0 @@
|
||||
/*
|
||||
* Copyright © 2019 Valve Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Jonathan Marek <jonathan@marek.ca>
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef TU_BLIT_H
|
||||
#define TU_BLIT_H
|
||||
|
||||
#include "tu_private.h"
|
||||
|
||||
#include "vk_format.h"
|
||||
|
||||
/* Description of one surface (source or destination) of a 2D-engine blit. */
struct tu_blit_surf {
|
||||
/* Vulkan format of the surface; tu_blit() may replace it with a raw-copy format. */
VkFormat fmt;
|
||||
/* Tile mode programmed into the surface-info register (per mip level in tu_blit_surf()). */
enum a6xx_tile_mode tile_mode;
|
||||
/* Tile mode of the image layout, passed to tu6_format_color()/tu6_format_texture(). */
enum a6xx_tile_mode image_tile_mode;
|
||||
/* GPU address of the surface; tu_blit() advances it by layer_size per layer. */
uint64_t va;
|
||||
/* Row pitch written to the PITCH register field, and the per-layer address step. */
uint32_t pitch, layer_size;
|
||||
/* Top-left corner of the blit rectangle. */
uint32_t x, y;
|
||||
/* Size of the blit rectangle. */
uint32_t width, height;
|
||||
/* Sample count; tu_blit() folds it into x/width and resets it to 1 for copies and clears. */
unsigned samples;
|
||||
/* GPU address of the UBWC flag buffer; advanced by ubwc_size per layer. */
uint64_t ubwc_va;
|
||||
/* Pitch of the UBWC flag buffer. */
uint32_t ubwc_pitch;
|
||||
/* UBWC size; non-zero enables the FLAGS bit and the flag-buffer registers. */
uint32_t ubwc_size;
|
||||
};
|
||||
|
||||
static inline struct tu_blit_surf
|
||||
tu_blit_surf(struct tu_image *image,
|
||||
VkImageSubresourceLayers subres,
|
||||
const VkOffset3D *offsets)
|
||||
{
|
||||
unsigned layer = subres.baseArrayLayer;
|
||||
if (image->type == VK_IMAGE_TYPE_3D) {
|
||||
assert(layer == 0);
|
||||
layer = MIN2(offsets[0].z, offsets[1].z);
|
||||
}
|
||||
|
||||
return (struct tu_blit_surf) {
|
||||
.fmt = image->vk_format,
|
||||
.tile_mode = tu6_get_image_tile_mode(image, subres.mipLevel),
|
||||
.image_tile_mode = image->layout.tile_mode,
|
||||
.va = tu_image_base(image, subres.mipLevel, layer),
|
||||
.pitch = tu_image_stride(image, subres.mipLevel),
|
||||
.layer_size = tu_layer_size(image, subres.mipLevel),
|
||||
.x = MIN2(offsets[0].x, offsets[1].x),
|
||||
.y = MIN2(offsets[0].y, offsets[1].y),
|
||||
.width = abs(offsets[1].x - offsets[0].x),
|
||||
.height = abs(offsets[1].y - offsets[0].y),
|
||||
.samples = image->samples,
|
||||
.ubwc_va = tu_image_ubwc_base(image, subres.mipLevel, layer),
|
||||
.ubwc_pitch = tu_image_ubwc_pitch(image, subres.mipLevel),
|
||||
.ubwc_size = tu_image_ubwc_size(image, subres.mipLevel),
|
||||
};
|
||||
}
|
||||
|
||||
static inline struct tu_blit_surf
|
||||
tu_blit_surf_ext(struct tu_image *image,
|
||||
VkImageSubresourceLayers subres,
|
||||
VkOffset3D offset,
|
||||
VkExtent3D extent)
|
||||
{
|
||||
return tu_blit_surf(image, subres, (VkOffset3D[]) {
|
||||
offset, {.x = offset.x + extent.width,
|
||||
.y = offset.y + extent.height,
|
||||
.z = offset.z}
|
||||
});
|
||||
}
|
||||
|
||||
static inline struct tu_blit_surf
|
||||
tu_blit_surf_whole(struct tu_image *image, int level, int layer)
|
||||
{
|
||||
return tu_blit_surf(image, (VkImageSubresourceLayers){
|
||||
.mipLevel = level,
|
||||
.baseArrayLayer = layer,
|
||||
}, (VkOffset3D[]) {
|
||||
{}, {
|
||||
u_minify(image->extent.width, level),
|
||||
u_minify(image->extent.height, level),
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
static inline struct tu_blit_surf
|
||||
sysmem_attachment_surf(const struct tu_image_view *view, uint32_t base_layer,
|
||||
const VkRect2D *rect)
|
||||
{
|
||||
return tu_blit_surf_ext(view->image, (VkImageSubresourceLayers) {
|
||||
.mipLevel = view->base_mip,
|
||||
.baseArrayLayer = base_layer,
|
||||
}, (VkOffset3D) {
|
||||
.x = rect->offset.x,
|
||||
.y = rect->offset.y,
|
||||
.z = 0,
|
||||
}, (VkExtent3D) {
|
||||
.width = rect->extent.width,
|
||||
.height = rect->extent.height,
|
||||
.depth = 1,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
/* Kind of operation requested from tu_blit(). */
enum tu_blit_type {
|
||||
/* Plain blit; tu_blit() only asserts that the destination is single-sampled. */
TU_BLIT_DEFAULT,
|
||||
/* Copy; tu_blit() rewrites compressed and E5B9G9R9 formats to same-size UINT formats. */
TU_BLIT_COPY,
|
||||
/* Clear; the source is the solid color in tu_blit::clear_value. */
TU_BLIT_CLEAR,
|
||||
};
|
||||
|
||||
/* Full description of a 2D-engine operation passed to tu_blit(). */
struct tu_blit {
|
||||
/* Destination surface. */
struct tu_blit_surf dst;
|
||||
/* Source surface; overwritten with dst by tu_blit() for clears. */
struct tu_blit_surf src;
|
||||
/* Number of layers tu_blit() iterates over. */
uint32_t layers;
|
||||
/* Sets the FILTER bit in the source info register. */
bool filter;
|
||||
/* Computed by tu_blit() for copies: true when reading D24S8 into an R8_UNORM destination. */
bool stencil_read;
|
||||
bool buffer; /* 1d copy/clear */
|
||||
/* Rotation programmed into the 2D blit control register. */
enum a6xx_rotation rotation;
|
||||
/* Solid color dwords emitted for TU_BLIT_CLEAR. */
uint32_t clear_value[4];
|
||||
/* Selects the preprocessing done by tu_blit(). */
enum tu_blit_type type;
|
||||
};
|
||||
|
||||
void tu_blit(struct tu_cmd_buffer *cmdbuf, struct tu_cs *cs,
|
||||
struct tu_blit *blt);
|
||||
|
||||
#endif /* TU_BLIT_H */
|
2390
src/freedreno/vulkan/tu_clear_blit.c
Normal file
2390
src/freedreno/vulkan/tu_clear_blit.c
Normal file
File diff suppressed because it is too large
Load Diff
@@ -33,7 +33,6 @@
|
||||
#include "vk_format.h"
|
||||
|
||||
#include "tu_cs.h"
|
||||
#include "tu_blit.h"
|
||||
|
||||
#define OVERFLOW_FLAG_REG REG_A6XX_CP_SCRATCH_REG(0)
|
||||
|
||||
@@ -111,69 +110,6 @@ tu_bo_list_merge(struct tu_bo_list *list, const struct tu_bo_list *other)
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static bool
|
||||
is_linear_mipmapped(const struct tu_image_view *iview)
|
||||
{
|
||||
return iview->image->layout.tile_mode == TILE6_LINEAR &&
|
||||
iview->base_mip != iview->image->level_count - 1;
|
||||
}
|
||||
|
||||
static bool
|
||||
force_sysmem(const struct tu_cmd_buffer *cmd,
|
||||
const struct VkRect2D *render_area)
|
||||
{
|
||||
const struct tu_framebuffer *fb = cmd->state.framebuffer;
|
||||
bool has_linear_mipmapped_store = false;
|
||||
const struct tu_render_pass *pass = cmd->state.pass;
|
||||
|
||||
/* Layered rendering requires sysmem. */
|
||||
if (fb->layers > 1)
|
||||
return true;
|
||||
|
||||
/* Iterate over all the places we call tu6_emit_store_attachment() */
|
||||
for (unsigned i = 0; i < pass->subpass_count; i++) {
|
||||
const struct tu_subpass *subpass = &pass->subpasses[i];
|
||||
if (subpass->resolve_attachments) {
|
||||
for (unsigned i = 0; i < subpass->color_count; i++) {
|
||||
uint32_t a = subpass->resolve_attachments[i].attachment;
|
||||
if (a != VK_ATTACHMENT_UNUSED &&
|
||||
cmd->state.pass->attachments[a].store_op == VK_ATTACHMENT_STORE_OP_STORE) {
|
||||
const struct tu_image_view *iview = fb->attachments[a].attachment;
|
||||
if (is_linear_mipmapped(iview)) {
|
||||
has_linear_mipmapped_store = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < pass->attachment_count; i++) {
|
||||
if (pass->attachments[i].gmem_offset >= 0 &&
|
||||
cmd->state.pass->attachments[i].store_op == VK_ATTACHMENT_STORE_OP_STORE) {
|
||||
const struct tu_image_view *iview = fb->attachments[i].attachment;
|
||||
if (is_linear_mipmapped(iview)) {
|
||||
has_linear_mipmapped_store = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Linear textures cannot have any padding between mipmap levels and their
|
||||
* height isn't padded, while at the same time the GMEM->MEM resolve does
|
||||
* not have per-pixel granularity, so if the image height isn't aligned to
|
||||
* the resolve granularity and the render area is tall enough, we may wind
|
||||
* up writing past the bottom of the image into the next miplevel or even
|
||||
* past the end of the image. For the last miplevel, the layout code should
|
||||
* insert enough padding so that the overdraw writes to the padding. To
|
||||
* work around this, we force-enable sysmem rendering.
|
||||
*/
|
||||
const uint32_t y2 = render_area->offset.y + render_area->extent.height;
|
||||
const uint32_t aligned_y2 = ALIGN_POT(y2, GMEM_ALIGN_H);
|
||||
|
||||
return has_linear_mipmapped_store && aligned_y2 > fb->height;
|
||||
}
|
||||
|
||||
static void
|
||||
tu_tiling_config_update_tile_layout(struct tu_tiling_config *tiling,
|
||||
const struct tu_device *dev,
|
||||
@@ -421,10 +357,6 @@ tu6_emit_wfi(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
}
|
||||
}
|
||||
|
||||
/* Expands to the `.pitch` and `.array_pitch` designated initializers of a
 * UBWC flag-buffer pitch register, taken from the view's image at the view's
 * base mip level.  The array pitch is the UBWC size shifted right by two.
 */
#define tu_image_view_ubwc_pitches(iview) \
|
||||
.pitch = tu_image_ubwc_pitch(iview->image, iview->base_mip), \
|
||||
.array_pitch = tu_image_ubwc_size(iview->image, iview->base_mip) >> 2
|
||||
|
||||
static void
|
||||
tu6_emit_zs(struct tu_cmd_buffer *cmd,
|
||||
const struct tu_subpass *subpass,
|
||||
@@ -497,20 +429,18 @@ tu6_emit_mrt(struct tu_cmd_buffer *cmd,
|
||||
continue;
|
||||
|
||||
const struct tu_image_view *iview = fb->attachments[a].attachment;
|
||||
const enum a6xx_tile_mode tile_mode =
|
||||
tu6_get_image_tile_mode(iview->image, iview->base_mip);
|
||||
|
||||
mrt_comp[i] = 0xf;
|
||||
|
||||
if (vk_format_is_srgb(iview->vk_format))
|
||||
srgb_cntl |= (1 << i);
|
||||
|
||||
const struct tu_native_format format =
|
||||
tu6_format_color(iview->vk_format, iview->image->layout.tile_mode);
|
||||
struct tu_native_format format =
|
||||
tu6_format_image(iview->image, iview->vk_format, iview->base_mip);
|
||||
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_RB_MRT_BUF_INFO(i,
|
||||
.color_tile_mode = tile_mode,
|
||||
.color_tile_mode = format.tile_mode,
|
||||
.color_format = format.fmt,
|
||||
.color_swap = format.swap),
|
||||
A6XX_RB_MRT_PITCH(i, tu_image_stride(iview->image, iview->base_mip)),
|
||||
@@ -563,12 +493,10 @@ tu6_emit_mrt(struct tu_cmd_buffer *cmd,
|
||||
.type = LAYER_2D_ARRAY));
|
||||
}
|
||||
|
||||
static void
|
||||
tu6_emit_msaa(struct tu_cmd_buffer *cmd,
|
||||
const struct tu_subpass *subpass,
|
||||
struct tu_cs *cs)
|
||||
void
|
||||
tu6_emit_msaa(struct tu_cs *cs, VkSampleCountFlagBits vk_samples)
|
||||
{
|
||||
const enum a3xx_msaa_samples samples = tu_msaa_samples(subpass->samples);
|
||||
const enum a3xx_msaa_samples samples = tu_msaa_samples(vk_samples);
|
||||
bool msaa_disable = samples == MSAA_ONE;
|
||||
|
||||
tu_cs_emit_regs(cs,
|
||||
@@ -681,51 +609,8 @@ tu6_emit_blit_scissor(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool align)
|
||||
A6XX_RB_BLIT_SCISSOR_BR(.x = x2, .y = y2));
|
||||
}
|
||||
|
||||
static void
|
||||
tu6_emit_blit_info(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
const struct tu_image_view *iview,
|
||||
uint32_t gmem_offset,
|
||||
bool resolve)
|
||||
{
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_RB_BLIT_INFO(.unk0 = !resolve, .gmem = !resolve));
|
||||
|
||||
const struct tu_native_format format =
|
||||
tu6_format_color(iview->vk_format, iview->image->layout.tile_mode);
|
||||
|
||||
enum a6xx_tile_mode tile_mode =
|
||||
tu6_get_image_tile_mode(iview->image, iview->base_mip);
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_RB_BLIT_DST_INFO(
|
||||
.tile_mode = tile_mode,
|
||||
.samples = tu_msaa_samples(iview->image->samples),
|
||||
.color_format = format.fmt,
|
||||
.color_swap = format.swap,
|
||||
.flags = iview->image->layout.ubwc_layer_size != 0),
|
||||
A6XX_RB_BLIT_DST(tu_image_view_base_ref(iview)),
|
||||
A6XX_RB_BLIT_DST_PITCH(tu_image_stride(iview->image, iview->base_mip)),
|
||||
A6XX_RB_BLIT_DST_ARRAY_PITCH(iview->image->layout.layer_size));
|
||||
|
||||
if (iview->image->layout.ubwc_layer_size) {
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_RB_BLIT_FLAG_DST(tu_image_view_ubwc_base_ref(iview)),
|
||||
A6XX_RB_BLIT_FLAG_DST_PITCH(tu_image_view_ubwc_pitches(iview)));
|
||||
}
|
||||
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_RB_BLIT_BASE_GMEM(gmem_offset));
|
||||
}
|
||||
|
||||
/* Emit a BLIT event into `cs`.  tu6_emit_load_attachment() calls this right
 * after tu6_emit_blit_info().
 * NOTE(review): presumably this event starts the transfer programmed by
 * tu6_emit_blit_info() — confirm against the event definitions.
 */
static void
|
||||
tu6_emit_blit(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
{
|
||||
tu6_emit_event_write(cmd, cs, BLIT, false);
|
||||
}
|
||||
|
||||
static void
|
||||
tu6_emit_window_scissor(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
void
|
||||
tu6_emit_window_scissor(struct tu_cs *cs,
|
||||
uint32_t x1,
|
||||
uint32_t y1,
|
||||
uint32_t x2,
|
||||
@@ -740,11 +625,8 @@ tu6_emit_window_scissor(struct tu_cmd_buffer *cmd,
|
||||
A6XX_GRAS_RESOLVE_CNTL_2(.x = x2, .y = y2));
|
||||
}
|
||||
|
||||
static void
|
||||
tu6_emit_window_offset(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
uint32_t x1,
|
||||
uint32_t y1)
|
||||
void
|
||||
tu6_emit_window_offset(struct tu_cs *cs, uint32_t x1, uint32_t y1)
|
||||
{
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_RB_WINDOW_OFFSET(.x = x1, .y = y1));
|
||||
@@ -783,6 +665,9 @@ use_sysmem_rendering(struct tu_cmd_buffer *cmd)
|
||||
if (!cmd->state.pass->gmem_pixels)
|
||||
return true;
|
||||
|
||||
if (cmd->state.framebuffer->layers > 1)
|
||||
return true;
|
||||
|
||||
return cmd->state.tiling_config.force_sysmem;
|
||||
}
|
||||
|
||||
@@ -801,8 +686,8 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
|
||||
const uint32_t y1 = tile->begin.y;
|
||||
const uint32_t x2 = tile->end.x - 1;
|
||||
const uint32_t y2 = tile->end.y - 1;
|
||||
tu6_emit_window_scissor(cmd, cs, x1, y1, x2, y2);
|
||||
tu6_emit_window_offset(cmd, cs, x1, y1);
|
||||
tu6_emit_window_scissor(cs, x1, y1, x2, y2);
|
||||
tu6_emit_window_offset(cs, x1, y1);
|
||||
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_VPC_SO_OVERRIDE(.so_disable = false));
|
||||
@@ -861,93 +746,6 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
tu6_emit_load_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, uint32_t a)
|
||||
{
|
||||
const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
|
||||
const struct tu_framebuffer *fb = cmd->state.framebuffer;
|
||||
const struct tu_image_view *iview = fb->attachments[a].attachment;
|
||||
const struct tu_render_pass_attachment *attachment =
|
||||
&cmd->state.pass->attachments[a];
|
||||
|
||||
if (attachment->gmem_offset < 0)
|
||||
return;
|
||||
|
||||
const uint32_t x1 = tiling->render_area.offset.x;
|
||||
const uint32_t y1 = tiling->render_area.offset.y;
|
||||
const uint32_t x2 = x1 + tiling->render_area.extent.width;
|
||||
const uint32_t y2 = y1 + tiling->render_area.extent.height;
|
||||
const uint32_t tile_x2 =
|
||||
tiling->tile0.offset.x + tiling->tile0.extent.width * tiling->tile_count.width;
|
||||
const uint32_t tile_y2 =
|
||||
tiling->tile0.offset.y + tiling->tile0.extent.height * tiling->tile_count.height;
|
||||
bool need_load =
|
||||
x1 != tiling->tile0.offset.x || x2 != MIN2(fb->width, tile_x2) ||
|
||||
y1 != tiling->tile0.offset.y || y2 != MIN2(fb->height, tile_y2);
|
||||
|
||||
if (need_load)
|
||||
tu_finishme("improve handling of unaligned render area");
|
||||
|
||||
if (attachment->load_op == VK_ATTACHMENT_LOAD_OP_LOAD)
|
||||
need_load = true;
|
||||
|
||||
if (vk_format_has_stencil(iview->vk_format) &&
|
||||
attachment->stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD)
|
||||
need_load = true;
|
||||
|
||||
if (need_load) {
|
||||
tu6_emit_blit_info(cmd, cs, iview, attachment->gmem_offset, false);
|
||||
tu6_emit_blit(cmd, cs);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
tu6_emit_clear_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
||||
uint32_t a,
|
||||
const VkRenderPassBeginInfo *info)
|
||||
{
|
||||
const struct tu_framebuffer *fb = cmd->state.framebuffer;
|
||||
const struct tu_image_view *iview = fb->attachments[a].attachment;
|
||||
const struct tu_render_pass_attachment *attachment =
|
||||
&cmd->state.pass->attachments[a];
|
||||
unsigned clear_mask = 0;
|
||||
|
||||
/* note: this means it isn't used by any subpass and shouldn't be cleared anyway */
|
||||
if (attachment->gmem_offset < 0)
|
||||
return;
|
||||
|
||||
if (attachment->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)
|
||||
clear_mask = 0xf;
|
||||
|
||||
if (vk_format_has_stencil(iview->vk_format)) {
|
||||
clear_mask &= 0x1;
|
||||
if (attachment->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)
|
||||
clear_mask |= 0x2;
|
||||
}
|
||||
if (!clear_mask)
|
||||
return;
|
||||
|
||||
tu_clear_gmem_attachment(cmd, cs, a, clear_mask,
|
||||
&info->pClearValues[a]);
|
||||
}
|
||||
|
||||
static void
|
||||
tu6_emit_predicated_blit(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
uint32_t a,
|
||||
uint32_t gmem_a,
|
||||
bool resolve)
|
||||
{
|
||||
tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM);
|
||||
|
||||
tu6_emit_blit_info(cmd, cs,
|
||||
cmd->state.framebuffer->attachments[a].attachment,
|
||||
cmd->state.pass->attachments[gmem_a].gmem_offset, resolve);
|
||||
tu6_emit_blit(cmd, cs);
|
||||
|
||||
tu_cond_exec_end(cs);
|
||||
}
|
||||
|
||||
static void
|
||||
tu6_emit_sysmem_resolve(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
@@ -955,48 +753,10 @@ tu6_emit_sysmem_resolve(struct tu_cmd_buffer *cmd,
|
||||
uint32_t gmem_a)
|
||||
{
|
||||
const struct tu_framebuffer *fb = cmd->state.framebuffer;
|
||||
const struct tu_image_view *dst = fb->attachments[a].attachment;
|
||||
const struct tu_image_view *src = fb->attachments[gmem_a].attachment;
|
||||
struct tu_image_view *dst = fb->attachments[a].attachment;
|
||||
struct tu_image_view *src = fb->attachments[gmem_a].attachment;
|
||||
|
||||
tu_blit(cmd, cs, &(struct tu_blit) {
|
||||
.dst = sysmem_attachment_surf(dst, dst->base_layer,
|
||||
&cmd->state.tiling_config.render_area),
|
||||
.src = sysmem_attachment_surf(src, src->base_layer,
|
||||
&cmd->state.tiling_config.render_area),
|
||||
.layers = fb->layers,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
/* Emit a MSAA resolve operation, with both gmem and sysmem paths. */
|
||||
static void tu6_emit_resolve(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
uint32_t a,
|
||||
uint32_t gmem_a)
|
||||
{
|
||||
if (cmd->state.pass->attachments[a].store_op == VK_ATTACHMENT_STORE_OP_DONT_CARE)
|
||||
return;
|
||||
|
||||
tu6_emit_predicated_blit(cmd, cs, a, gmem_a, true);
|
||||
|
||||
tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_SYSMEM);
|
||||
tu6_emit_sysmem_resolve(cmd, cs, a, gmem_a);
|
||||
tu_cond_exec_end(cs);
|
||||
}
|
||||
|
||||
static void
|
||||
tu6_emit_store_attachment(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
uint32_t a,
|
||||
uint32_t gmem_a)
|
||||
{
|
||||
if (cmd->state.pass->attachments[a].store_op == VK_ATTACHMENT_STORE_OP_DONT_CARE)
|
||||
return;
|
||||
|
||||
tu6_emit_blit_info(cmd, cs,
|
||||
cmd->state.framebuffer->attachments[a].attachment,
|
||||
cmd->state.pass->attachments[gmem_a].gmem_offset, true);
|
||||
tu6_emit_blit(cmd, cs);
|
||||
tu_resolve_sysmem(cmd, cs, src, dst, fb->layers, &cmd->state.tiling_config.render_area);
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -1018,19 +778,20 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
|
||||
tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE));
|
||||
|
||||
tu6_emit_blit_scissor(cmd, cs, true);
|
||||
/* blit scissor may have been changed by CmdClearAttachments */
|
||||
tu6_emit_blit_scissor(cmd, cs, false);
|
||||
|
||||
for (uint32_t a = 0; a < pass->attachment_count; ++a) {
|
||||
if (pass->attachments[a].gmem_offset >= 0)
|
||||
tu6_emit_store_attachment(cmd, cs, a, a);
|
||||
tu_store_gmem_attachment(cmd, cs, a, a);
|
||||
}
|
||||
|
||||
if (subpass->resolve_attachments) {
|
||||
for (unsigned i = 0; i < subpass->color_count; i++) {
|
||||
uint32_t a = subpass->resolve_attachments[i].attachment;
|
||||
if (a != VK_ATTACHMENT_UNUSED)
|
||||
tu6_emit_store_attachment(cmd, cs, a,
|
||||
subpass->color_attachments[i].attachment);
|
||||
tu_store_gmem_attachment(cmd, cs, a,
|
||||
subpass->color_attachments[i].attachment);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1331,7 +1092,7 @@ tu6_emit_binning_pass(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
uint32_t x2 = tiling->render_area.offset.x + tiling->render_area.extent.width - 1;
|
||||
uint32_t y2 = tiling->render_area.offset.y + tiling->render_area.extent.height - 1;
|
||||
|
||||
tu6_emit_window_scissor(cmd, cs, x1, y1, x2, y2);
|
||||
tu6_emit_window_scissor(cs, x1, y1, x2, y2);
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
|
||||
tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BINNING));
|
||||
@@ -1395,44 +1156,6 @@ tu6_emit_binning_pass(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
cmd->wait_for_idle = false;
|
||||
}
|
||||
|
||||
static void
|
||||
tu_emit_sysmem_clear_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
||||
uint32_t a,
|
||||
const VkRenderPassBeginInfo *info)
|
||||
{
|
||||
const struct tu_framebuffer *fb = cmd->state.framebuffer;
|
||||
const struct tu_image_view *iview = fb->attachments[a].attachment;
|
||||
const struct tu_render_pass_attachment *attachment =
|
||||
&cmd->state.pass->attachments[a];
|
||||
unsigned clear_mask = 0;
|
||||
|
||||
/* note: this means it isn't used by any subpass and shouldn't be cleared anyway */
|
||||
if (attachment->gmem_offset < 0)
|
||||
return;
|
||||
|
||||
if (attachment->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
|
||||
clear_mask = 0xf;
|
||||
}
|
||||
|
||||
if (vk_format_has_stencil(iview->vk_format)) {
|
||||
clear_mask &= 0x1;
|
||||
if (attachment->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)
|
||||
clear_mask |= 0x2;
|
||||
if (clear_mask != 0x3)
|
||||
tu_finishme("depth/stencil only load op");
|
||||
}
|
||||
|
||||
if (!clear_mask)
|
||||
return;
|
||||
|
||||
tu_clear_sysmem_attachment(cmd, cs, a,
|
||||
&info->pClearValues[a], &(struct VkClearRect) {
|
||||
.rect = info->renderArea,
|
||||
.baseArrayLayer = iview->base_layer,
|
||||
.layerCount = iview->layer_count,
|
||||
});
|
||||
}
|
||||
|
||||
static void
|
||||
tu_emit_load_clear(struct tu_cmd_buffer *cmd,
|
||||
const VkRenderPassBeginInfo *info)
|
||||
@@ -1444,26 +1167,19 @@ tu_emit_load_clear(struct tu_cmd_buffer *cmd,
|
||||
tu6_emit_blit_scissor(cmd, cs, true);
|
||||
|
||||
for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i)
|
||||
tu6_emit_load_attachment(cmd, cs, i);
|
||||
tu_load_gmem_attachment(cmd, cs, i);
|
||||
|
||||
tu6_emit_blit_scissor(cmd, cs, false);
|
||||
|
||||
for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i)
|
||||
tu6_emit_clear_attachment(cmd, cs, i, info);
|
||||
tu_clear_gmem_attachment(cmd, cs, i, info);
|
||||
|
||||
tu_cond_exec_end(cs);
|
||||
|
||||
/* invalidate because reading input attachments will cache GMEM and
|
||||
* the cache isn't updated when GMEM is written
|
||||
* TODO: is there a no-cache bit for textures?
|
||||
*/
|
||||
if (cmd->state.subpass->input_count)
|
||||
tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE, false);
|
||||
|
||||
tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_SYSMEM);
|
||||
|
||||
for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i)
|
||||
tu_emit_sysmem_clear_attachment(cmd, cs, i, info);
|
||||
tu_clear_sysmem_attachment(cmd, cs, i, info);
|
||||
|
||||
tu_cond_exec_end(cs);
|
||||
}
|
||||
@@ -1476,8 +1192,8 @@ tu6_sysmem_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
||||
const struct tu_framebuffer *fb = cmd->state.framebuffer;
|
||||
|
||||
assert(fb->width > 0 && fb->height > 0);
|
||||
tu6_emit_window_scissor(cmd, cs, 0, 0, fb->width - 1, fb->height - 1);
|
||||
tu6_emit_window_offset(cmd, cs, 0, 0);
|
||||
tu6_emit_window_scissor(cs, 0, 0, fb->width - 1, fb->height - 1);
|
||||
tu6_emit_window_offset(cs, 0, 0);
|
||||
|
||||
tu6_emit_bin_size(cs, 0, 0, 0xc00000); /* 0xc00000 = BYPASS? */
|
||||
|
||||
@@ -1516,7 +1232,6 @@ tu6_sysmem_render_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
/* Do any resolves of the last subpass. These are handled in the
|
||||
* tile_store_ib in the gmem path.
|
||||
*/
|
||||
|
||||
const struct tu_subpass *subpass = cmd->state.subpass;
|
||||
if (subpass->resolve_attachments) {
|
||||
for (unsigned i = 0; i < subpass->color_count; i++) {
|
||||
@@ -1555,7 +1270,13 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
tu_cs_emit_pkt7(cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
|
||||
tu_cs_emit(cs, 0x0);
|
||||
|
||||
tu6_emit_wfi(cmd, cs);
|
||||
/* TODO: flushing with barriers instead of blindly always flushing */
|
||||
tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS, true);
|
||||
tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_DEPTH_TS, true);
|
||||
tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_COLOR, false);
|
||||
tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_DEPTH, false);
|
||||
|
||||
tu_cs_emit_wfi(cs);
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_RB_CCU_CNTL(.offset = phys_dev->ccu_offset_gmem, .gmem = 1));
|
||||
|
||||
@@ -1684,7 +1405,7 @@ tu_cmd_render_sysmem(struct tu_cmd_buffer *cmd)
|
||||
static void
|
||||
tu_cmd_prepare_tile_store_ib(struct tu_cmd_buffer *cmd)
|
||||
{
|
||||
const uint32_t tile_store_space = 32 + 23 * cmd->state.pass->attachment_count;
|
||||
const uint32_t tile_store_space = 11 + (35 * 2) * cmd->state.pass->attachment_count;
|
||||
struct tu_cs sub_cs;
|
||||
|
||||
VkResult result =
|
||||
@@ -1708,7 +1429,7 @@ tu_cmd_update_tiling_config(struct tu_cmd_buffer *cmd,
|
||||
struct tu_tiling_config *tiling = &cmd->state.tiling_config;
|
||||
|
||||
tiling->render_area = *render_area;
|
||||
tiling->force_sysmem = force_sysmem(cmd, render_area);
|
||||
tiling->force_sysmem = false;
|
||||
|
||||
tu_tiling_config_update_tile_layout(tiling, dev, cmd->state.pass->gmem_pixels);
|
||||
tu_tiling_config_update_pipe_layout(tiling, dev);
|
||||
@@ -2583,7 +2304,7 @@ tu_CmdBeginRenderPass(VkCommandBuffer commandBuffer,
|
||||
|
||||
tu6_emit_zs(cmd, cmd->state.subpass, &cmd->draw_cs);
|
||||
tu6_emit_mrt(cmd, cmd->state.subpass, &cmd->draw_cs);
|
||||
tu6_emit_msaa(cmd, cmd->state.subpass, &cmd->draw_cs);
|
||||
tu6_emit_msaa(&cmd->draw_cs, cmd->state.subpass->samples);
|
||||
tu6_emit_render_cntl(cmd, cmd->state.subpass, &cmd->draw_cs, false);
|
||||
|
||||
/* note: use_hw_binning only checks tiling config */
|
||||
@@ -2614,53 +2335,66 @@ tu_CmdNextSubpass(VkCommandBuffer commandBuffer, VkSubpassContents contents)
|
||||
struct tu_cs *cs = &cmd->draw_cs;
|
||||
|
||||
const struct tu_subpass *subpass = cmd->state.subpass++;
|
||||
/* TODO:
|
||||
* if msaa samples change between subpasses,
|
||||
* attachment store is broken for some attachments
|
||||
*/
|
||||
|
||||
tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM);
|
||||
|
||||
if (subpass->resolve_attachments) {
|
||||
tu6_emit_blit_scissor(cmd, cs, true);
|
||||
for (unsigned i = 0; i < subpass->color_count; i++) {
|
||||
uint32_t a = subpass->resolve_attachments[i].attachment;
|
||||
if (a != VK_ATTACHMENT_UNUSED) {
|
||||
tu6_emit_resolve(cmd, cs, a,
|
||||
subpass->color_attachments[i].attachment);
|
||||
}
|
||||
if (a == VK_ATTACHMENT_UNUSED)
|
||||
continue;
|
||||
|
||||
tu_store_gmem_attachment(cmd, cs, a,
|
||||
subpass->color_attachments[i].attachment);
|
||||
|
||||
if (pass->attachments[a].gmem_offset < 0)
|
||||
continue;
|
||||
|
||||
/* TODO:
|
||||
* check if the resolved attachment is needed by later subpasses,
|
||||
* if it is, should be doing a GMEM->GMEM resolve instead of GMEM->MEM->GMEM..
|
||||
*/
|
||||
tu_finishme("missing GMEM->GMEM resolve path\n");
|
||||
tu_emit_load_gmem_attachment(cmd, cs, a);
|
||||
}
|
||||
}
|
||||
|
||||
/* invalidate because reading input attachments will cache GMEM and
|
||||
* the cache isn't updated when GMEM is written
|
||||
* TODO: is there a no-cache bit for textures?
|
||||
tu_cond_exec_end(cs);
|
||||
|
||||
tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_SYSMEM);
|
||||
|
||||
/* Emit flushes so that input attachments will read the correct value.
|
||||
* TODO: use subpass dependencies to flush or not
|
||||
*/
|
||||
tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS, true);
|
||||
tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_DEPTH_TS, true);
|
||||
|
||||
if (subpass->resolve_attachments) {
|
||||
tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE, false);
|
||||
|
||||
for (unsigned i = 0; i < subpass->color_count; i++) {
|
||||
uint32_t a = subpass->resolve_attachments[i].attachment;
|
||||
if (a == VK_ATTACHMENT_UNUSED)
|
||||
continue;
|
||||
|
||||
tu6_emit_sysmem_resolve(cmd, cs, a,
|
||||
subpass->color_attachments[i].attachment);
|
||||
}
|
||||
|
||||
tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS, true);
|
||||
}
|
||||
|
||||
tu_cond_exec_end(cs);
|
||||
|
||||
/* subpass->input_count > 0 then texture cache invalidate is likely to be needed */
|
||||
if (cmd->state.subpass->input_count)
|
||||
tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE, false);
|
||||
|
||||
/* emit mrt/zs/msaa/ubwc state for the subpass that is starting */
|
||||
tu6_emit_zs(cmd, cmd->state.subpass, cs);
|
||||
tu6_emit_mrt(cmd, cmd->state.subpass, cs);
|
||||
tu6_emit_msaa(cmd, cmd->state.subpass, cs);
|
||||
tu6_emit_msaa(cs, cmd->state.subpass->samples);
|
||||
tu6_emit_render_cntl(cmd, cmd->state.subpass, cs, false);
|
||||
|
||||
/* Emit flushes so that input attachments will read the correct value. This
|
||||
* is for sysmem only, although it shouldn't do much harm on gmem.
|
||||
*/
|
||||
tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS, true);
|
||||
tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_DEPTH_TS, true);
|
||||
|
||||
/* TODO:
|
||||
* since we don't know how to do GMEM->GMEM resolve,
|
||||
* resolve attachments are resolved to memory then loaded to GMEM again if needed
|
||||
*/
|
||||
if (subpass->resolve_attachments) {
|
||||
for (unsigned i = 0; i < subpass->color_count; i++) {
|
||||
uint32_t a = subpass->resolve_attachments[i].attachment;
|
||||
if (a != VK_ATTACHMENT_UNUSED && pass->attachments[a].gmem_offset >= 0) {
|
||||
tu_finishme("missing GMEM->GMEM resolve, performance will suffer\n");
|
||||
tu6_emit_predicated_blit(cmd, cs, a, a, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
@@ -4137,7 +3871,7 @@ struct tu_barrier_info
|
||||
};
|
||||
|
||||
static void
|
||||
tu_barrier(struct tu_cmd_buffer *cmd_buffer,
|
||||
tu_barrier(struct tu_cmd_buffer *cmd,
|
||||
uint32_t memoryBarrierCount,
|
||||
const VkMemoryBarrier *pMemoryBarriers,
|
||||
uint32_t bufferMemoryBarrierCount,
|
||||
@@ -4146,13 +3880,24 @@ tu_barrier(struct tu_cmd_buffer *cmd_buffer,
|
||||
const VkImageMemoryBarrier *pImageMemoryBarriers,
|
||||
const struct tu_barrier_info *info)
|
||||
{
|
||||
/* renderpass case is only for subpass self-dependencies
|
||||
* which means syncing the render output with texture cache
|
||||
* note: only the CACHE_INVALIDATE is needed in GMEM mode
|
||||
* and in sysmem mode we might not need either color/depth flush
|
||||
*/
|
||||
if (cmd->state.pass) {
|
||||
tu6_emit_event_write(cmd, &cmd->draw_cs, PC_CCU_FLUSH_COLOR_TS, true);
|
||||
tu6_emit_event_write(cmd, &cmd->draw_cs, PC_CCU_FLUSH_DEPTH_TS, true);
|
||||
tu6_emit_event_write(cmd, &cmd->draw_cs, CACHE_INVALIDATE, false);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
tu_CmdPipelineBarrier(VkCommandBuffer commandBuffer,
|
||||
VkPipelineStageFlags srcStageMask,
|
||||
VkPipelineStageFlags destStageMask,
|
||||
VkBool32 byRegion,
|
||||
VkPipelineStageFlags dstStageMask,
|
||||
VkDependencyFlags dependencyFlags,
|
||||
uint32_t memoryBarrierCount,
|
||||
const VkMemoryBarrier *pMemoryBarriers,
|
||||
uint32_t bufferMemoryBarrierCount,
|
||||
|
@@ -28,10 +28,6 @@
|
||||
#include "registers/adreno_common.xml.h"
|
||||
#include "registers/a6xx.xml.h"
|
||||
|
||||
#include "util/format_r11g11b10f.h"
|
||||
#include "util/format_rgb9e5.h"
|
||||
#include "util/format_srgb.h"
|
||||
#include "util/u_half.h"
|
||||
#include "vk_format.h"
|
||||
#include "vk_util.h"
|
||||
#include "drm-uapi/drm_fourcc.h"
|
||||
@@ -222,13 +218,13 @@ static const struct tu_native_format tu6_format_table[] = {
|
||||
TU6_xTx(E5B9G9R9_UFLOAT_PACK32, 9_9_9_E5_FLOAT, WZYX), /* 123 */
|
||||
|
||||
/* depth/stencil */
|
||||
TU6_xTC(D16_UNORM, 16_UNORM, WZYX), /* 124 */
|
||||
TU6_xTC(X8_D24_UNORM_PACK32, Z24_UNORM_S8_UINT, WZYX), /* 125 */
|
||||
TU6_xTC(D32_SFLOAT, 32_FLOAT, WZYX), /* 126 */
|
||||
TU6_xTC(S8_UINT, 8_UINT, WZYX), /* 127 */
|
||||
TU6_xxx(D16_UNORM_S8_UINT, X8Z16_UNORM, WZYX), /* 128 */
|
||||
TU6_xTC(D24_UNORM_S8_UINT, Z24_UNORM_S8_UINT, WZYX), /* 129 */
|
||||
TU6_xxx(D32_SFLOAT_S8_UINT, x, WZYX), /* 130 */
|
||||
TU6_xTC(D16_UNORM, 16_UNORM, WZYX), /* 124 */
|
||||
TU6_xTC(X8_D24_UNORM_PACK32, Z24_UNORM_S8_UINT_AS_R8G8B8A8, WZYX), /* 125 */
|
||||
TU6_xTC(D32_SFLOAT, 32_FLOAT, WZYX), /* 126 */
|
||||
TU6_xTC(S8_UINT, 8_UINT, WZYX), /* 127 */
|
||||
TU6_xxx(D16_UNORM_S8_UINT, X8Z16_UNORM, WZYX), /* 128 */
|
||||
TU6_xTC(D24_UNORM_S8_UINT, Z24_UNORM_S8_UINT_AS_R8G8B8A8, WZYX), /* 129 */
|
||||
TU6_xxx(D32_SFLOAT_S8_UINT, x, WZYX), /* 130 */
|
||||
|
||||
/* compressed */
|
||||
TU6_xTx(BC1_RGB_UNORM_BLOCK, DXT1, WZYX), /* 131 */
|
||||
@@ -348,75 +344,6 @@ tu6_format_texture(VkFormat format, enum a6xx_tile_mode tile_mode)
|
||||
return fmt;
|
||||
}
|
||||
|
||||
enum a6xx_2d_ifmt
|
||||
tu6_fmt_to_ifmt(enum a6xx_format fmt)
|
||||
{
|
||||
switch (fmt) {
|
||||
case FMT6_A8_UNORM:
|
||||
case FMT6_8_UNORM:
|
||||
case FMT6_8_SNORM:
|
||||
case FMT6_8_8_UNORM:
|
||||
case FMT6_8_8_SNORM:
|
||||
case FMT6_8_8_8_8_UNORM:
|
||||
case FMT6_8_8_8_X8_UNORM:
|
||||
case FMT6_8_8_8_8_SNORM:
|
||||
case FMT6_4_4_4_4_UNORM:
|
||||
case FMT6_5_5_5_1_UNORM:
|
||||
case FMT6_5_6_5_UNORM:
|
||||
case FMT6_Z24_UNORM_S8_UINT:
|
||||
case FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8:
|
||||
return R2D_UNORM8;
|
||||
|
||||
case FMT6_32_UINT:
|
||||
case FMT6_32_SINT:
|
||||
case FMT6_32_32_UINT:
|
||||
case FMT6_32_32_SINT:
|
||||
case FMT6_32_32_32_32_UINT:
|
||||
case FMT6_32_32_32_32_SINT:
|
||||
return R2D_INT32;
|
||||
|
||||
case FMT6_16_UINT:
|
||||
case FMT6_16_SINT:
|
||||
case FMT6_16_16_UINT:
|
||||
case FMT6_16_16_SINT:
|
||||
case FMT6_16_16_16_16_UINT:
|
||||
case FMT6_16_16_16_16_SINT:
|
||||
case FMT6_10_10_10_2_UINT:
|
||||
return R2D_INT16;
|
||||
|
||||
case FMT6_8_UINT:
|
||||
case FMT6_8_SINT:
|
||||
case FMT6_8_8_UINT:
|
||||
case FMT6_8_8_SINT:
|
||||
case FMT6_8_8_8_8_UINT:
|
||||
case FMT6_8_8_8_8_SINT:
|
||||
return R2D_INT8;
|
||||
|
||||
case FMT6_16_UNORM:
|
||||
case FMT6_16_SNORM:
|
||||
case FMT6_16_16_UNORM:
|
||||
case FMT6_16_16_SNORM:
|
||||
case FMT6_16_16_16_16_UNORM:
|
||||
case FMT6_16_16_16_16_SNORM:
|
||||
case FMT6_32_FLOAT:
|
||||
case FMT6_32_32_FLOAT:
|
||||
case FMT6_32_32_32_32_FLOAT:
|
||||
return R2D_FLOAT32;
|
||||
|
||||
case FMT6_16_FLOAT:
|
||||
case FMT6_16_16_FLOAT:
|
||||
case FMT6_16_16_16_16_FLOAT:
|
||||
case FMT6_11_11_10_FLOAT:
|
||||
case FMT6_10_10_10_2_UNORM:
|
||||
case FMT6_10_10_10_2_UNORM_DEST:
|
||||
return R2D_FLOAT16;
|
||||
|
||||
default:
|
||||
unreachable("bad format");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
enum a6xx_depth_format
|
||||
tu6_pipe2depth(VkFormat format)
|
||||
{
|
||||
@@ -433,306 +360,6 @@ tu6_pipe2depth(VkFormat format)
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
tu_pack_mask(int bits)
|
||||
{
|
||||
assert(bits <= 32);
|
||||
return (1ull << bits) - 1;
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
tu_pack_float32_for_unorm(float val, int bits)
|
||||
{
|
||||
const uint32_t max = tu_pack_mask(bits);
|
||||
if (val < 0.0f)
|
||||
return 0;
|
||||
else if (val > 1.0f)
|
||||
return max;
|
||||
else
|
||||
return _mesa_lroundevenf(val * (float) max);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
tu_pack_float32_for_snorm(float val, int bits)
|
||||
{
|
||||
const int32_t max = tu_pack_mask(bits - 1);
|
||||
int32_t tmp;
|
||||
if (val < -1.0f)
|
||||
tmp = -max;
|
||||
else if (val > 1.0f)
|
||||
tmp = max;
|
||||
else
|
||||
tmp = _mesa_lroundevenf(val * (float) max);
|
||||
|
||||
return tmp & tu_pack_mask(bits);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
tu_pack_float32_for_uscaled(float val, int bits)
|
||||
{
|
||||
const uint32_t max = tu_pack_mask(bits);
|
||||
if (val < 0.0f)
|
||||
return 0;
|
||||
else if (val > (float) max)
|
||||
return max;
|
||||
else
|
||||
return (uint32_t) val;
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
tu_pack_float32_for_sscaled(float val, int bits)
|
||||
{
|
||||
const int32_t max = tu_pack_mask(bits - 1);
|
||||
const int32_t min = -max - 1;
|
||||
int32_t tmp;
|
||||
if (val < (float) min)
|
||||
tmp = min;
|
||||
else if (val > (float) max)
|
||||
tmp = max;
|
||||
else
|
||||
tmp = (int32_t) val;
|
||||
|
||||
return tmp & tu_pack_mask(bits);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
tu_pack_uint32_for_uint(uint32_t val, int bits)
|
||||
{
|
||||
return val & tu_pack_mask(bits);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
tu_pack_int32_for_sint(int32_t val, int bits)
|
||||
{
|
||||
return val & tu_pack_mask(bits);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
tu_pack_float32_for_sfloat(float val, int bits)
|
||||
{
|
||||
assert(bits == 16 || bits == 32);
|
||||
return bits == 16 ? util_float_to_half(val) : fui(val);
|
||||
}
|
||||
|
||||
union tu_clear_component_value {
|
||||
float float32;
|
||||
int32_t int32;
|
||||
uint32_t uint32;
|
||||
};
|
||||
|
||||
static uint32_t
|
||||
tu_pack_clear_component_value(union tu_clear_component_value val,
|
||||
const struct util_format_channel_description *ch)
|
||||
{
|
||||
uint32_t packed;
|
||||
|
||||
switch (ch->type) {
|
||||
case UTIL_FORMAT_TYPE_UNSIGNED:
|
||||
/* normalized, scaled, or pure integer */
|
||||
if (ch->normalized)
|
||||
packed = tu_pack_float32_for_unorm(val.float32, ch->size);
|
||||
else if (ch->pure_integer)
|
||||
packed = tu_pack_uint32_for_uint(val.uint32, ch->size);
|
||||
else
|
||||
packed = tu_pack_float32_for_uscaled(val.float32, ch->size);
|
||||
break;
|
||||
case UTIL_FORMAT_TYPE_SIGNED:
|
||||
/* normalized, scaled, or pure integer */
|
||||
if (ch->normalized)
|
||||
packed = tu_pack_float32_for_snorm(val.float32, ch->size);
|
||||
else if (ch->pure_integer)
|
||||
packed = tu_pack_int32_for_sint(val.int32, ch->size);
|
||||
else
|
||||
packed = tu_pack_float32_for_sscaled(val.float32, ch->size);
|
||||
break;
|
||||
case UTIL_FORMAT_TYPE_FLOAT:
|
||||
packed = tu_pack_float32_for_sfloat(val.float32, ch->size);
|
||||
break;
|
||||
default:
|
||||
unreachable("unexpected channel type");
|
||||
packed = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
assert((packed & tu_pack_mask(ch->size)) == packed);
|
||||
return packed;
|
||||
}
|
||||
|
||||
static const struct util_format_channel_description *
|
||||
tu_get_format_channel_description(const struct util_format_description *desc,
|
||||
int comp)
|
||||
{
|
||||
switch (desc->swizzle[comp]) {
|
||||
case PIPE_SWIZZLE_X:
|
||||
return &desc->channel[0];
|
||||
case PIPE_SWIZZLE_Y:
|
||||
return &desc->channel[1];
|
||||
case PIPE_SWIZZLE_Z:
|
||||
return &desc->channel[2];
|
||||
case PIPE_SWIZZLE_W:
|
||||
return &desc->channel[3];
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static union tu_clear_component_value
|
||||
tu_get_clear_component_value(const VkClearValue *val, int comp,
|
||||
enum util_format_colorspace colorspace)
|
||||
{
|
||||
assert(comp < 4);
|
||||
|
||||
union tu_clear_component_value tmp;
|
||||
switch (colorspace) {
|
||||
case UTIL_FORMAT_COLORSPACE_ZS:
|
||||
assert(comp < 2);
|
||||
if (comp == 0)
|
||||
tmp.float32 = val->depthStencil.depth;
|
||||
else
|
||||
tmp.uint32 = val->depthStencil.stencil;
|
||||
break;
|
||||
case UTIL_FORMAT_COLORSPACE_SRGB:
|
||||
if (comp < 3) {
|
||||
tmp.float32 = util_format_linear_to_srgb_float(val->color.float32[comp]);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
assert(comp < 4);
|
||||
tmp.uint32 = val->color.uint32[comp];
|
||||
break;
|
||||
}
|
||||
|
||||
return tmp;
|
||||
}
|
||||
|
||||
/**
|
||||
* Pack a VkClearValue into a 128-bit buffer. \a format is respected except
|
||||
* for the component order. The components are always packed in WZYX order
|
||||
* (i.e., msb is white and lsb is red).
|
||||
*
|
||||
* Return the number of uint32_t's used.
|
||||
*/
|
||||
void
|
||||
tu_pack_clear_value(const VkClearValue *val, VkFormat format, uint32_t buf[4])
|
||||
{
|
||||
const struct util_format_description *desc = vk_format_description(format);
|
||||
|
||||
switch (format) {
|
||||
case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
|
||||
buf[0] = float3_to_r11g11b10f(val->color.float32);
|
||||
return;
|
||||
case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
|
||||
buf[0] = float3_to_rgb9e5(val->color.float32);
|
||||
return;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
assert(desc && desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
|
||||
|
||||
/* S8_UINT is special and has no depth */
|
||||
const int max_components =
|
||||
format == VK_FORMAT_S8_UINT ? 2 : desc->nr_channels;
|
||||
|
||||
int buf_offset = 0;
|
||||
int bit_shift = 0;
|
||||
for (int comp = 0; comp < max_components; comp++) {
|
||||
const struct util_format_channel_description *ch =
|
||||
tu_get_format_channel_description(desc, comp);
|
||||
if (!ch) {
|
||||
assert((format == VK_FORMAT_S8_UINT && comp == 0) ||
|
||||
(format == VK_FORMAT_X8_D24_UNORM_PACK32 && comp == 1));
|
||||
continue;
|
||||
}
|
||||
|
||||
union tu_clear_component_value v = tu_get_clear_component_value(
|
||||
val, comp, desc->colorspace);
|
||||
|
||||
/* move to the next uint32_t when there is not enough space */
|
||||
assert(ch->size <= 32);
|
||||
if (bit_shift + ch->size > 32) {
|
||||
buf_offset++;
|
||||
bit_shift = 0;
|
||||
}
|
||||
|
||||
if (bit_shift == 0)
|
||||
buf[buf_offset] = 0;
|
||||
|
||||
buf[buf_offset] |= tu_pack_clear_component_value(v, ch) << bit_shift;
|
||||
bit_shift += ch->size;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
tu_2d_clear_color(const VkClearColorValue *val, VkFormat format, uint32_t buf[4])
|
||||
{
|
||||
const struct util_format_description *desc = vk_format_description(format);
|
||||
|
||||
/* not supported by 2D engine, cleared as U32 */
|
||||
if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
|
||||
buf[0] = float3_to_rgb9e5(val->float32);
|
||||
return;
|
||||
}
|
||||
|
||||
enum a6xx_2d_ifmt ifmt = tu6_fmt_to_ifmt(tu6_get_native_format(format).fmt);
|
||||
|
||||
assert(desc && (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN ||
|
||||
format == VK_FORMAT_B10G11R11_UFLOAT_PACK32));
|
||||
|
||||
for (unsigned i = 0; i < desc->nr_channels; i++) {
|
||||
const struct util_format_channel_description *ch = &desc->channel[i];
|
||||
|
||||
switch (ifmt) {
|
||||
case R2D_INT32:
|
||||
case R2D_INT16:
|
||||
case R2D_INT8:
|
||||
case R2D_FLOAT32:
|
||||
buf[i] = val->uint32[i];
|
||||
break;
|
||||
case R2D_FLOAT16:
|
||||
buf[i] = util_float_to_half(val->float32[i]);
|
||||
break;
|
||||
case R2D_UNORM8: {
|
||||
float linear = val->float32[i];
|
||||
if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB && i < 3)
|
||||
linear = util_format_linear_to_srgb_float(val->float32[i]);
|
||||
|
||||
if (ch->type == UTIL_FORMAT_TYPE_SIGNED)
|
||||
buf[i] = tu_pack_float32_for_snorm(linear, 8);
|
||||
else
|
||||
buf[i] = tu_pack_float32_for_unorm(linear, 8);
|
||||
} break;
|
||||
default:
|
||||
unreachable("unexpected ifmt");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
tu_2d_clear_zs(const VkClearDepthStencilValue *val, VkFormat format, uint32_t buf[4])
|
||||
{
|
||||
switch (format) {
|
||||
case VK_FORMAT_X8_D24_UNORM_PACK32:
|
||||
case VK_FORMAT_D24_UNORM_S8_UINT:
|
||||
buf[0] = tu_pack_float32_for_unorm(val->depth, 24);
|
||||
buf[1] = buf[0] >> 8;
|
||||
buf[2] = buf[0] >> 16;
|
||||
buf[3] = val->stencil;
|
||||
return;
|
||||
case VK_FORMAT_D16_UNORM:
|
||||
case VK_FORMAT_D32_SFLOAT:
|
||||
buf[0] = fui(val->depth);
|
||||
return;
|
||||
case VK_FORMAT_S8_UINT:
|
||||
buf[0] = val->stencil;
|
||||
return;
|
||||
default:
|
||||
unreachable("unexpected zs format");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
tu_physical_device_get_format_properties(
|
||||
struct tu_physical_device *physical_device,
|
||||
|
@@ -111,13 +111,6 @@ tu_image_create(VkDevice _device,
|
||||
ubwc_enabled = false;
|
||||
}
|
||||
|
||||
/* using UBWC with D24S8 breaks the "stencil read" copy path (why?)
|
||||
* (causes any deqp tests that need to check stencil to fail)
|
||||
* disable UBWC for this format until we properly support copy aspect masks
|
||||
*/
|
||||
if (image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT)
|
||||
ubwc_enabled = false;
|
||||
|
||||
/* UBWC can't be used with E5B9G9R9 */
|
||||
if (image->vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
|
||||
ubwc_enabled = false;
|
||||
@@ -166,7 +159,7 @@ tu_image_create(VkDevice _device,
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static enum a6xx_tex_fetchsize
|
||||
enum a6xx_tex_fetchsize
|
||||
tu6_fetchsize(VkFormat format)
|
||||
{
|
||||
if (vk_format_description(format)->layout == UTIL_FORMAT_LAYOUT_ASTC)
|
||||
@@ -277,24 +270,27 @@ tu_image_view_init(struct tu_image_view *iview,
|
||||
memset(iview->descriptor, 0, sizeof(iview->descriptor));
|
||||
|
||||
struct tu_native_format fmt =
|
||||
tu6_format_texture(iview->vk_format, image->layout.tile_mode);
|
||||
tu6_format_image_src(image, iview->vk_format, iview->base_mip);
|
||||
uint64_t base_addr = tu_image_base(image, iview->base_mip, iview->base_layer);
|
||||
uint64_t ubwc_addr = tu_image_ubwc_base(image, iview->base_mip, iview->base_layer);
|
||||
|
||||
uint32_t pitch = tu_image_stride(image, iview->base_mip) / vk_format_get_blockwidth(iview->vk_format);
|
||||
enum a6xx_tile_mode tile_mode = tu6_get_image_tile_mode(image, iview->base_mip);
|
||||
uint32_t pitch = tu_image_pitch(image, iview->base_mip);
|
||||
uint32_t width = iview->extent.width;
|
||||
uint32_t height = iview->extent.height;
|
||||
uint32_t depth = pCreateInfo->viewType == VK_IMAGE_VIEW_TYPE_3D ?
|
||||
iview->extent.depth : iview->layer_count;
|
||||
|
||||
unsigned fmt_tex = fmt.fmt;
|
||||
if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT &&
|
||||
iview->vk_format == VK_FORMAT_D24_UNORM_S8_UINT)
|
||||
fmt_tex = FMT6_S8Z24_UINT;
|
||||
if (fmt_tex == FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8) {
|
||||
if (iview->aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT)
|
||||
fmt_tex = FMT6_Z24_UNORM_S8_UINT;
|
||||
if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
|
||||
fmt_tex = FMT6_S8Z24_UINT;
|
||||
/* TODO: also use this format with storage descriptor ? */
|
||||
}
|
||||
|
||||
iview->descriptor[0] =
|
||||
A6XX_TEX_CONST_0_TILE_MODE(tile_mode) |
|
||||
A6XX_TEX_CONST_0_TILE_MODE(fmt.tile_mode) |
|
||||
COND(vk_format_is_srgb(iview->vk_format), A6XX_TEX_CONST_0_SRGB) |
|
||||
A6XX_TEX_CONST_0_FMT(fmt_tex) |
|
||||
A6XX_TEX_CONST_0_SAMPLES(tu_msaa_samples(image->samples)) |
|
||||
@@ -335,7 +331,7 @@ tu_image_view_init(struct tu_image_view *iview,
|
||||
|
||||
iview->storage_descriptor[0] =
|
||||
A6XX_IBO_0_FMT(fmt.fmt) |
|
||||
A6XX_IBO_0_TILE_MODE(tile_mode);
|
||||
A6XX_IBO_0_TILE_MODE(fmt.tile_mode);
|
||||
iview->storage_descriptor[1] =
|
||||
A6XX_IBO_1_WIDTH(width) |
|
||||
A6XX_IBO_1_HEIGHT(height);
|
||||
|
@@ -1,91 +0,0 @@
|
||||
/*
|
||||
* Copyright © 2015 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "tu_private.h"
|
||||
|
||||
#include "tu_blit.h"
|
||||
|
||||
static void
|
||||
tu_blit_image(struct tu_cmd_buffer *cmdbuf,
|
||||
struct tu_image *src_image,
|
||||
struct tu_image *dst_image,
|
||||
const VkImageBlit *info,
|
||||
VkFilter filter)
|
||||
{
|
||||
static const enum a6xx_rotation rotate[2][2] = {
|
||||
{ROTATE_0, ROTATE_HFLIP},
|
||||
{ROTATE_VFLIP, ROTATE_180},
|
||||
};
|
||||
bool mirror_x = (info->srcOffsets[1].x < info->srcOffsets[0].x) !=
|
||||
(info->dstOffsets[1].x < info->dstOffsets[0].x);
|
||||
bool mirror_y = (info->srcOffsets[1].y < info->srcOffsets[0].y) !=
|
||||
(info->dstOffsets[1].y < info->dstOffsets[0].y);
|
||||
bool mirror_z = (info->srcOffsets[1].z < info->srcOffsets[0].z) !=
|
||||
(info->dstOffsets[1].z < info->dstOffsets[0].z);
|
||||
|
||||
if (mirror_z) {
|
||||
tu_finishme("blit z mirror\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (info->srcOffsets[1].z - info->srcOffsets[0].z !=
|
||||
info->dstOffsets[1].z - info->dstOffsets[0].z) {
|
||||
tu_finishme("blit z filter\n");
|
||||
return;
|
||||
}
|
||||
assert(info->dstSubresource.layerCount == info->srcSubresource.layerCount);
|
||||
|
||||
struct tu_blit blt = {
|
||||
.dst = tu_blit_surf(dst_image, info->dstSubresource, info->dstOffsets),
|
||||
.src = tu_blit_surf(src_image, info->srcSubresource, info->srcOffsets),
|
||||
.layers = MAX2(info->srcOffsets[1].z - info->srcOffsets[0].z,
|
||||
info->dstSubresource.layerCount),
|
||||
.filter = filter == VK_FILTER_LINEAR,
|
||||
.rotation = rotate[mirror_y][mirror_x],
|
||||
};
|
||||
|
||||
tu_blit(cmdbuf, &cmdbuf->cs, &blt);
|
||||
}
|
||||
|
||||
void
|
||||
tu_CmdBlitImage(VkCommandBuffer commandBuffer,
|
||||
VkImage srcImage,
|
||||
VkImageLayout srcImageLayout,
|
||||
VkImage destImage,
|
||||
VkImageLayout destImageLayout,
|
||||
uint32_t regionCount,
|
||||
const VkImageBlit *pRegions,
|
||||
VkFilter filter)
|
||||
|
||||
{
|
||||
TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
|
||||
TU_FROM_HANDLE(tu_image, src_image, srcImage);
|
||||
TU_FROM_HANDLE(tu_image, dst_image, destImage);
|
||||
|
||||
tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
|
||||
tu_bo_list_add(&cmdbuf->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);
|
||||
|
||||
for (uint32_t i = 0; i < regionCount; ++i) {
|
||||
tu_blit_image(cmdbuf, src_image, dst_image, pRegions + i, filter);
|
||||
}
|
||||
}
|
@@ -1,75 +0,0 @@
|
||||
#include "tu_private.h"
|
||||
#include "tu_blit.h"
|
||||
#include "tu_cs.h"
|
||||
|
||||
void
|
||||
tu_CmdFillBuffer(VkCommandBuffer commandBuffer,
|
||||
VkBuffer dstBuffer,
|
||||
VkDeviceSize dstOffset,
|
||||
VkDeviceSize fillSize,
|
||||
uint32_t data)
|
||||
{
|
||||
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
|
||||
TU_FROM_HANDLE(tu_buffer, buffer, dstBuffer);
|
||||
|
||||
if (fillSize == VK_WHOLE_SIZE)
|
||||
fillSize = buffer->size - dstOffset;
|
||||
|
||||
tu_bo_list_add(&cmd->bo_list, buffer->bo, MSM_SUBMIT_BO_WRITE);
|
||||
|
||||
tu_blit(cmd, &cmd->cs, &(struct tu_blit) {
|
||||
.dst = {
|
||||
.fmt = VK_FORMAT_R32_UINT,
|
||||
.va = tu_buffer_iova(buffer) + dstOffset,
|
||||
.width = fillSize / 4,
|
||||
.height = 1,
|
||||
.samples = 1,
|
||||
},
|
||||
.layers = 1,
|
||||
.clear_value[0] = data,
|
||||
.type = TU_BLIT_CLEAR,
|
||||
.buffer = true,
|
||||
});
|
||||
}
|
||||
|
||||
void
|
||||
tu_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
|
||||
VkBuffer dstBuffer,
|
||||
VkDeviceSize dstOffset,
|
||||
VkDeviceSize dataSize,
|
||||
const void *pData)
|
||||
{
|
||||
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
|
||||
TU_FROM_HANDLE(tu_buffer, buffer, dstBuffer);
|
||||
|
||||
tu_bo_list_add(&cmd->bo_list, buffer->bo, MSM_SUBMIT_BO_WRITE);
|
||||
|
||||
struct ts_cs_memory tmp;
|
||||
VkResult result = tu_cs_alloc(&cmd->sub_cs, DIV_ROUND_UP(dataSize, 64), 64, &tmp);
|
||||
if (result != VK_SUCCESS) {
|
||||
cmd->record_result = result;
|
||||
return;
|
||||
}
|
||||
|
||||
memcpy(tmp.map, pData, dataSize);
|
||||
|
||||
tu_blit(cmd, &cmd->cs, &(struct tu_blit) {
|
||||
.dst = {
|
||||
.fmt = VK_FORMAT_R32_UINT,
|
||||
.va = tu_buffer_iova(buffer) + dstOffset,
|
||||
.width = dataSize / 4,
|
||||
.height = 1,
|
||||
.samples = 1,
|
||||
},
|
||||
.src = {
|
||||
.fmt = VK_FORMAT_R32_UINT,
|
||||
.va = tmp.iova,
|
||||
.width = dataSize / 4,
|
||||
.height = 1,
|
||||
.samples = 1,
|
||||
},
|
||||
.layers = 1,
|
||||
.type = TU_BLIT_COPY,
|
||||
.buffer = true,
|
||||
});
|
||||
}
|
@@ -1,238 +0,0 @@
|
||||
/*
|
||||
* Copyright © 2015 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "tu_private.h"
|
||||
#include "tu_blit.h"
|
||||
#include "tu_cs.h"
|
||||
|
||||
static void
|
||||
clear_image(struct tu_cmd_buffer *cmdbuf,
|
||||
struct tu_image *image,
|
||||
uint32_t clear_value[4],
|
||||
const VkImageSubresourceRange *range)
|
||||
{
|
||||
uint32_t level_count = tu_get_levelCount(image, range);
|
||||
uint32_t layer_count = tu_get_layerCount(image, range);
|
||||
|
||||
if (image->type == VK_IMAGE_TYPE_3D) {
|
||||
assert(layer_count == 1);
|
||||
assert(range->baseArrayLayer == 0);
|
||||
}
|
||||
|
||||
for (unsigned j = 0; j < level_count; j++) {
|
||||
if (image->type == VK_IMAGE_TYPE_3D)
|
||||
layer_count = u_minify(image->extent.depth, range->baseMipLevel + j);
|
||||
|
||||
tu_blit(cmdbuf, &cmdbuf->cs, &(struct tu_blit) {
|
||||
.dst = tu_blit_surf_whole(image, range->baseMipLevel + j, range->baseArrayLayer),
|
||||
.layers = layer_count,
|
||||
.clear_value = {clear_value[0], clear_value[1], clear_value[2], clear_value[3]},
|
||||
.type = TU_BLIT_CLEAR,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
tu_CmdClearColorImage(VkCommandBuffer commandBuffer,
|
||||
VkImage image_h,
|
||||
VkImageLayout imageLayout,
|
||||
const VkClearColorValue *pColor,
|
||||
uint32_t rangeCount,
|
||||
const VkImageSubresourceRange *pRanges)
|
||||
{
|
||||
TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
|
||||
TU_FROM_HANDLE(tu_image, image, image_h);
|
||||
uint32_t clear_value[4] = {};
|
||||
|
||||
tu_2d_clear_color(pColor, image->vk_format, clear_value);
|
||||
|
||||
tu_bo_list_add(&cmdbuf->bo_list, image->bo, MSM_SUBMIT_BO_WRITE);
|
||||
|
||||
for (unsigned i = 0; i < rangeCount; i++)
|
||||
clear_image(cmdbuf, image, clear_value, pRanges + i);
|
||||
}
|
||||
|
||||
void
|
||||
tu_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
|
||||
VkImage image_h,
|
||||
VkImageLayout imageLayout,
|
||||
const VkClearDepthStencilValue *pDepthStencil,
|
||||
uint32_t rangeCount,
|
||||
const VkImageSubresourceRange *pRanges)
|
||||
{
|
||||
TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
|
||||
TU_FROM_HANDLE(tu_image, image, image_h);
|
||||
uint32_t clear_value[4] = {};
|
||||
|
||||
tu_2d_clear_zs(pDepthStencil, image->vk_format, clear_value);
|
||||
|
||||
tu_bo_list_add(&cmdbuf->bo_list, image->bo, MSM_SUBMIT_BO_WRITE);
|
||||
|
||||
for (unsigned i = 0; i < rangeCount; i++)
|
||||
clear_image(cmdbuf, image, clear_value, pRanges + i);
|
||||
}
|
||||
|
||||
void
|
||||
tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
uint32_t attachment,
|
||||
const VkClearValue *value,
|
||||
const VkClearRect *rect)
|
||||
{
|
||||
if (!cmd->state.framebuffer) {
|
||||
tu_finishme("sysmem CmdClearAttachments in secondary command buffer");
|
||||
return;
|
||||
}
|
||||
|
||||
const struct tu_image_view *iview =
|
||||
cmd->state.framebuffer->attachments[attachment].attachment;
|
||||
|
||||
uint32_t clear_vals[4] = { 0 };
|
||||
if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT |
|
||||
VK_IMAGE_ASPECT_STENCIL_BIT)) {
|
||||
tu_2d_clear_zs(&value->depthStencil, iview->vk_format,
|
||||
clear_vals);
|
||||
} else {
|
||||
tu_2d_clear_color(&value->color, iview->vk_format,
|
||||
clear_vals);
|
||||
}
|
||||
|
||||
tu_blit(cmd, cs, &(struct tu_blit) {
|
||||
.dst = sysmem_attachment_surf(iview, rect->baseArrayLayer, &rect->rect),
|
||||
.layers = rect->layerCount,
|
||||
.clear_value = { clear_vals[0], clear_vals[1], clear_vals[2], clear_vals[3] },
|
||||
.type = TU_BLIT_CLEAR,
|
||||
});
|
||||
}
|
||||
|
||||
void
|
||||
tu_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
uint32_t attachment,
|
||||
uint8_t component_mask,
|
||||
const VkClearValue *value)
|
||||
{
|
||||
VkFormat fmt = cmd->state.pass->attachments[attachment].format;
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 1);
|
||||
tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(tu6_base_format(fmt)));
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_INFO, 1);
|
||||
tu_cs_emit(cs, A6XX_RB_BLIT_INFO_GMEM | A6XX_RB_BLIT_INFO_CLEAR_MASK(component_mask));
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
|
||||
tu_cs_emit(cs, cmd->state.pass->attachments[attachment].gmem_offset);
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_88D0, 1);
|
||||
tu_cs_emit(cs, 0);
|
||||
|
||||
uint32_t clear_vals[4] = { 0 };
|
||||
tu_pack_clear_value(value, fmt, clear_vals);
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
|
||||
tu_cs_emit(cs, clear_vals[0]);
|
||||
tu_cs_emit(cs, clear_vals[1]);
|
||||
tu_cs_emit(cs, clear_vals[2]);
|
||||
tu_cs_emit(cs, clear_vals[3]);
|
||||
|
||||
tu6_emit_event_write(cmd, cs, BLIT, false);
|
||||
}
|
||||
|
||||
void
|
||||
tu_CmdClearAttachments(VkCommandBuffer commandBuffer,
|
||||
uint32_t attachmentCount,
|
||||
const VkClearAttachment *pAttachments,
|
||||
uint32_t rectCount,
|
||||
const VkClearRect *pRects)
|
||||
{
|
||||
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
|
||||
const struct tu_subpass *subpass = cmd->state.subpass;
|
||||
struct tu_cs *cs = &cmd->draw_cs;
|
||||
|
||||
tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM);
|
||||
|
||||
for (unsigned i = 0; i < rectCount; i++) {
|
||||
unsigned x1 = pRects[i].rect.offset.x;
|
||||
unsigned y1 = pRects[i].rect.offset.y;
|
||||
unsigned x2 = x1 + pRects[i].rect.extent.width - 1;
|
||||
unsigned y2 = y1 + pRects[i].rect.extent.height - 1;
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_SCISSOR_TL, 2);
|
||||
tu_cs_emit(cs, A6XX_RB_BLIT_SCISSOR_TL_X(x1) | A6XX_RB_BLIT_SCISSOR_TL_Y(y1));
|
||||
tu_cs_emit(cs, A6XX_RB_BLIT_SCISSOR_BR_X(x2) | A6XX_RB_BLIT_SCISSOR_BR_Y(y2));
|
||||
|
||||
for (unsigned j = 0; j < attachmentCount; j++) {
|
||||
uint32_t a;
|
||||
unsigned clear_mask = 0;
|
||||
if (pAttachments[j].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
|
||||
clear_mask = 0xf;
|
||||
a = subpass->color_attachments[pAttachments[j].colorAttachment].attachment;
|
||||
} else {
|
||||
a = subpass->depth_stencil_attachment.attachment;
|
||||
if (pAttachments[j].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
|
||||
clear_mask |= 1;
|
||||
if (pAttachments[j].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
|
||||
clear_mask |= 2;
|
||||
}
|
||||
|
||||
if (a == VK_ATTACHMENT_UNUSED)
|
||||
continue;
|
||||
|
||||
tu_clear_gmem_attachment(cmd, cs, a, clear_mask,
|
||||
&pAttachments[j].clearValue);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
tu_cond_exec_end(cs);
|
||||
|
||||
tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_SYSMEM);
|
||||
|
||||
for (unsigned i = 0; i < rectCount; i++) {
|
||||
for (unsigned j = 0; j < attachmentCount; j++) {
|
||||
uint32_t a;
|
||||
unsigned clear_mask = 0;
|
||||
if (pAttachments[j].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
|
||||
clear_mask = 0xf;
|
||||
a = subpass->color_attachments[pAttachments[j].colorAttachment].attachment;
|
||||
} else {
|
||||
a = subpass->depth_stencil_attachment.attachment;
|
||||
if (pAttachments[j].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
|
||||
clear_mask |= 1;
|
||||
if (pAttachments[j].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
|
||||
clear_mask |= 2;
|
||||
if (clear_mask != 3)
|
||||
tu_finishme("sysmem depth/stencil only clears");
|
||||
}
|
||||
|
||||
if (a == VK_ATTACHMENT_UNUSED)
|
||||
continue;
|
||||
|
||||
tu_clear_sysmem_attachment(cmd, cs, a,
|
||||
&pAttachments[j].clearValue,
|
||||
&pRects[i]);
|
||||
}
|
||||
}
|
||||
|
||||
tu_cond_exec_end(cs);
|
||||
}
|
@@ -1,215 +0,0 @@
|
||||
/*
|
||||
* Copyright © 2016 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "tu_private.h"
|
||||
|
||||
#include "a6xx.xml.h"
|
||||
#include "adreno_common.xml.h"
|
||||
#include "adreno_pm4.xml.h"
|
||||
|
||||
#include "vk_format.h"
|
||||
|
||||
#include "tu_cs.h"
|
||||
#include "tu_blit.h"
|
||||
|
||||
static void
|
||||
tu_copy_buffer(struct tu_cmd_buffer *cmd,
|
||||
struct tu_buffer *src,
|
||||
struct tu_buffer *dst,
|
||||
const VkBufferCopy *region)
|
||||
{
|
||||
tu_bo_list_add(&cmd->bo_list, src->bo, MSM_SUBMIT_BO_READ);
|
||||
tu_bo_list_add(&cmd->bo_list, dst->bo, MSM_SUBMIT_BO_WRITE);
|
||||
|
||||
tu_blit(cmd, &cmd->cs, &(struct tu_blit) {
|
||||
.dst = {
|
||||
.fmt = VK_FORMAT_R8_UNORM,
|
||||
.va = tu_buffer_iova(dst) + region->dstOffset,
|
||||
.width = region->size,
|
||||
.height = 1,
|
||||
.samples = 1,
|
||||
},
|
||||
.src = {
|
||||
.fmt = VK_FORMAT_R8_UNORM,
|
||||
.va = tu_buffer_iova(src) + region->srcOffset,
|
||||
.width = region->size,
|
||||
.height = 1,
|
||||
.samples = 1,
|
||||
},
|
||||
.layers = 1,
|
||||
.type = TU_BLIT_COPY,
|
||||
.buffer = true,
|
||||
});
|
||||
}
|
||||
|
||||
static struct tu_blit_surf
|
||||
tu_blit_buffer(struct tu_buffer *buffer,
|
||||
VkFormat format,
|
||||
const VkBufferImageCopy *info)
|
||||
{
|
||||
if (info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT)
|
||||
format = VK_FORMAT_R8_UNORM;
|
||||
|
||||
unsigned pitch = (info->bufferRowLength ?: info->imageExtent.width) *
|
||||
vk_format_get_blocksize(format);
|
||||
|
||||
return (struct tu_blit_surf) {
|
||||
.fmt = format,
|
||||
.tile_mode = TILE6_LINEAR,
|
||||
.va = tu_buffer_iova(buffer) + info->bufferOffset,
|
||||
.pitch = pitch,
|
||||
.layer_size = (info->bufferImageHeight ?: info->imageExtent.height) * pitch / vk_format_get_blockwidth(format) / vk_format_get_blockheight(format),
|
||||
.width = info->imageExtent.width,
|
||||
.height = info->imageExtent.height,
|
||||
.samples = 1,
|
||||
};
|
||||
}
|
||||
|
||||
static void
|
||||
tu_copy_buffer_to_image(struct tu_cmd_buffer *cmdbuf,
|
||||
struct tu_buffer *src_buffer,
|
||||
struct tu_image *dst_image,
|
||||
const VkBufferImageCopy *info)
|
||||
{
|
||||
if (info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT &&
|
||||
vk_format_get_blocksize(dst_image->vk_format) == 4) {
|
||||
tu_finishme("aspect mask\n");
|
||||
return;
|
||||
}
|
||||
|
||||
tu_blit(cmdbuf, &cmdbuf->cs, &(struct tu_blit) {
|
||||
.dst = tu_blit_surf_ext(dst_image, info->imageSubresource, info->imageOffset, info->imageExtent),
|
||||
.src = tu_blit_buffer(src_buffer, dst_image->vk_format, info),
|
||||
.layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount),
|
||||
.type = TU_BLIT_COPY,
|
||||
});
|
||||
}
|
||||
|
||||
static void
|
||||
tu_copy_image_to_buffer(struct tu_cmd_buffer *cmdbuf,
|
||||
struct tu_image *src_image,
|
||||
struct tu_buffer *dst_buffer,
|
||||
const VkBufferImageCopy *info)
|
||||
{
|
||||
tu_blit(cmdbuf, &cmdbuf->cs, &(struct tu_blit) {
|
||||
.dst = tu_blit_buffer(dst_buffer, src_image->vk_format, info),
|
||||
.src = tu_blit_surf_ext(src_image, info->imageSubresource, info->imageOffset, info->imageExtent),
|
||||
.layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount),
|
||||
.type = TU_BLIT_COPY,
|
||||
});
|
||||
}
|
||||
|
||||
static void
|
||||
tu_copy_image_to_image(struct tu_cmd_buffer *cmdbuf,
|
||||
struct tu_image *src_image,
|
||||
struct tu_image *dst_image,
|
||||
const VkImageCopy *info)
|
||||
{
|
||||
if ((info->dstSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT &&
|
||||
vk_format_get_blocksize(dst_image->vk_format) == 4) ||
|
||||
(info->srcSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT &&
|
||||
vk_format_get_blocksize(src_image->vk_format) == 4)) {
|
||||
tu_finishme("aspect mask\n");
|
||||
return;
|
||||
}
|
||||
|
||||
tu_blit(cmdbuf, &cmdbuf->cs, &(struct tu_blit) {
|
||||
.dst = tu_blit_surf_ext(dst_image, info->dstSubresource, info->dstOffset, info->extent),
|
||||
.src = tu_blit_surf_ext(src_image, info->srcSubresource, info->srcOffset, info->extent),
|
||||
.layers = info->extent.depth,
|
||||
.type = TU_BLIT_COPY,
|
||||
});
|
||||
}
|
||||
|
||||
void
|
||||
tu_CmdCopyBuffer(VkCommandBuffer commandBuffer,
|
||||
VkBuffer srcBuffer,
|
||||
VkBuffer destBuffer,
|
||||
uint32_t regionCount,
|
||||
const VkBufferCopy *pRegions)
|
||||
{
|
||||
TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
|
||||
TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer);
|
||||
TU_FROM_HANDLE(tu_buffer, dst_buffer, destBuffer);
|
||||
|
||||
for (unsigned i = 0; i < regionCount; ++i)
|
||||
tu_copy_buffer(cmdbuf, src_buffer, dst_buffer, &pRegions[i]);
|
||||
}
|
||||
|
||||
void
|
||||
tu_CmdCopyBufferToImage(VkCommandBuffer commandBuffer,
|
||||
VkBuffer srcBuffer,
|
||||
VkImage destImage,
|
||||
VkImageLayout destImageLayout,
|
||||
uint32_t regionCount,
|
||||
const VkBufferImageCopy *pRegions)
|
||||
{
|
||||
TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
|
||||
TU_FROM_HANDLE(tu_image, dst_image, destImage);
|
||||
TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer);
|
||||
|
||||
tu_bo_list_add(&cmdbuf->bo_list, src_buffer->bo, MSM_SUBMIT_BO_READ);
|
||||
tu_bo_list_add(&cmdbuf->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);
|
||||
|
||||
for (unsigned i = 0; i < regionCount; ++i)
|
||||
tu_copy_buffer_to_image(cmdbuf, src_buffer, dst_image, pRegions + i);
|
||||
}
|
||||
|
||||
void
|
||||
tu_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer,
|
||||
VkImage srcImage,
|
||||
VkImageLayout srcImageLayout,
|
||||
VkBuffer destBuffer,
|
||||
uint32_t regionCount,
|
||||
const VkBufferImageCopy *pRegions)
|
||||
{
|
||||
TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
|
||||
TU_FROM_HANDLE(tu_image, src_image, srcImage);
|
||||
TU_FROM_HANDLE(tu_buffer, dst_buffer, destBuffer);
|
||||
|
||||
tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
|
||||
tu_bo_list_add(&cmdbuf->bo_list, dst_buffer->bo, MSM_SUBMIT_BO_WRITE);
|
||||
|
||||
for (unsigned i = 0; i < regionCount; ++i)
|
||||
tu_copy_image_to_buffer(cmdbuf, src_image, dst_buffer, pRegions + i);
|
||||
}
|
||||
|
||||
void
|
||||
tu_CmdCopyImage(VkCommandBuffer commandBuffer,
|
||||
VkImage srcImage,
|
||||
VkImageLayout srcImageLayout,
|
||||
VkImage destImage,
|
||||
VkImageLayout destImageLayout,
|
||||
uint32_t regionCount,
|
||||
const VkImageCopy *pRegions)
|
||||
{
|
||||
TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, commandBuffer);
|
||||
TU_FROM_HANDLE(tu_image, src_image, srcImage);
|
||||
TU_FROM_HANDLE(tu_image, dst_image, destImage);
|
||||
|
||||
tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
|
||||
tu_bo_list_add(&cmdbuf->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);
|
||||
|
||||
for (uint32_t i = 0; i < regionCount; ++i)
|
||||
tu_copy_image_to_image(cmdbuf, src_image, dst_image, pRegions + i);
|
||||
}
|
@@ -1,67 +0,0 @@
|
||||
/*
|
||||
* Copyright © 2016 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "tu_private.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "nir/nir_builder.h"
|
||||
#include "vk_format.h"
|
||||
|
||||
#include "tu_blit.h"
|
||||
|
||||
static void
|
||||
tu_resolve_image(struct tu_cmd_buffer *cmdbuf,
|
||||
struct tu_image *src_image,
|
||||
struct tu_image *dst_image,
|
||||
const VkImageResolve *info)
|
||||
{
|
||||
assert(info->dstSubresource.layerCount == info->srcSubresource.layerCount);
|
||||
|
||||
tu_blit(cmdbuf, &cmdbuf->cs, &(struct tu_blit) {
|
||||
.dst = tu_blit_surf_ext(dst_image, info->dstSubresource, info->dstOffset, info->extent),
|
||||
.src = tu_blit_surf_ext(src_image, info->srcSubresource, info->srcOffset, info->extent),
|
||||
.layers = MAX2(info->extent.depth, info->dstSubresource.layerCount)
|
||||
});
|
||||
}
|
||||
|
||||
void
|
||||
tu_CmdResolveImage(VkCommandBuffer cmd_buffer_h,
|
||||
VkImage src_image_h,
|
||||
VkImageLayout src_image_layout,
|
||||
VkImage dest_image_h,
|
||||
VkImageLayout dest_image_layout,
|
||||
uint32_t region_count,
|
||||
const VkImageResolve *regions)
|
||||
{
|
||||
TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, cmd_buffer_h);
|
||||
TU_FROM_HANDLE(tu_image, src_image, src_image_h);
|
||||
TU_FROM_HANDLE(tu_image, dst_image, dest_image_h);
|
||||
|
||||
tu_bo_list_add(&cmdbuf->bo_list, src_image->bo, MSM_SUBMIT_BO_READ);
|
||||
tu_bo_list_add(&cmdbuf->bo_list, dst_image->bo, MSM_SUBMIT_BO_WRITE);
|
||||
|
||||
for (uint32_t i = 0; i < region_count; ++i)
|
||||
tu_resolve_image(cmdbuf, src_image, dst_image, regions + i);
|
||||
}
|
@@ -39,7 +39,8 @@ static void update_samples(struct tu_subpass *subpass,
|
||||
#define GMEM_ALIGN 0x4000
|
||||
|
||||
static void
|
||||
compute_gmem_offsets(struct tu_render_pass *pass, uint32_t gmem_size)
|
||||
compute_gmem_offsets(struct tu_render_pass *pass,
|
||||
const struct tu_physical_device *phys_dev)
|
||||
{
|
||||
/* calculate total bytes per pixel */
|
||||
uint32_t cpp_total = 0;
|
||||
@@ -56,12 +57,14 @@ compute_gmem_offsets(struct tu_render_pass *pass, uint32_t gmem_size)
|
||||
return;
|
||||
}
|
||||
|
||||
/* TODO: this algorithm isn't optimal
|
||||
/* TODO: using ccu_offset_gmem so that BLIT_OP_SCALE resolve path
|
||||
* doesn't break things. maybe there is a better solution?
|
||||
* TODO: this algorithm isn't optimal
|
||||
* for example, two attachments with cpp = {1, 4}
|
||||
* result: nblocks = {12, 52}, pixels = 196608
|
||||
* optimal: nblocks = {13, 51}, pixels = 208896
|
||||
*/
|
||||
uint32_t gmem_blocks = gmem_size / GMEM_ALIGN;
|
||||
uint32_t gmem_blocks = phys_dev->ccu_offset_gmem / GMEM_ALIGN;
|
||||
uint32_t offset = 0, pixels = ~0u;
|
||||
for (uint32_t i = 0; i < pass->attachment_count; i++) {
|
||||
struct tu_render_pass_attachment *att = &pass->attachments[i];
|
||||
@@ -206,7 +209,7 @@ tu_CreateRenderPass(VkDevice _device,
|
||||
|
||||
*pRenderPass = tu_render_pass_to_handle(pass);
|
||||
|
||||
compute_gmem_offsets(pass, device->physical_device->gmem_size);
|
||||
compute_gmem_offsets(pass, device->physical_device);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
@@ -335,7 +338,7 @@ tu_CreateRenderPass2(VkDevice _device,
|
||||
|
||||
*pRenderPass = tu_render_pass_to_handle(pass);
|
||||
|
||||
compute_gmem_offsets(pass, device->physical_device->gmem_size);
|
||||
compute_gmem_offsets(pass, device->physical_device);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
@@ -77,6 +77,8 @@ typedef uint32_t xcb_window_t;
|
||||
|
||||
#include "tu_entrypoints.h"
|
||||
|
||||
#include "vk_format.h"
|
||||
|
||||
#define MAX_VBS 32
|
||||
#define MAX_VERTEX_ATTRIBS 32
|
||||
#define MAX_RTS 8
|
||||
@@ -1284,6 +1286,48 @@ tu6_emit_stencil_reference(struct tu_cs *cs, uint32_t front, uint32_t back);
|
||||
void
|
||||
tu6_emit_blend_constants(struct tu_cs *cs, const float constants[4]);
|
||||
|
||||
void tu6_emit_msaa(struct tu_cs *cs, VkSampleCountFlagBits samples);
|
||||
|
||||
void tu6_emit_window_scissor(struct tu_cs *cs, uint32_t x1, uint32_t y1, uint32_t x2, uint32_t y2);
|
||||
|
||||
void tu6_emit_window_offset(struct tu_cs *cs, uint32_t x1, uint32_t y1);
|
||||
|
||||
struct tu_image_view;
|
||||
|
||||
void
|
||||
tu_resolve_sysmem(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
struct tu_image_view *src,
|
||||
struct tu_image_view *dst,
|
||||
uint32_t layers,
|
||||
const VkRect2D *rect);
|
||||
|
||||
void
|
||||
tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
uint32_t a,
|
||||
const VkRenderPassBeginInfo *info);
|
||||
|
||||
void
|
||||
tu_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
uint32_t a,
|
||||
const VkRenderPassBeginInfo *info);
|
||||
|
||||
void
|
||||
tu_load_gmem_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, uint32_t a);
|
||||
|
||||
/* expose this function to be able to emit load without checking LOAD_OP */
|
||||
void
|
||||
tu_emit_load_gmem_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, uint32_t a);
|
||||
|
||||
/* note: gmem store can also resolve */
|
||||
void
|
||||
tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
uint32_t a,
|
||||
uint32_t gmem_a);
|
||||
|
||||
struct tu_userdata_info *
|
||||
tu_lookup_user_sgpr(struct tu_pipeline *pipeline,
|
||||
gl_shader_stage stage,
|
||||
@@ -1330,18 +1374,6 @@ tu6_base_format(VkFormat format)
|
||||
return tu6_format_color(format, TILE6_LINEAR).fmt;
|
||||
}
|
||||
|
||||
void
|
||||
tu_pack_clear_value(const VkClearValue *val,
|
||||
VkFormat format,
|
||||
uint32_t buf[4]);
|
||||
|
||||
void
|
||||
tu_2d_clear_color(const VkClearColorValue *val, VkFormat format, uint32_t buf[4]);
|
||||
|
||||
void
|
||||
tu_2d_clear_zs(const VkClearDepthStencilValue *val, VkFormat format, uint32_t buf[4]);
|
||||
|
||||
enum a6xx_2d_ifmt tu6_fmt_to_ifmt(enum a6xx_format fmt);
|
||||
enum a6xx_depth_format tu6_pipe2depth(VkFormat format);
|
||||
|
||||
struct tu_image
|
||||
@@ -1409,6 +1441,14 @@ tu_image_stride(struct tu_image *image, int level)
|
||||
return image->layout.slices[level].pitch * image->layout.cpp;
|
||||
}
|
||||
|
||||
/* to get the right pitch for compressed formats */
|
||||
static inline uint32_t
|
||||
tu_image_pitch(struct tu_image *image, int level)
|
||||
{
|
||||
uint32_t stride = tu_image_stride(image, level);
|
||||
return stride / vk_format_get_blockwidth(image->vk_format);
|
||||
}
|
||||
|
||||
static inline uint64_t
|
||||
tu_image_base(struct tu_image *image, int level, int layer)
|
||||
{
|
||||
@@ -1458,10 +1498,16 @@ tu_image_ubwc_base(struct tu_image *image, int level, int layer)
|
||||
#define tu_image_view_ubwc_base_ref(iview) \
|
||||
tu_image_ubwc_base_ref(iview->image, iview->base_mip, iview->base_layer)
|
||||
|
||||
#define tu_image_view_ubwc_pitches(iview) \
|
||||
.pitch = tu_image_ubwc_pitch(iview->image, iview->base_mip), \
|
||||
.array_pitch = tu_image_ubwc_size(iview->image, iview->base_mip) >> 2
|
||||
|
||||
enum a6xx_tile_mode
|
||||
tu6_get_image_tile_mode(struct tu_image *image, int level);
|
||||
enum a3xx_msaa_samples
|
||||
tu_msaa_samples(uint32_t samples);
|
||||
enum a6xx_tex_fetchsize
|
||||
tu6_fetchsize(VkFormat format);
|
||||
|
||||
static inline struct tu_native_format
|
||||
tu6_format_image(struct tu_image *image, VkFormat format, uint32_t level)
|
||||
@@ -1705,21 +1751,6 @@ tu_gem_info_offset(const struct tu_device *dev, uint32_t gem_handle);
|
||||
uint64_t
|
||||
tu_gem_info_iova(const struct tu_device *dev, uint32_t gem_handle);
|
||||
|
||||
|
||||
void
|
||||
tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
uint32_t attachment,
|
||||
const VkClearValue *value,
|
||||
const VkClearRect *rect);
|
||||
|
||||
void
|
||||
tu_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs,
|
||||
uint32_t attachment,
|
||||
uint8_t component_mask,
|
||||
const VkClearValue *value);
|
||||
|
||||
#define TU_DEFINE_HANDLE_CASTS(__tu_type, __VkType) \
|
||||
\
|
||||
static inline struct __tu_type *__tu_type##_from_handle(__VkType _handle) \
|
||||
|
Reference in New Issue
Block a user