radv: partial sdma support
SDMA code adapted from https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12763 The only supported use case is image (linear or tiled) -> buffer and only GFX9+ is supported (for now). Since RADV_QUEUE_TRANSFER aren't exposed to applications, this cannot be used, except by the driver. Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13959>
This commit is contained in:

committed by
Marge Bot

parent
148b2d0040
commit
7b5ab48c40
@@ -77,6 +77,7 @@ libradv_files = files(
|
||||
'radv_pipeline_rt.c',
|
||||
'radv_private.h',
|
||||
'radv_radeon_winsys.h',
|
||||
'radv_sdma_copy_image.c',
|
||||
'radv_shader.c',
|
||||
'radv_shader.h',
|
||||
'radv_shader_args.c',
|
||||
|
@@ -64,6 +64,7 @@ enum {
|
||||
RADV_DEBUG_NO_ATOC_DITHERING = 1ull << 33,
|
||||
RADV_DEBUG_NO_NGGC = 1ull << 34,
|
||||
RADV_DEBUG_DUMP_PROLOGS = 1ull << 35,
|
||||
RADV_DEBUG_NO_DMA_BLIT = 1ull << 36,
|
||||
};
|
||||
|
||||
enum {
|
||||
|
@@ -856,6 +856,7 @@ static const struct debug_control radv_debug_options[] = {
|
||||
{"noatocdithering", RADV_DEBUG_NO_ATOC_DITHERING},
|
||||
{"nonggc", RADV_DEBUG_NO_NGGC},
|
||||
{"prologs", RADV_DEBUG_DUMP_PROLOGS},
|
||||
{"nodma", RADV_DEBUG_NO_DMA_BLIT},
|
||||
{NULL, 0}};
|
||||
|
||||
const char *
|
||||
@@ -3777,6 +3778,9 @@ radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave,
|
||||
unsigned tess_offchip_ring_offset;
|
||||
uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
|
||||
VkResult result = VK_SUCCESS;
|
||||
if (queue->vk.queue_family_index == RADV_QUEUE_TRANSFER)
|
||||
return VK_SUCCESS;
|
||||
|
||||
if (!queue->has_tess_rings) {
|
||||
if (needs_tess_rings)
|
||||
add_tess_rings = true;
|
||||
|
@@ -278,6 +278,19 @@ copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buf
|
||||
struct radv_image *image, VkImageLayout layout,
|
||||
const VkBufferImageCopy2KHR *region)
|
||||
{
|
||||
if (cmd_buffer->pool->queue_family_index == RADV_QUEUE_TRANSFER) {
|
||||
/* RADV_QUEUE_TRANSFER should only be used for the prime blit */
|
||||
assert(!region->imageOffset.x && !region->imageOffset.y && !region->imageOffset.z);
|
||||
assert(image->type == VK_IMAGE_TYPE_2D);
|
||||
assert(image->info.width == region->imageExtent.width);
|
||||
assert(image->info.height == region->imageExtent.height);
|
||||
ASSERTED bool res = radv_sdma_copy_image(cmd_buffer, image, buffer, region);
|
||||
assert(res);
|
||||
radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, image->bo);
|
||||
radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, buffer->bo);
|
||||
return;
|
||||
}
|
||||
|
||||
struct radv_meta_saved_state saved_state;
|
||||
bool old_predicating;
|
||||
|
||||
|
@@ -2596,6 +2596,9 @@ void radv_emit_thread_trace_userdata(const struct radv_device *device, struct ra
|
||||
const void *data, uint32_t num_dwords);
|
||||
bool radv_is_instruction_timing_enabled(void);
|
||||
|
||||
bool radv_sdma_copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
|
||||
struct radv_buffer *buffer, const VkBufferImageCopy2KHR *region);
|
||||
|
||||
/* radv_sqtt_layer_.c */
|
||||
struct radv_barrier_data {
|
||||
union {
|
||||
|
196
src/amd/vulkan/radv_sdma_copy_image.c
Normal file
196
src/amd/vulkan/radv_sdma_copy_image.c
Normal file
@@ -0,0 +1,196 @@
|
||||
/*
|
||||
* Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
|
||||
* Copyright 2015-2021 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#include "util/u_memory.h"
|
||||
#include "radv_cs.h"
|
||||
#include "radv_private.h"
|
||||
#include "sid.h"
|
||||
|
||||
static bool
|
||||
radv_translate_format_to_hw(struct radeon_info *info, VkFormat format, unsigned *hw_fmt,
|
||||
unsigned *hw_type)
|
||||
{
|
||||
const struct util_format_description *desc = vk_format_description(format);
|
||||
*hw_fmt = radv_translate_colorformat(format);
|
||||
|
||||
int firstchan;
|
||||
for (firstchan = 0; firstchan < 4; firstchan++) {
|
||||
if (desc->channel[firstchan].type != UTIL_FORMAT_TYPE_VOID) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (firstchan == 4 || desc->channel[firstchan].type == UTIL_FORMAT_TYPE_FLOAT) {
|
||||
*hw_type = V_028C70_NUMBER_FLOAT;
|
||||
} else {
|
||||
*hw_type = V_028C70_NUMBER_UNORM;
|
||||
if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
|
||||
*hw_type = V_028C70_NUMBER_SRGB;
|
||||
else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_SIGNED) {
|
||||
if (desc->channel[firstchan].pure_integer) {
|
||||
*hw_type = V_028C70_NUMBER_SINT;
|
||||
} else {
|
||||
assert(desc->channel[firstchan].normalized);
|
||||
*hw_type = V_028C70_NUMBER_SNORM;
|
||||
}
|
||||
} else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_UNSIGNED) {
|
||||
if (desc->channel[firstchan].pure_integer) {
|
||||
*hw_type = V_028C70_NUMBER_UINT;
|
||||
} else {
|
||||
assert(desc->channel[firstchan].normalized);
|
||||
*hw_type = V_028C70_NUMBER_UNORM;
|
||||
}
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
radv_sdma_v4_v5_copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
|
||||
struct radv_buffer *buffer,
|
||||
const VkBufferImageCopy2KHR *region)
|
||||
{
|
||||
assert(image->plane_count == 1);
|
||||
struct radv_device *device = cmd_buffer->device;
|
||||
unsigned bpp = image->planes[0].surface.bpe;
|
||||
uint64_t dst_address = buffer->bo->va;
|
||||
uint64_t src_address = image->bo->va + image->planes[0].surface.u.gfx9.surf_offset;
|
||||
unsigned src_pitch = image->planes[0].surface.u.gfx9.surf_pitch;
|
||||
unsigned copy_width = DIV_ROUND_UP(image->info.width, image->planes[0].surface.blk_w);
|
||||
unsigned copy_height = DIV_ROUND_UP(image->info.height, image->planes[0].surface.blk_h);
|
||||
bool tmz = false;
|
||||
|
||||
uint32_t ib_pad_dw_mask = cmd_buffer->device->physical_device->rad_info.ib_pad_dw_mask[RING_DMA];
|
||||
|
||||
/* Linear -> linear sub-window copy. */
|
||||
if (image->planes[0].surface.is_linear) {
|
||||
ASSERTED unsigned cdw_max =
|
||||
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, align(8, ib_pad_dw_mask + 1));
|
||||
unsigned bytes = src_pitch * copy_height * bpp;
|
||||
|
||||
if (!(bytes < (1u << 22)))
|
||||
return false;
|
||||
|
||||
radeon_emit(cmd_buffer->cs, 0x00000000);
|
||||
|
||||
src_address += image->planes[0].surface.u.gfx9.offset[0];
|
||||
|
||||
radeon_emit(cmd_buffer->cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
|
||||
CIK_SDMA_COPY_SUB_OPCODE_LINEAR, (tmz ? 4 : 0)));
|
||||
radeon_emit(cmd_buffer->cs, bytes);
|
||||
radeon_emit(cmd_buffer->cs, 0);
|
||||
radeon_emit(cmd_buffer->cs, src_address);
|
||||
radeon_emit(cmd_buffer->cs, src_address >> 32);
|
||||
radeon_emit(cmd_buffer->cs, dst_address);
|
||||
radeon_emit(cmd_buffer->cs, dst_address >> 32);
|
||||
|
||||
while (cmd_buffer->cs->cdw & ib_pad_dw_mask)
|
||||
radeon_emit(cmd_buffer->cs, SDMA_NOP_PAD);
|
||||
|
||||
assert(cmd_buffer->cs->cdw <= cdw_max);
|
||||
|
||||
return true;
|
||||
}
|
||||
/* Tiled sub-window copy -> Linear */
|
||||
else {
|
||||
unsigned tiled_width = copy_width;
|
||||
unsigned tiled_height = copy_height;
|
||||
unsigned linear_pitch = region->bufferRowLength;
|
||||
unsigned linear_slice_pitch = region->bufferRowLength * copy_height;
|
||||
uint64_t tiled_address = src_address;
|
||||
uint64_t linear_address = dst_address;
|
||||
bool is_v5 = device->physical_device->rad_info.chip_class >= GFX10;
|
||||
/* Only SDMA 5 supports DCC with SDMA */
|
||||
bool dcc = radv_dcc_enabled(image, 0) && is_v5;
|
||||
|
||||
/* Check if everything fits into the bitfields */
|
||||
if (!(tiled_width < (1 << 14) && tiled_height < (1 << 14) && linear_pitch < (1 << 14) &&
|
||||
linear_slice_pitch < (1 << 28) && copy_width < (1 << 14) && copy_height < (1 << 14)))
|
||||
return false;
|
||||
|
||||
ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs,
|
||||
align(15 + dcc * 3, ib_pad_dw_mask + 1));
|
||||
|
||||
radeon_emit(cmd_buffer->cs, 0x00000000);
|
||||
radeon_emit(cmd_buffer->cs,
|
||||
CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW,
|
||||
(tmz ? 4 : 0)) |
|
||||
dcc << 19 | (is_v5 ? 0 : 0 /* tiled->buffer.b.b.last_level */) << 20 |
|
||||
1u << 31);
|
||||
radeon_emit(cmd_buffer->cs,
|
||||
(uint32_t)tiled_address | (image->planes[0].surface.tile_swizzle << 8));
|
||||
radeon_emit(cmd_buffer->cs, (uint32_t)(tiled_address >> 32));
|
||||
radeon_emit(cmd_buffer->cs, 0);
|
||||
radeon_emit(cmd_buffer->cs, ((tiled_width - 1) << 16));
|
||||
radeon_emit(cmd_buffer->cs, (tiled_height - 1));
|
||||
radeon_emit(
|
||||
cmd_buffer->cs,
|
||||
util_logbase2(bpp) | image->planes[0].surface.u.gfx9.swizzle_mode << 3 |
|
||||
image->planes[0].surface.u.gfx9.resource_type << 9 |
|
||||
(is_v5 ? 0 /* tiled->buffer.b.b.last_level */ : image->planes[0].surface.u.gfx9.epitch)
|
||||
<< 16);
|
||||
radeon_emit(cmd_buffer->cs, (uint32_t)linear_address);
|
||||
radeon_emit(cmd_buffer->cs, (uint32_t)(linear_address >> 32));
|
||||
radeon_emit(cmd_buffer->cs, 0);
|
||||
radeon_emit(cmd_buffer->cs, ((linear_pitch - 1) << 16));
|
||||
radeon_emit(cmd_buffer->cs, linear_slice_pitch - 1);
|
||||
radeon_emit(cmd_buffer->cs, (copy_width - 1) | ((copy_height - 1) << 16));
|
||||
radeon_emit(cmd_buffer->cs, 0);
|
||||
|
||||
if (dcc) {
|
||||
unsigned hw_fmt, hw_type;
|
||||
uint64_t md_address = tiled_address + image->planes[0].surface.meta_offset;
|
||||
|
||||
radv_translate_format_to_hw(&device->physical_device->rad_info, image->vk_format, &hw_fmt,
|
||||
&hw_type);
|
||||
|
||||
/* Add metadata */
|
||||
radeon_emit(cmd_buffer->cs, (uint32_t)md_address);
|
||||
radeon_emit(cmd_buffer->cs, (uint32_t)(md_address >> 32));
|
||||
radeon_emit(cmd_buffer->cs,
|
||||
hw_fmt | vi_alpha_is_on_msb(device, image->vk_format) << 8 | hw_type << 9 |
|
||||
image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size << 24 |
|
||||
V_028C78_MAX_BLOCK_SIZE_256B << 26 | tmz << 29 |
|
||||
image->planes[0].surface.u.gfx9.color.dcc.pipe_aligned << 31);
|
||||
}
|
||||
|
||||
while (cmd_buffer->cs->cdw & ib_pad_dw_mask)
|
||||
radeon_emit(cmd_buffer->cs, SDMA_NOP_PAD);
|
||||
|
||||
assert(cmd_buffer->cs->cdw <= cdw_max);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
radv_sdma_copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
|
||||
struct radv_buffer *buffer, const VkBufferImageCopy2KHR *region)
|
||||
{
|
||||
assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9);
|
||||
return radv_sdma_v4_v5_copy_image_to_buffer(cmd_buffer, image, buffer, region);
|
||||
}
|
Reference in New Issue
Block a user