radv: add support for register shadowing

Signed-off-by: Yogesh Mohan Marimuthu <yogesh.mohanmarimuthu@amd.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18301>
Author: Yogesh Mohan Marimuthu
Authored: 2022-09-23 00:22:42 +05:30
Committed by: Marge Bot
Parent: db61db7f67
Commit: 97b9b2cf40
5 changed files with 199 additions and 7 deletions
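Register shadowing is enabled per device in radv_CreateDevice below, either automatically when mid-command-buffer preemption is enabled or on request via the RADV_DEBUG_SHADOW_REGS debug flag. A usage sketch, assuming the flag is exposed as a "shadowregs" option of the RADV_DEBUG environment variable:

RADV_DEBUG=shadowregs ./my_vulkan_app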

src/amd/vulkan/meson.build

@@ -60,6 +60,7 @@ libradv_files = files(
  'radv_acceleration_structure.h',
  'radv_android.c',
  'radv_cmd_buffer.c',
  'radv_cp_reg_shadowing.c',
  'radv_cs.h',
  'radv_debug.c',
  'radv_debug.h',

src/amd/vulkan/radv_cp_reg_shadowing.c (new file)

@@ -0,0 +1,148 @@
/*
* Copyright 2023 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "ac_shadowed_regs.h"
#include "radv_cs.h"
#include "radv_debug.h"
#include "radv_private.h"
#include "sid.h"

static void
radv_set_context_reg_array(struct radeon_cmdbuf *cs, unsigned reg, unsigned num,
                           const uint32_t *values)
{
   radeon_set_context_reg_seq(cs, reg, num);
   radeon_emit_array(cs, values, num);
}

VkResult
radv_create_shadow_regs_preamble(const struct radv_device *device,
                                 struct radv_queue_state *queue_state)
{
   struct radeon_winsys *ws = device->ws;
   struct radeon_info *info = &device->physical_device->rad_info;
   VkResult result;
   struct radeon_cmdbuf *cs = ws->cs_create(ws, AMD_IP_GFX);

   if (!cs)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   /* Allocate queue_state->shadowed_regs, the buffer where the register state is saved. */
   result = ws->buffer_create(ws, SI_SHADOWED_REG_BUFFER_SIZE, 4096, RADEON_DOMAIN_VRAM,
                              RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_NO_INTERPROCESS_SHARING,
                              RADV_BO_PRIORITY_SCRATCH, 0, &queue_state->shadowed_regs);
   if (result != VK_SUCCESS)
      goto fail;

   /* Fill the CS with the shadow-regs preamble IB that starts register shadowing. */
   ac_create_shadowing_ib_preamble(info, (pm4_cmd_add_fn)&radeon_emit, cs,
                                   queue_state->shadowed_regs->va, device->pbb_allowed);

   /* Pad the IB to a multiple of 8 dwords with the NOP packet this generation expects. */
   while (cs->cdw & 7) {
      if (info->gfx_ib_pad_with_type2)
         radeon_emit(cs, PKT2_NOP_PAD);
      else
         radeon_emit(cs, PKT3_NOP_PAD);
   }

   result = ws->buffer_create(ws, cs->cdw * 4, 4096, ws->cs_domain(ws),
                              RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |
                                 RADEON_FLAG_READ_ONLY | RADEON_FLAG_GTT_WC,
                              RADV_BO_PRIORITY_CS, 0, &queue_state->shadow_regs_ib);
   if (result != VK_SUCCESS)
      goto fail_ib_buffer;

   /* Copy the CS into queue_state->shadow_regs_ib. This will be the first preamble IB
    * added in radv_update_preamble_cs.
    */
   void *map = ws->buffer_map(queue_state->shadow_regs_ib);
   if (!map) {
      result = VK_ERROR_MEMORY_MAP_FAILED;
      goto fail_map;
   }
   memcpy(map, cs->buf, cs->cdw * 4);
   queue_state->shadow_regs_ib_size_dw = cs->cdw;

   ws->buffer_unmap(queue_state->shadow_regs_ib);
   ws->cs_destroy(cs);
   return VK_SUCCESS;

fail_map:
   ws->buffer_destroy(ws, queue_state->shadow_regs_ib);
   queue_state->shadow_regs_ib = NULL;
fail_ib_buffer:
   ws->buffer_destroy(ws, queue_state->shadowed_regs);
   queue_state->shadowed_regs = NULL;
fail:
   ws->cs_destroy(cs);
   return result;
}

void
radv_destroy_shadow_regs_preamble(struct radv_queue_state *queue_state, struct radeon_winsys *ws)
{
   if (queue_state->shadow_regs_ib)
      ws->buffer_destroy(ws, queue_state->shadow_regs_ib);
   if (queue_state->shadowed_regs)
      ws->buffer_destroy(ws, queue_state->shadowed_regs);
}

void
radv_emit_shadow_regs_preamble(struct radeon_cmdbuf *cs, const struct radv_device *device,
                               struct radv_queue_state *queue_state)
{
   uint64_t va = radv_buffer_get_va(queue_state->shadow_regs_ib);

   /* Call into the shadow-regs preamble IB: INDIRECT_BUFFER takes the 64-bit IB
    * address split into low/high dwords, followed by the IB size in dwords.
    */
   radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
   radeon_emit(cs, va);
   radeon_emit(cs, va >> 32);
   radeon_emit(cs, queue_state->shadow_regs_ib_size_dw & 0xffff);

   radv_cs_add_buffer(device->ws, cs, queue_state->shadowed_regs);
   radv_cs_add_buffer(device->ws, cs, queue_state->shadow_regs_ib);
}

/* radv_init_shadowed_regs_buffer_state() is called once from radv_queue_init(). It
 * initializes the shadowed_regs buffer to a known good state.
 */
VkResult
radv_init_shadowed_regs_buffer_state(const struct radv_device *device, struct radv_queue *queue)
{
   struct radeon_info *info = &device->physical_device->rad_info;
   struct radeon_winsys *ws = device->ws;
   struct radeon_cmdbuf *cs;
   VkResult result;

   cs = ws->cs_create(ws, AMD_IP_GFX);
   if (!cs)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   radv_emit_shadow_regs_preamble(cs, device, &queue->state);
   ac_emulate_clear_state(info, cs, radv_set_context_reg_array);

   result = ws->cs_finalize(cs);
   if (result == VK_SUCCESS) {
      if (!radv_queue_internal_submit(queue, cs))
         result = VK_ERROR_UNKNOWN;
   }

   ws->cs_destroy(cs);
   return result;
}
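A condensed sketch of how these new entry points are wired up at queue initialization; the actual call sites are in the radv_device.c hunk below:

   /* Only the general (GFX) queue uses register shadowing. */
   if (queue->state.uses_shadow_regs) {
      /* Build the per-queue shadow buffer and the preamble IB that enables shadowing. */
      result = radv_create_shadow_regs_preamble(device, &queue->state);
      /* Submit the preamble once, together with emulated CLEAR_STATE register writes,
       * so the shadow buffer starts out holding the default context register state. */
      if (result == VK_SUCCESS)
         result = radv_init_shadowed_regs_buffer_state(device, queue);
   }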

src/amd/vulkan/radv_device.c

@@ -3123,14 +3123,28 @@ radv_queue_init(struct radv_device *device, struct radv_queue *queue, int idx,
   if (result != VK_SUCCESS)
      return result;

   queue->vk.driver_submit = radv_queue_submit;

   queue->state.uses_shadow_regs =
      device->uses_shadow_regs && queue->state.qf == RADV_QUEUE_GENERAL;
   if (queue->state.uses_shadow_regs) {
      result = radv_create_shadow_regs_preamble(device, &queue->state);
      if (result != VK_SUCCESS)
         goto fail;

      result = radv_init_shadowed_regs_buffer_state(device, queue);
      if (result != VK_SUCCESS)
         goto fail;
   }

   queue->vk.driver_submit = radv_queue_submit;

   return VK_SUCCESS;
fail:
   vk_queue_finish(&queue->vk);
   return result;
}

static void
radv_queue_state_finish(struct radv_queue_state *queue, struct radv_device *device)
{
   radv_destroy_shadow_regs_preamble(queue, device->ws);
   if (queue->initial_full_flush_preamble_cs)
      device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
   if (queue->initial_preamble_cs)
@@ -3868,6 +3882,10 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
   device->overallocation_disallowed = overallocation_disallowed;
   mtx_init(&device->overallocation_mutex, mtx_plain);

   if (physical_device->rad_info.mid_command_buffer_preemption_enabled ||
       device->instance->debug_flags & RADV_DEBUG_SHADOW_REGS)
      device->uses_shadow_regs = true;

   /* Create one context per queue priority. */
   for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
      const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
@@ -5058,6 +5076,8 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
   /* Emit initial configuration. */
   switch (queue->qf) {
   case RADV_QUEUE_GENERAL:
      if (queue->uses_shadow_regs)
         radv_emit_shadow_regs_preamble(cs, device, queue);
      radv_init_graphics_state(cs, device);

      if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo || task_rings_bo) {

src/amd/vulkan/radv_private.h

@@ -774,6 +774,16 @@ struct radv_queue_state {
   struct radeon_cmdbuf *continue_preamble_cs;
   struct radeon_cmdbuf *gang_wait_preamble_cs;
   struct radeon_cmdbuf *gang_wait_postamble_cs;

   /* uses_shadow_regs is set only for the general queue. */
   bool uses_shadow_regs;

   /* Register state is saved in the shadowed_regs buffer. */
   struct radeon_winsys_bo *shadowed_regs;

   /* Shadow-regs preamble IB. This will be the first preamble IB and holds the
    * packets that start register shadowing.
    */
   struct radeon_winsys_bo *shadow_regs_ib;
   uint32_t shadow_regs_ib_size_dw;
};
struct radv_queue {
@@ -1014,6 +1024,8 @@ struct radv_device {
   struct radeon_cmdbuf **perf_counter_lock_cs;
   bool uses_device_generated_commands;
   bool uses_shadow_regs;
};
bool radv_device_set_pstate(struct radv_device *device, bool enable);
@@ -1701,6 +1713,15 @@ void si_write_scissors(struct radeon_cmdbuf *cs, int count, const VkRect2D *scis
void si_write_guardband(struct radeon_cmdbuf *cs, int count, const VkViewport *viewports,
                        unsigned rast_prim, unsigned polygon_mode, float line_width);

VkResult radv_create_shadow_regs_preamble(const struct radv_device *device,
                                          struct radv_queue_state *queue_state);
void radv_destroy_shadow_regs_preamble(struct radv_queue_state *queue_state,
                                       struct radeon_winsys *ws);
void radv_emit_shadow_regs_preamble(struct radeon_cmdbuf *cs, const struct radv_device *device,
                                    struct radv_queue_state *queue_state);
VkResult radv_init_shadowed_regs_buffer_state(const struct radv_device *device,
                                              struct radv_queue *queue);

uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw,
                                   bool indirect_draw, bool count_from_stream_output,
                                   uint32_t draw_vertex_count, unsigned topology,

src/amd/vulkan/si_cmd_buffer.c

@@ -200,13 +200,15 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
   bool has_clear_state = physical_device->rad_info.has_clear_state;
   int i;

   radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
   radeon_emit(cs, CC0_UPDATE_LOAD_ENABLES(1));
   radeon_emit(cs, CC1_UPDATE_SHADOW_ENABLES(1));
   if (!device->uses_shadow_regs) {
      radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
      radeon_emit(cs, CC0_UPDATE_LOAD_ENABLES(1));
      radeon_emit(cs, CC1_UPDATE_SHADOW_ENABLES(1));

   if (has_clear_state) {
      radeon_emit(cs, PKT3(PKT3_CLEAR_STATE, 0, 0));
      radeon_emit(cs, 0);
      if (has_clear_state) {
         radeon_emit(cs, PKT3(PKT3_CLEAR_STATE, 0, 0));
         radeon_emit(cs, 0);
      }
   }

   if (physical_device->rad_info.gfx_level <= GFX8)