radv: add support for register shadowing
Signed-off-by: Yogesh Mohan Marimuthu <yogesh.mohanmarimuthu@amd.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18301>
committed by Marge Bot
parent db61db7f67
commit 97b9b2cf40
@@ -60,6 +60,7 @@ libradv_files = files(
   'radv_acceleration_structure.h',
   'radv_android.c',
   'radv_cmd_buffer.c',
+  'radv_cp_reg_shadowing.c',
   'radv_cs.h',
   'radv_debug.c',
   'radv_debug.h',
src/amd/vulkan/radv_cp_reg_shadowing.c (new file, 148 lines)
@@ -0,0 +1,148 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "ac_shadowed_regs.h"
+#include "radv_cs.h"
+#include "radv_debug.h"
+#include "radv_private.h"
+#include "sid.h"
+
+static void
+radv_set_context_reg_array(struct radeon_cmdbuf *cs, unsigned reg, unsigned num,
+                           const uint32_t *values)
+{
+   radeon_set_context_reg_seq(cs, reg, num);
+   radeon_emit_array(cs, values, num);
+}
+
+VkResult
+radv_create_shadow_regs_preamble(const struct radv_device *device,
+                                 struct radv_queue_state *queue_state)
+{
+   struct radeon_winsys *ws = device->ws;
+   struct radeon_info *info = &device->physical_device->rad_info;
+   VkResult result;
+
+   struct radeon_cmdbuf *cs = ws->cs_create(ws, AMD_IP_GFX);
+   if (!cs)
+      return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+   /* allocate memory for queue_state->shadowed_regs where register states are saved */
+   result = ws->buffer_create(ws, SI_SHADOWED_REG_BUFFER_SIZE, 4096, RADEON_DOMAIN_VRAM,
+                              RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_NO_INTERPROCESS_SHARING,
+                              RADV_BO_PRIORITY_SCRATCH, 0, &queue_state->shadowed_regs);
+   if (result != VK_SUCCESS)
+      goto fail;
+
+   /* fill the cs for shadow regs preamble ib that starts the register shadowing */
+   ac_create_shadowing_ib_preamble(info, (pm4_cmd_add_fn)&radeon_emit, cs,
+                                   queue_state->shadowed_regs->va, device->pbb_allowed);
+
+   while (cs->cdw & 7) {
+      if (info->gfx_ib_pad_with_type2)
+         radeon_emit(cs, PKT2_NOP_PAD);
+      else
+         radeon_emit(cs, PKT3_NOP_PAD);
+   }
+
+   result = ws->buffer_create(ws, cs->cdw * 4, 4096, ws->cs_domain(ws),
+                              RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |
+                                 RADEON_FLAG_READ_ONLY | RADEON_FLAG_GTT_WC,
+                              RADV_BO_PRIORITY_CS, 0, &queue_state->shadow_regs_ib);
+   if (result != VK_SUCCESS)
+      goto fail_ib_buffer;
+
+   /* copy the cs to queue_state->shadow_regs_ib. This will be the first preamble ib
+    * added in radv_update_preamble_cs.
+    */
+   void *map = ws->buffer_map(queue_state->shadow_regs_ib);
+   if (!map) {
+      result = VK_ERROR_MEMORY_MAP_FAILED;
+      goto fail_map;
+   }
+   memcpy(map, cs->buf, cs->cdw * 4);
+   queue_state->shadow_regs_ib_size_dw = cs->cdw;
+
+   ws->buffer_unmap(queue_state->shadow_regs_ib);
+   ws->cs_destroy(cs);
+   return VK_SUCCESS;
+fail_map:
+   ws->buffer_destroy(ws, queue_state->shadow_regs_ib);
+   queue_state->shadow_regs_ib = NULL;
+fail_ib_buffer:
+   ws->buffer_destroy(ws, queue_state->shadowed_regs);
+   queue_state->shadowed_regs = NULL;
+fail:
+   ws->cs_destroy(cs);
+   return result;
+}
+
+void
+radv_destroy_shadow_regs_preamble(struct radv_queue_state *queue_state, struct radeon_winsys *ws)
+{
+   if (queue_state->shadow_regs_ib)
+      ws->buffer_destroy(ws, queue_state->shadow_regs_ib);
+   if (queue_state->shadowed_regs)
+      ws->buffer_destroy(ws, queue_state->shadowed_regs);
+}
+
+void
+radv_emit_shadow_regs_preamble(struct radeon_cmdbuf *cs, const struct radv_device *device,
+                               struct radv_queue_state *queue_state)
+{
+   uint64_t va = radv_buffer_get_va(queue_state->shadow_regs_ib);
+   radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
+   radeon_emit(cs, va);
+   radeon_emit(cs, va >> 32);
+   radeon_emit(cs, queue_state->shadow_regs_ib_size_dw & 0xffff);
+
+   radv_cs_add_buffer(device->ws, cs, queue_state->shadowed_regs);
+   radv_cs_add_buffer(device->ws, cs, queue_state->shadow_regs_ib);
+}
+
+/* radv_init_shadowed_regs_buffer_state() will be called once from radv_queue_init(). This
+ * initializes the shadowed_regs buffer to good state */
+VkResult
+radv_init_shadowed_regs_buffer_state(const struct radv_device *device, struct radv_queue *queue)
+{
+   struct radeon_info *info = &device->physical_device->rad_info;
+   struct radeon_winsys *ws = device->ws;
+   struct radeon_cmdbuf *cs;
+   VkResult result;
+
+   cs = ws->cs_create(ws, AMD_IP_GFX);
+   if (!cs)
+      return VK_ERROR_OUT_OF_HOST_MEMORY;
+   radv_emit_shadow_regs_preamble(cs, device, &queue->state);
+   ac_emulate_clear_state(info, cs, radv_set_context_reg_array);
+
+   result = ws->cs_finalize(cs);
+   if (result == VK_SUCCESS) {
+      if (!radv_queue_internal_submit(queue, cs))
+         result = VK_ERROR_UNKNOWN;
+   }
+
+   ws->cs_destroy(cs);
+   return result;
+}
@@ -3123,14 +3123,28 @@ radv_queue_init(struct radv_device *device, struct radv_queue *queue, int idx,
    if (result != VK_SUCCESS)
       return result;
 
-   queue->vk.driver_submit = radv_queue_submit;
+   queue->state.uses_shadow_regs =
+      device->uses_shadow_regs && queue->state.qf == RADV_QUEUE_GENERAL;
+   if (queue->state.uses_shadow_regs) {
+      result = radv_create_shadow_regs_preamble(device, &queue->state);
+      if (result != VK_SUCCESS)
+         goto fail;
+      result = radv_init_shadowed_regs_buffer_state(device, queue);
+      if (result != VK_SUCCESS)
+         goto fail;
+   }
+
+   queue->vk.driver_submit = radv_queue_submit;
    return VK_SUCCESS;
+fail:
+   vk_queue_finish(&queue->vk);
+   return result;
 }
 
 static void
 radv_queue_state_finish(struct radv_queue_state *queue, struct radv_device *device)
 {
+   radv_destroy_shadow_regs_preamble(queue, device->ws);
    if (queue->initial_full_flush_preamble_cs)
       device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
    if (queue->initial_preamble_cs)
@@ -3868,6 +3882,10 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
    device->overallocation_disallowed = overallocation_disallowed;
    mtx_init(&device->overallocation_mutex, mtx_plain);
 
+   if (physical_device->rad_info.mid_command_buffer_preemption_enabled ||
+       device->instance->debug_flags & RADV_DEBUG_SHADOW_REGS)
+      device->uses_shadow_regs = true;
+
    /* Create one context per queue priority. */
    for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
       const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
@@ -5058,6 +5076,8 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
    /* Emit initial configuration. */
    switch (queue->qf) {
    case RADV_QUEUE_GENERAL:
+      if (queue->uses_shadow_regs)
+         radv_emit_shadow_regs_preamble(cs, device, queue);
       radv_init_graphics_state(cs, device);
 
       if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo || task_rings_bo) {
@@ -774,6 +774,16 @@ struct radv_queue_state {
    struct radeon_cmdbuf *continue_preamble_cs;
    struct radeon_cmdbuf *gang_wait_preamble_cs;
    struct radeon_cmdbuf *gang_wait_postamble_cs;
+
+   /* the uses_shadow_regs here will be set only for general queue */
+   bool uses_shadow_regs;
+   /* register state is saved in shadowed_regs buffer */
+   struct radeon_winsys_bo *shadowed_regs;
+   /* shadow regs preamble ib. This will be the first preamble ib.
+    * This ib has the packets to start register shadowing.
+    */
+   struct radeon_winsys_bo *shadow_regs_ib;
+   uint32_t shadow_regs_ib_size_dw;
 };
 
 struct radv_queue {
@@ -1014,6 +1024,8 @@ struct radv_device {
    struct radeon_cmdbuf **perf_counter_lock_cs;
 
    bool uses_device_generated_commands;
+
+   bool uses_shadow_regs;
 };
 
 bool radv_device_set_pstate(struct radv_device *device, bool enable);
@@ -1701,6 +1713,15 @@ void si_write_scissors(struct radeon_cmdbuf *cs, int count, const VkRect2D *scis
 void si_write_guardband(struct radeon_cmdbuf *cs, int count, const VkViewport *viewports,
                         unsigned rast_prim, unsigned polygon_mode, float line_width);
 
+VkResult radv_create_shadow_regs_preamble(const struct radv_device *device,
+                                          struct radv_queue_state *queue_state);
+void radv_destroy_shadow_regs_preamble(struct radv_queue_state *queue_state,
+                                       struct radeon_winsys *ws);
+void radv_emit_shadow_regs_preamble(struct radeon_cmdbuf *cs, const struct radv_device *device,
+                                    struct radv_queue_state *queue_state);
+VkResult radv_init_shadowed_regs_buffer_state(const struct radv_device *device,
+                                              struct radv_queue *queue);
+
 uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw,
                                    bool indirect_draw, bool count_from_stream_output,
                                    uint32_t draw_vertex_count, unsigned topology,
@@ -200,13 +200,15 @@ si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
    bool has_clear_state = physical_device->rad_info.has_clear_state;
    int i;
 
-   radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
-   radeon_emit(cs, CC0_UPDATE_LOAD_ENABLES(1));
-   radeon_emit(cs, CC1_UPDATE_SHADOW_ENABLES(1));
+   if (!device->uses_shadow_regs) {
+      radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
+      radeon_emit(cs, CC0_UPDATE_LOAD_ENABLES(1));
+      radeon_emit(cs, CC1_UPDATE_SHADOW_ENABLES(1));
 
-   if (has_clear_state) {
-      radeon_emit(cs, PKT3(PKT3_CLEAR_STATE, 0, 0));
-      radeon_emit(cs, 0);
+      if (has_clear_state) {
+         radeon_emit(cs, PKT3(PKT3_CLEAR_STATE, 0, 0));
+         radeon_emit(cs, 0);
+      }
    }
 
    if (physical_device->rad_info.gfx_level <= GFX8)