radv: implement VK_EXT_device_generated_commands
The major differences compared to the NV extensions are:
- support for the sequence index as push constants
- support for draw with count tokens (note that DrawID is zero for normal draws)
- support for raytracing
- support for IES (only compute is supported for now)
- improved preprocessing support with the state command buffer param

The NV DGC extensions were only enabled for vkd3d-proton and it will maintain
both paths for a while, so they can be replaced by the EXT.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31383>
Committed by: Marge Bot
Parent: 637a4b849a
Commit: 9f8684359f
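For orientation (not part of this change): a minimal sketch of how an application might drive the new EXT entry points that this commit wires up in RADV. The VkGeneratedCommandsInfoEXT field names follow the published VK_EXT_device_generated_commands spec as I read it; the helper name and every handle/address passed in are hypothetical, and error handling plus vkGetDeviceProcAddr loading of the entry points are omitted.

#include <vulkan/vulkan.h>

/* Hypothetical helper: record one DGC preprocess + execute pair with the EXT
 * entry points. All handles and addresses are supplied by the caller. */
static void
record_dgc_execute(VkCommandBuffer cmd_buf, VkCommandBuffer state_cmd_buf,
                   VkIndirectExecutionSetEXT ies, VkIndirectCommandsLayoutEXT layout,
                   VkDeviceAddress stream_va, VkDeviceSize stream_size,
                   VkDeviceAddress preprocess_va, VkDeviceSize preprocess_size,
                   uint32_t max_sequences)
{
   const VkGeneratedCommandsInfoEXT info = {
      .sType = VK_STRUCTURE_TYPE_GENERATED_COMMANDS_INFO_EXT,
      .shaderStages = VK_SHADER_STAGE_COMPUTE_BIT,
      .indirectExecutionSet = ies,        /* VK_NULL_HANDLE when shaders are not switched */
      .indirectCommandsLayout = layout,
      .indirectAddress = stream_va,       /* device address of the application-generated token stream */
      .indirectAddressSize = stream_size,
      .preprocessAddress = preprocess_va, /* sized via vkGetGeneratedCommandsMemoryRequirementsEXT() */
      .preprocessSize = preprocess_size,
      .maxSequenceCount = max_sequences,
      .sequenceCountAddress = 0,          /* no sequence count buffer in this sketch */
      .maxDrawCount = 0,
   };

   /* Optional explicit preprocessing; requires the layout to be created with
    * VK_INDIRECT_COMMANDS_LAYOUT_USAGE_EXPLICIT_PREPROCESS_BIT_EXT. The extra
    * state command buffer argument is the "state command buffer param" the
    * commit message refers to. */
   vkCmdPreprocessGeneratedCommandsEXT(cmd_buf, &info, state_cmd_buf);

   /* Execute the generated commands. */
   vkCmdExecuteGeneratedCommandsEXT(cmd_buf, VK_TRUE /* isPreprocessed */, &info);
}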
@@ -1263,11 +1263,12 @@ sqtt_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCou
}

VKAPI_ATTR void VKAPI_CALL
sqtt_CmdExecuteGeneratedCommandsNV(VkCommandBuffer commandBuffer, VkBool32 isPreprocessed,
const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo)
sqtt_CmdExecuteGeneratedCommandsEXT(VkCommandBuffer commandBuffer, VkBool32 isPreprocessed,
const VkGeneratedCommandsInfoEXT *pGeneratedCommandsInfo)
{
/* There is no ExecuteIndirect Vulkan event in RGP yet. */
API_MARKER_ALIAS(ExecuteGeneratedCommandsNV, ExecuteCommands, commandBuffer, isPreprocessed, pGeneratedCommandsInfo);
API_MARKER_ALIAS(ExecuteGeneratedCommandsEXT, ExecuteCommands, commandBuffer, isPreprocessed,
pGeneratedCommandsInfo);
}

VKAPI_ATTR void VKAPI_CALL

@@ -111,8 +111,8 @@ libradv_files = files(
'radv_device_memory.h',
'radv_descriptor_set.c',
'radv_descriptor_set.h',
'radv_device_generated_commands.c',
'radv_device_generated_commands.h',
'radv_dgc.c',
'radv_dgc.h',
'radv_event.c',
'radv_event.h',
'radv_formats.c',

@@ -511,7 +511,7 @@ radv_device_init_meta(struct radv_device *device)
if (result != VK_SUCCESS)
goto fail_astc_decode;

if (radv_uses_device_generated_commands(device)) {
if (device->vk.enabled_features.deviceGeneratedCommands) {
result = radv_device_init_dgc_prepare_state(device, on_demand);
if (result != VK_SUCCESS)
goto fail_dgc;
@@ -194,23 +194,11 @@ radv_get_buffer_memory_requirements(struct radv_device *device, VkDeviceSize siz
pMemoryRequirements->memoryRequirements.memoryTypeBits =
((1u << pdev->memory_properties.memoryTypeCount) - 1u) & ~pdev->memory_types_32bit;

/* Allow 32-bit address-space for DGC usage, as this buffer will contain
* cmd buffer upload buffers, and those get passed to shaders through 32-bit
* pointers.
*
* We only allow it with this usage set, to "protect" the 32-bit address space
* from being overused. The actual requirement is done as part of
* vkGetGeneratedCommandsMemoryRequirementsNV. (we have to make sure their
* intersection is non-zero at least)
*/
if ((usage & VK_BUFFER_USAGE_2_INDIRECT_BUFFER_BIT_KHR) && radv_uses_device_generated_commands(device))
pMemoryRequirements->memoryRequirements.memoryTypeBits |= pdev->memory_types_32bit;

/* Force 32-bit address-space for descriptor buffers usage because they are passed to shaders
* through 32-bit pointers.
*/
if (usage &
(VK_BUFFER_USAGE_2_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT | VK_BUFFER_USAGE_2_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT))
if (usage & (VK_BUFFER_USAGE_2_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT |
VK_BUFFER_USAGE_2_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT | VK_BUFFER_USAGE_2_PREPROCESS_BUFFER_BIT_EXT))
pMemoryRequirements->memoryRequirements.memoryTypeBits = pdev->memory_types_32bit;

if (flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
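Hedged sketch tied to the hunk above: with the EXT, the application allocates the preprocess memory itself (a buffer created with VK_BUFFER_USAGE_2_PREPROCESS_BUFFER_BIT_EXT, which RADV now forces into the 32-bit memory types) and sizes it with vkGetGeneratedCommandsMemoryRequirementsEXT. Structure and function names are assumed from the published spec, not taken from this diff.

#include <vulkan/vulkan.h>

/* Illustrative only: query how much preprocess memory a layout needs before
 * creating the preprocess buffer. */
static VkDeviceSize
query_preprocess_size(VkDevice device, VkIndirectExecutionSetEXT ies,
                      VkIndirectCommandsLayoutEXT layout, uint32_t max_sequences)
{
   const VkGeneratedCommandsMemoryRequirementsInfoEXT info = {
      .sType = VK_STRUCTURE_TYPE_GENERATED_COMMANDS_MEMORY_REQUIREMENTS_INFO_EXT,
      .indirectExecutionSet = ies,
      .indirectCommandsLayout = layout,
      .maxSequenceCount = max_sequences,
      .maxDrawCount = 0,
   };
   VkMemoryRequirements2 reqs = {.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2};

   vkGetGeneratedCommandsMemoryRequirementsEXT(device, &info, &reqs);
   return reqs.memoryRequirements.size;
}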
@@ -13,7 +13,7 @@
#include "radv_cp_dma.h"
#include "radv_cs.h"
#include "radv_debug.h"
#include "radv_device_generated_commands.h"
#include "radv_dgc.h"
#include "radv_event.h"
#include "radv_pipeline_rt.h"
#include "radv_radeon_winsys.h"

@@ -477,7 +477,6 @@ radv_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer, UNUSED VkCommandB
cmd_buffer->gang.sem.emitted_leader_value = 0;
cmd_buffer->gang.sem.va = 0;
cmd_buffer->shader_upload_seq = 0;
cmd_buffer->has_indirect_pipeline_binds = false;

if (cmd_buffer->upload.upload_bo)
radv_cs_add_buffer(device->ws, cmd_buffer->cs, cmd_buffer->upload.upload_bo);

@@ -646,8 +645,8 @@ radv_gang_barrier(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 src_

/* Add stage flush only when necessary. */
if (src_stage_mask & (VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT | VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT |
VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT |
VK_PIPELINE_STAGE_2_COMMAND_PREPROCESS_BIT_NV))
VK_PIPELINE_STAGE_2_COMMAND_PREPROCESS_BIT_EXT | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT |
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT))
cmd_buffer->gang.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH;

/* Block task shaders when we have to wait for CP DMA on the GFX cmdbuf. */

@@ -6645,9 +6644,10 @@ radv_stage_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 src_s

if (src_stage_mask &
(VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT |
VK_PIPELINE_STAGE_2_COMMAND_PREPROCESS_BIT_NV | VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR |
VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR |
VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_COPY_BIT_KHR | VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR |
VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT)) {
VK_PIPELINE_STAGE_2_COMMAND_PREPROCESS_BIT_EXT | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT |
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT)) {
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
}

@@ -6719,7 +6719,7 @@ radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2
has_DB_meta = false;
}

if (src_flags & VK_ACCESS_2_COMMAND_PREPROCESS_WRITE_BIT_NV)
if (src_flags & VK_ACCESS_2_COMMAND_PREPROCESS_WRITE_BIT_EXT)
flush_bits |= RADV_CMD_FLAG_INV_L2;

if (src_flags & (VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT | VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR)) {

@@ -6808,9 +6808,8 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2
flush_bits |= RADV_CMD_FLAG_INV_SCACHE;

/* Ensure the DGC meta shader can read the commands. */
if (radv_uses_device_generated_commands(device)) {
if (device->vk.enabled_features.deviceGeneratedCommands) {
flush_bits |= RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE;

if (pdev->info.gfx_level < GFX9)
flush_bits |= RADV_CMD_FLAG_INV_L2;
}

@@ -6849,7 +6848,7 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2
flush_bits |= RADV_CMD_FLAG_INV_L2;
}

if (dst_flags & VK_ACCESS_2_COMMAND_PREPROCESS_READ_BIT_NV) {
if (dst_flags & VK_ACCESS_2_COMMAND_PREPROCESS_READ_BIT_EXT) {
flush_bits |= RADV_CMD_FLAG_INV_VCACHE;
if (pdev->info.gfx_level < GFX9)
flush_bits |= RADV_CMD_FLAG_INV_L2;

@@ -11558,52 +11557,31 @@ radv_CmdDrawMeshTasksIndirectCountEXT(VkCommandBuffer commandBuffer, VkBuffer _b
}

/* TODO: Use these functions with the normal dispatch path. */
static void radv_dgc_before_dispatch(struct radv_cmd_buffer *cmd_buffer);
static void radv_dgc_before_dispatch(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point);
static void radv_dgc_after_dispatch(struct radv_cmd_buffer *cmd_buffer);

VKAPI_ATTR void VKAPI_CALL
radv_CmdPreprocessGeneratedCommandsNV(VkCommandBuffer commandBuffer,
const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(radv_indirect_command_layout, layout, pGeneratedCommandsInfo->indirectCommandsLayout);
VK_FROM_HANDLE(radv_pipeline, pipeline, pGeneratedCommandsInfo->pipeline);

if (!radv_dgc_can_preprocess(layout, pipeline))
return;

/* VK_EXT_conditional_rendering says that copy commands should not be
* affected by conditional rendering.
*/
const bool old_predicating = cmd_buffer->state.predicating;
cmd_buffer->state.predicating = false;

radv_prepare_dgc(cmd_buffer, pGeneratedCommandsInfo, old_predicating);

/* Restore conditional rendering. */
cmd_buffer->state.predicating = old_predicating;
}

/* VK_EXT_device_generated_commands */
static void
radv_dgc_execute_ib(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo)
radv_dgc_execute_ib(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommandsInfoEXT *pGeneratedCommandsInfo)
{
VK_FROM_HANDLE(radv_buffer, prep_buffer, pGeneratedCommandsInfo->preprocessBuffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const bool has_task_shader = radv_dgc_with_task_shader(pGeneratedCommandsInfo);

const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const VkGeneratedCommandsPipelineInfoEXT *pipeline_info =
vk_find_struct_const(pGeneratedCommandsInfo->pNext, GENERATED_COMMANDS_PIPELINE_INFO_EXT);
const VkGeneratedCommandsShaderInfoEXT *eso_info =
vk_find_struct_const(pGeneratedCommandsInfo->pNext, GENERATED_COMMANDS_SHADER_INFO_EXT);
const struct radv_shader *task_shader = radv_dgc_get_shader(pipeline_info, eso_info, MESA_SHADER_TASK);
const uint32_t cmdbuf_size = radv_get_indirect_main_cmdbuf_size(pGeneratedCommandsInfo);
const uint64_t ib_va =
radv_buffer_get_va(prep_buffer->bo) + prep_buffer->offset + pGeneratedCommandsInfo->preprocessOffset;
const uint64_t main_trailer_va = ib_va + radv_get_indirect_main_trailer_offset(pGeneratedCommandsInfo);
const uint64_t ib_va = pGeneratedCommandsInfo->preprocessAddress;
const uint64_t main_ib_va = ib_va + radv_get_indirect_main_cmdbuf_offset(pGeneratedCommandsInfo);
const uint64_t main_trailer_va = ib_va + radv_get_indirect_main_trailer_offset(pGeneratedCommandsInfo);

device->ws->cs_chain_dgc_ib(cmd_buffer->cs, main_ib_va, cmdbuf_size >> 2, main_trailer_va,
cmd_buffer->state.predicating);

if (has_task_shader) {
if (task_shader) {
const uint32_t ace_cmdbuf_size = radv_get_indirect_ace_cmdbuf_size(pGeneratedCommandsInfo);
const uint64_t ace_trailer_va = ib_va + radv_get_indirect_ace_trailer_offset(pGeneratedCommandsInfo);
const uint64_t ace_ib_va = ib_va + radv_get_indirect_ace_cmdbuf_offset(pGeneratedCommandsInfo);
const uint64_t ace_trailer_va = ib_va + radv_get_indirect_ace_trailer_offset(pGeneratedCommandsInfo);

assert(cmd_buffer->gang.cs);
device->ws->cs_chain_dgc_ib(cmd_buffer->gang.cs, ace_ib_va, ace_cmdbuf_size >> 2, ace_trailer_va,
@@ -11612,82 +11590,82 @@ radv_dgc_execute_ib(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommand
}

VKAPI_ATTR void VKAPI_CALL
radv_CmdExecuteGeneratedCommandsNV(VkCommandBuffer commandBuffer, VkBool32 isPreprocessed,
const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo)
radv_CmdExecuteGeneratedCommandsEXT(VkCommandBuffer commandBuffer, VkBool32 isPreprocessed,
const VkGeneratedCommandsInfoEXT *pGeneratedCommandsInfo)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(radv_indirect_command_layout, layout, pGeneratedCommandsInfo->indirectCommandsLayout);
VK_FROM_HANDLE(radv_pipeline, pipeline, pGeneratedCommandsInfo->pipeline);
VK_FROM_HANDLE(radv_buffer, prep_buffer, pGeneratedCommandsInfo->preprocessBuffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const bool compute = layout->pipeline_bind_point == VK_PIPELINE_BIND_POINT_COMPUTE;
VK_FROM_HANDLE(radv_indirect_execution_set, ies, pGeneratedCommandsInfo->indirectExecutionSet);
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const bool use_predication = radv_use_dgc_predication(cmd_buffer, pGeneratedCommandsInfo);
const bool compute = !!(layout->vk.dgc_info & BITFIELD_BIT(MESA_VK_DGC_DISPATCH));
const bool rt = !!(layout->vk.dgc_info & BITFIELD_BIT(MESA_VK_DGC_RT));
const VkGeneratedCommandsPipelineInfoEXT *pipeline_info =
vk_find_struct_const(pGeneratedCommandsInfo->pNext, GENERATED_COMMANDS_PIPELINE_INFO_EXT);
const VkGeneratedCommandsShaderInfoEXT *eso_info =
vk_find_struct_const(pGeneratedCommandsInfo->pNext, GENERATED_COMMANDS_SHADER_INFO_EXT);

/* Secondary command buffers are needed for the full extension but can't use
* PKT3_INDIRECT_BUFFER.
*/
if (ies) {
radv_cs_add_buffer(device->ws, cmd_buffer->cs, ies->bo);

cmd_buffer->compute_scratch_size_per_wave_needed =
MAX2(cmd_buffer->compute_scratch_size_per_wave_needed, ies->compute_scratch_size_per_wave);
cmd_buffer->compute_scratch_waves_wanted =
MAX2(cmd_buffer->compute_scratch_waves_wanted, ies->compute_scratch_waves);
}

/* Secondary command buffers are banned. */
assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);

if (use_predication) {
VK_FROM_HANDLE(radv_buffer, seq_count_buffer, pGeneratedCommandsInfo->sequencesCountBuffer);
const uint64_t va = radv_buffer_get_va(seq_count_buffer->bo) + seq_count_buffer->offset +
pGeneratedCommandsInfo->sequencesCountOffset;

const uint64_t va = pGeneratedCommandsInfo->sequenceCountAddress;
radv_begin_conditional_rendering(cmd_buffer, va, true);
}

if (!radv_dgc_can_preprocess(layout, pipeline)) {
if (!(layout->vk.usage & VK_INDIRECT_COMMANDS_LAYOUT_USAGE_EXPLICIT_PREPROCESS_BIT_EXT)) {
/* Suspend conditional rendering when the DGC execute is called on the compute queue to
* generate a cmdbuf which will skips dispatches when necessary. This is because the
* compute queue is missing IB2 which means it's not possible to skip the cmdbuf entirely.
* It should also be suspended when task shaders are used because the DGC ACE IB would be
* generate a cmdbuf which will skips dispatches when necessary. This is because the compute
* queue is missing IB2 which means it's not possible to skip the cmdbuf entirely. This
* should also be suspended when task shaders are used because the DGC ACE IB would be
* uninitialized otherwise.
*/
const bool suspend_cond_render =
(cmd_buffer->qf == RADV_QUEUE_COMPUTE || radv_dgc_with_task_shader(pGeneratedCommandsInfo));
const bool suspend_conditional_rendering =
(cmd_buffer->qf == RADV_QUEUE_COMPUTE || radv_dgc_get_shader(pipeline_info, eso_info, MESA_SHADER_TASK));
const bool old_predicating = cmd_buffer->state.predicating;

if (suspend_cond_render && cmd_buffer->state.predicating) {
if (suspend_conditional_rendering && cmd_buffer->state.predicating) {
cmd_buffer->state.predicating = false;
}

radv_prepare_dgc(cmd_buffer, pGeneratedCommandsInfo, old_predicating);
radv_prepare_dgc(cmd_buffer, pGeneratedCommandsInfo, cmd_buffer, old_predicating);

if (suspend_cond_render) {
if (suspend_conditional_rendering) {
cmd_buffer->state.predicating = old_predicating;
}

cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2;

if (radv_dgc_with_task_shader(pGeneratedCommandsInfo)) {
/* Make sure the DGC ACE IB will wait for the DGC prepare shader before the execution
* starts.
*/
/* Make sure the DGC ACE IB will wait for the DGC prepare shader before the execution
* starts.
*/
if (radv_dgc_get_shader(pipeline_info, eso_info, MESA_SHADER_TASK)) {
radv_gang_barrier(cmd_buffer, VK_PIPELINE_STAGE_2_COMMAND_PREPROCESS_BIT_NV,
VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT);
}
}

if (compute) {
radv_dgc_before_dispatch(cmd_buffer);

if (!pGeneratedCommandsInfo->pipeline)
cmd_buffer->has_indirect_pipeline_binds = true;
if (rt) {
radv_dgc_before_dispatch(cmd_buffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR);
} else if (compute) {
radv_dgc_before_dispatch(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE);
} else {
struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
struct radv_draw_info info;
struct radv_draw_info info = {
.count = pGeneratedCommandsInfo->maxSequenceCount,
.indirect = (void *)&info,
.indexed = !!(layout->vk.dgc_info & BITFIELD_BIT(MESA_VK_DGC_DRAW_INDEXED)),
};

info.count = pGeneratedCommandsInfo->sequencesCount;
info.indirect = prep_buffer; /* We're not really going use it this way, but a good signal
that this is not direct. */
info.indirect_offset = 0;
info.stride = 0;
info.strmout_buffer = NULL;
info.count_buffer = NULL;
info.indexed = layout->indexed;
info.instance_count = 0;

if (radv_pipeline_has_stage(graphics_pipeline, MESA_SHADER_MESH)) {
if (layout->vk.dgc_info & BITFIELD_BIT(MESA_VK_DGC_DRAW_MESH)) {
if (!radv_before_taskmesh_draw(cmd_buffer, &info, 1, true))
return;
} else {

@@ -11696,46 +11674,63 @@ radv_CmdExecuteGeneratedCommandsNV(VkCommandBuffer commandBuffer, VkBool32 isPre
}
}

const uint32_t view_mask = cmd_buffer->state.render.view_mask;

if (!radv_cmd_buffer_uses_mec(cmd_buffer)) {
radeon_emit(cmd_buffer->cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating));
radeon_emit(cmd_buffer->cs, 0);
}

radv_cs_add_buffer(device->ws, cmd_buffer->cs, prep_buffer->bo);

if (compute || !view_mask) {
const uint32_t view_mask = cmd_buffer->state.render.view_mask;
if (rt || compute || !view_mask) {
radv_dgc_execute_ib(cmd_buffer, pGeneratedCommandsInfo);
} else {
u_foreach_bit (view, view_mask) {
radv_emit_view_index(&cmd_buffer->state, cmd_buffer->cs, view);

radv_dgc_execute_ib(cmd_buffer, pGeneratedCommandsInfo);
}
}

if (compute) {
if (rt) {
cmd_buffer->push_constant_stages |= RADV_RT_STAGE_BITS;

radv_dgc_after_dispatch(cmd_buffer);
} else if (compute) {
cmd_buffer->push_constant_stages |= VK_SHADER_STAGE_COMPUTE_BIT;

if (!pGeneratedCommandsInfo->pipeline)
if (ies)
radv_mark_descriptor_sets_dirty(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE);

radv_dgc_after_dispatch(cmd_buffer);
} else {
struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);

if (layout->binds_index_buffer) {
if (layout->vk.dgc_info & BITFIELD_BIT(MESA_VK_DGC_IB)) {
cmd_buffer->state.last_index_type = -1;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_INDEX_BUFFER;
}

if (layout->bind_vbo_mask)
if (layout->vk.dgc_info & BITFIELD_BIT(MESA_VK_DGC_VB))
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_VERTEX_BUFFER;

cmd_buffer->push_constant_stages |= graphics_pipeline->active_stages;
if (pipeline_info) {
VK_FROM_HANDLE(radv_pipeline, pipeline, pipeline_info->pipeline);
struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);

cmd_buffer->push_constant_stages |= graphics_pipeline->active_stages;
} else {
assert(eso_info);

for (unsigned i = 0; i < eso_info->shaderCount; ++i) {
VK_FROM_HANDLE(radv_shader_object, shader_object, eso_info->pShaders[i]);

cmd_buffer->push_constant_stages |= mesa_to_vk_shader_stage(shader_object->stage);
}
}

if (!(layout->vk.dgc_info & BITFIELD_BIT(MESA_VK_DGC_DRAW_INDEXED))) {
/* Non-indexed draws overwrite VGT_INDEX_TYPE, so the state must be
* re-emitted before the next indexed draw.
*/
cmd_buffer->state.last_index_type = -1;
}

cmd_buffer->state.last_index_type = -1;
cmd_buffer->state.last_num_instances = -1;
cmd_buffer->state.last_vertex_offset_valid = false;
cmd_buffer->state.last_first_instance = -1;
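The hunk above adds handling for VkIndirectExecutionSetEXT (ies), including the compute scratch bookkeeping, which matches the commit message's "support for IES (only compute is supported for now)". As a loosely hedged illustration only — structure, enum and entry-point names below follow my reading of the EXT spec and should be checked against the real headers — an application could create and update a compute IES roughly like this:

#include <vulkan/vulkan.h>

/* Illustrative sketch: an execution set holding compute pipelines that the
 * generated command stream can switch between. */
static VkIndirectExecutionSetEXT
create_compute_ies(VkDevice device, VkPipeline initial_pipeline, uint32_t max_pipelines)
{
   const VkIndirectExecutionSetPipelineInfoEXT pipelines = {
      .sType = VK_STRUCTURE_TYPE_INDIRECT_EXECUTION_SET_PIPELINE_INFO_EXT,
      .initialPipeline = initial_pipeline,
      .maxPipelineCount = max_pipelines,
   };
   const VkIndirectExecutionSetCreateInfoEXT create_info = {
      .sType = VK_STRUCTURE_TYPE_INDIRECT_EXECUTION_SET_CREATE_INFO_EXT,
      .type = VK_INDIRECT_EXECUTION_SET_INFO_TYPE_PIPELINES_EXT,
      .info.pPipelineInfo = &pipelines,
   };
   VkIndirectExecutionSetEXT ies = VK_NULL_HANDLE;

   vkCreateIndirectExecutionSetEXT(device, &create_info, NULL, &ies);
   return ies;
}

/* Replace one slot of the execution set with another compute pipeline. */
static void
set_ies_entry(VkDevice device, VkIndirectExecutionSetEXT ies, uint32_t index, VkPipeline pipeline)
{
   const VkWriteIndirectExecutionSetPipelineEXT write = {
      .sType = VK_STRUCTURE_TYPE_WRITE_INDIRECT_EXECUTION_SET_PIPELINE_EXT,
      .index = index,
      .pipeline = pipeline,
   };

   vkUpdateIndirectExecutionSetPipelineEXT(device, ies, 1, &write);
}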
@@ -12102,12 +12097,16 @@ radv_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_inf
}

static void
radv_dgc_before_dispatch(struct radv_cmd_buffer *cmd_buffer)
radv_dgc_before_dispatch(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point)
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_compute_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
struct radv_shader *compute_shader = cmd_buffer->state.shaders[MESA_SHADER_COMPUTE];
struct radv_compute_pipeline *pipeline = bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR
? &cmd_buffer->state.rt_pipeline->base
: cmd_buffer->state.compute_pipeline;
struct radv_shader *compute_shader = bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR
? cmd_buffer->state.rt_pipeline->prolog
: cmd_buffer->state.shaders[MESA_SHADER_COMPUTE];
bool pipeline_is_dirty = pipeline != cmd_buffer->state.emitted_compute_pipeline;

/* We will have run the DGC patch shaders before, so we can assume that there is something to

@@ -12119,9 +12118,11 @@ radv_dgc_before_dispatch(struct radv_cmd_buffer *cmd_buffer)

if (pipeline)
radv_emit_compute_pipeline(cmd_buffer, pipeline);
if (bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR)
radv_emit_rt_stack_size(cmd_buffer);
radv_emit_cache_flush(cmd_buffer);

radv_upload_compute_shader_descriptors(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE);
radv_upload_compute_shader_descriptors(cmd_buffer, bind_point);

if (pipeline_is_dirty) {
const bool has_prefetch = pdev->info.gfx_level >= GFX7;

@@ -12136,7 +12137,9 @@ radv_dgc_before_dispatch(struct radv_cmd_buffer *cmd_buffer)
* We only need to do this when the pipeline is dirty because when we switch between
* the two we always need to switch pipelines.
*/
radv_mark_descriptor_sets_dirty(cmd_buffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR);
radv_mark_descriptor_sets_dirty(cmd_buffer, bind_point == VK_PIPELINE_BIND_POINT_COMPUTE
? VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR
: VK_PIPELINE_BIND_POINT_COMPUTE);
}
}

@@ -13672,42 +13675,6 @@ radv_CmdWriteBufferMarker2AMD(VkCommandBuffer commandBuffer, VkPipelineStageFlag
assert(cmd_buffer->cs->cdw <= cdw_max);
}

VKAPI_ATTR void VKAPI_CALL
radv_CmdBindPipelineShaderGroupNV(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
VkPipeline pipeline, uint32_t groupIndex)
{
fprintf(stderr, "radv: unimplemented vkCmdBindPipelineShaderGroupNV\n");
abort();
}

/* VK_NV_device_generated_commands_compute */
VKAPI_ATTR void VKAPI_CALL
radv_CmdUpdatePipelineIndirectBufferNV(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
VkPipeline _pipeline)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline);
const struct radeon_cmdbuf *cs = &compute_pipeline->indirect.cs;
const uint64_t va = compute_pipeline->indirect.va;
struct radv_compute_pipeline_metadata metadata;
uint32_t offset = 0;

radv_get_compute_shader_metadata(device, compute_pipeline->base.shaders[MESA_SHADER_COMPUTE], &metadata);

radv_write_data(cmd_buffer, V_370_ME, va + offset, sizeof(metadata) / 4, (const uint32_t *)&metadata, false);
offset += sizeof(metadata);

radv_write_data(cmd_buffer, V_370_ME, va + offset, 1, (const uint32_t *)&cs->cdw, false);
offset += sizeof(uint32_t);

radv_write_data(cmd_buffer, V_370_ME, va + offset, cs->cdw, (const uint32_t *)cs->buf, false);
offset += cs->cdw * sizeof(uint32_t);

assert(offset < compute_pipeline->indirect.size);
}

/* VK_EXT_descriptor_buffer */
VKAPI_ATTR void VKAPI_CALL
radv_CmdBindDescriptorBuffersEXT(VkCommandBuffer commandBuffer, uint32_t bufferCount,

@@ -537,7 +537,6 @@ struct radv_cmd_buffer {
bool gds_needed; /* for GFX10 streamout and NGG GS queries */
bool gds_oa_needed; /* for GFX10 streamout */
bool sample_positions_needed;
bool has_indirect_pipeline_binds;

uint64_t gfx9_fence_va;
uint32_t gfx9_fence_idx;

@@ -1100,7 +1100,6 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
simple_mtx_init(&device->trace_mtx, mtx_plain);
simple_mtx_init(&device->pstate_mtx, mtx_plain);
simple_mtx_init(&device->rt_handles_mtx, mtx_plain);
simple_mtx_init(&device->compute_scratch_mtx, mtx_plain);
simple_mtx_init(&device->pso_cache_stats_mtx, mtx_plain);

device->rt_handles = _mesa_hash_table_create(NULL, _mesa_hash_u32, _mesa_key_u32_equal);

@@ -1359,7 +1358,6 @@ fail_queue:
simple_mtx_destroy(&device->pstate_mtx);
simple_mtx_destroy(&device->trace_mtx);
simple_mtx_destroy(&device->rt_handles_mtx);
simple_mtx_destroy(&device->compute_scratch_mtx);
simple_mtx_destroy(&device->pso_cache_stats_mtx);
mtx_destroy(&device->overallocation_mutex);

@@ -1417,7 +1415,6 @@ radv_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
simple_mtx_destroy(&device->pstate_mtx);
simple_mtx_destroy(&device->trace_mtx);
simple_mtx_destroy(&device->rt_handles_mtx);
simple_mtx_destroy(&device->compute_scratch_mtx);
simple_mtx_destroy(&device->pso_cache_stats_mtx);

radv_destroy_shader_arenas(device);

@@ -541,11 +541,6 @@ struct radv_device {
/* Not NULL if a GPU hang report has been generated for VK_EXT_device_fault. */
char *gpu_hang_report;

/* For indirect compute pipeline binds with DGC only. */
simple_mtx_t compute_scratch_mtx;
uint32_t compute_scratch_size_per_wave;
uint32_t compute_scratch_waves;

/* PSO cache stats */
simple_mtx_t pso_cache_stats_mtx;
struct radv_pso_cache_stats pso_cache_stats[RADV_PIPELINE_TYPE_COUNT];

@@ -559,12 +554,6 @@ radv_device_physical(const struct radv_device *dev)
return (struct radv_physical_device *)dev->vk.physical;
}

static inline bool
radv_uses_device_generated_commands(const struct radv_device *device)
{
return device->vk.enabled_features.deviceGeneratedCommandsNV || device->vk.enabled_features.deviceGeneratedCompute;
}

static inline bool
radv_uses_primitives_generated_query(const struct radv_device *device)
{

@@ -1,82 +0,0 @@
/*
* Copyright © 2016 Red Hat.
* Copyright © 2016 Bas Nieuwenhuizen
*
* based in part on anv driver which is:
* Copyright © 2015 Intel Corporation
*
* SPDX-License-Identifier: MIT
*/

#ifndef RADV_DEVICE_GENERATED_COMMANDS_H
#define RADV_DEVICE_GENERATED_COMMANDS_H

#include "vk_object.h"

#include "radv_constants.h"

struct radv_cmd_buffer;
struct radv_pipeline;

struct radv_indirect_command_layout {
struct vk_object_base base;

VkIndirectCommandsLayoutUsageFlagsNV flags;
VkPipelineBindPoint pipeline_bind_point;

uint32_t input_stride;
uint32_t token_count;

bool indexed;
bool binds_index_buffer;
bool draw_mesh_tasks;
uint16_t draw_params_offset;
uint16_t index_buffer_offset;

uint16_t dispatch_params_offset;

bool bind_pipeline;
uint16_t pipeline_params_offset;

bool vertex_dynamic_stride;
uint32_t bind_vbo_mask;
uint32_t vbo_offsets[MAX_VBS];

uint64_t push_constant_mask;
uint32_t push_constant_offsets[MAX_PUSH_CONSTANTS_SIZE / 4];
uint32_t push_constant_size;

uint32_t ibo_type_32;
uint32_t ibo_type_8;

VkPipeline pipeline;

VkIndirectCommandsLayoutTokenNV tokens[0];
};

VK_DEFINE_NONDISP_HANDLE_CASTS(radv_indirect_command_layout, base, VkIndirectCommandsLayoutNV,
VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NV)

uint32_t radv_get_indirect_main_cmdbuf_size(const VkGeneratedCommandsInfoNV *cmd_info);

uint32_t radv_get_indirect_ace_cmdbuf_size(const VkGeneratedCommandsInfoNV *cmd_info);

uint32_t radv_get_indirect_main_cmdbuf_offset(const VkGeneratedCommandsInfoNV *cmd_info);

uint32_t radv_get_indirect_ace_cmdbuf_offset(const VkGeneratedCommandsInfoNV *cmd_info);

uint32_t radv_get_indirect_main_trailer_offset(const VkGeneratedCommandsInfoNV *cmd_info);

uint32_t radv_get_indirect_ace_trailer_offset(const VkGeneratedCommandsInfoNV *cmd_info);

bool radv_use_dgc_predication(struct radv_cmd_buffer *cmd_buffer,
const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo);

bool radv_dgc_can_preprocess(const struct radv_indirect_command_layout *layout, struct radv_pipeline *pipeline);

bool radv_dgc_with_task_shader(const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo);

void radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo,
bool cond_render_enabled);

#endif /* RADV_DEVICE_GENERATED_COMMANDS_H */
(File diff suppressed because it is too large)

src/amd/vulkan/radv_dgc.h (new file, 66 lines)
@@ -0,0 +1,66 @@
/*
* Copyright © 2024 Valve Corporation
*
* SPDX-License-Identifier: MIT
*/

#ifndef RADV_DGC_H
#define RADV_DGC_H

#include "compiler/shader_enums.h"

#include "radv_constants.h"

#include "vk_device_generated_commands.h"

struct radv_cmd_buffer;
enum radv_queue_family;

struct radv_indirect_command_layout {
struct vk_indirect_command_layout vk;

uint64_t push_constant_mask;
uint32_t push_constant_offsets[MAX_PUSH_CONSTANTS_SIZE / 4];
uint64_t sequence_index_mask;

VkPipeline pipeline;
};

VK_DEFINE_NONDISP_HANDLE_CASTS(radv_indirect_command_layout, vk.base, VkIndirectCommandsLayoutEXT,
VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_EXT)

struct radv_indirect_execution_set {
struct vk_object_base base;

struct radeon_winsys_bo *bo;
uint64_t va;
uint8_t *mapped_ptr;

uint32_t stride;

uint32_t compute_scratch_size_per_wave;
uint32_t compute_scratch_waves;
};

VK_DEFINE_NONDISP_HANDLE_CASTS(radv_indirect_execution_set, base, VkIndirectExecutionSetEXT,
VK_OBJECT_TYPE_INDIRECT_EXECUTION_SET_EXT);

uint32_t radv_get_indirect_main_cmdbuf_offset(const VkGeneratedCommandsInfoEXT *pGeneratedCommandsInfo);
uint32_t radv_get_indirect_ace_cmdbuf_offset(const VkGeneratedCommandsInfoEXT *pGeneratedCommandsInfo);

uint32_t radv_get_indirect_main_cmdbuf_size(const VkGeneratedCommandsInfoEXT *pGeneratedCommandsInfo);
uint32_t radv_get_indirect_ace_cmdbuf_size(const VkGeneratedCommandsInfoEXT *pGeneratedCommandsInfo);

uint32_t radv_get_indirect_main_trailer_offset(const VkGeneratedCommandsInfoEXT *pGeneratedCommandsInfo);
uint32_t radv_get_indirect_ace_trailer_offset(const VkGeneratedCommandsInfoEXT *pGeneratedCommandsInfo);

void radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommandsInfoEXT *pGeneratedCommandsInfo,
struct radv_cmd_buffer *state_cmd_buffer, bool cond_render_enabled);

bool radv_use_dgc_predication(struct radv_cmd_buffer *cmd_buffer,
const VkGeneratedCommandsInfoEXT *pGeneratedCommandsInfo);

struct radv_shader *radv_dgc_get_shader(const VkGeneratedCommandsPipelineInfoEXT *pipeline_info,
const VkGeneratedCommandsShaderInfoEXT *eso_info, gl_shader_stage stage);

#endif /* RADV_DGC_H */
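radv_dgc.h above keeps per-layout push constant masks plus a sequence_index_mask, which correspond to push-constant and sequence-index tokens in the EXT layout API. Purely as a hedged illustration (token and structure names follow my reading of the spec, and the stream layout is an assumption, not something this commit defines), a layout that writes the sequence index into a push constant and then issues a non-indexed draw could look like:

#include <vulkan/vulkan.h>

/* Illustrative only: each sequence is assumed to be one uint32_t slot for the
 * sequence index followed by a VkDrawIndirectCommand. */
static VkIndirectCommandsLayoutEXT
create_seqidx_draw_layout(VkDevice device, VkPipelineLayout pipeline_layout)
{
   const VkIndirectCommandsPushConstantTokenEXT seq_index = {
      .updateRange = {VK_SHADER_STAGE_VERTEX_BIT, /*offset*/ 0, /*size*/ 4},
   };
   const VkIndirectCommandsLayoutTokenEXT tokens[2] = {
      {
         .sType = VK_STRUCTURE_TYPE_INDIRECT_COMMANDS_LAYOUT_TOKEN_EXT,
         .type = VK_INDIRECT_COMMANDS_TOKEN_TYPE_SEQUENCE_INDEX_EXT,
         .data.pPushConstant = &seq_index, /* where the 32-bit sequence index is written */
         .offset = 0,
      },
      {
         .sType = VK_STRUCTURE_TYPE_INDIRECT_COMMANDS_LAYOUT_TOKEN_EXT,
         .type = VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_EXT,
         .offset = 4, /* VkDrawIndirectCommand follows the sequence-index slot */
      },
   };
   const VkIndirectCommandsLayoutCreateInfoEXT create_info = {
      .sType = VK_STRUCTURE_TYPE_INDIRECT_COMMANDS_LAYOUT_CREATE_INFO_EXT,
      .shaderStages = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
      .indirectStride = 4 + sizeof(VkDrawIndirectCommand),
      .pipelineLayout = pipeline_layout,
      .tokenCount = 2,
      .pTokens = tokens,
   };
   VkIndirectCommandsLayoutEXT layout = VK_NULL_HANDLE;

   vkCreateIndirectCommandsLayoutEXT(device, &create_info, NULL, &layout);
   return layout;
}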
@@ -148,7 +148,6 @@ static const driOptionDescription radv_dri_options[] = {
DRI_CONF_RADV_DISABLE_TRUNC_COORD(false)
DRI_CONF_RADV_DISABLE_SINKING_LOAD_INPUT_FS(false)
DRI_CONF_RADV_DISABLE_DEPTH_STORAGE(false)
DRI_CONF_RADV_DGC(false)
DRI_CONF_RADV_FLUSH_BEFORE_QUERY_COPY(false)
DRI_CONF_RADV_ENABLE_UNIFIED_HEAP_ON_APU(false)
DRI_CONF_RADV_TEX_NON_UNIFORM(false)

@@ -243,8 +242,6 @@ radv_init_dri_options(struct radv_instance *instance)
instance->drirc.override_ray_tracing_shader_version =
driQueryOptioni(&instance->drirc.options, "radv_override_ray_tracing_shader_version");

instance->drirc.enable_dgc = driQueryOptionb(&instance->drirc.options, "radv_dgc");

instance->drirc.override_vram_size = driQueryOptioni(&instance->drirc.options, "override_vram_size");

instance->drirc.enable_khr_present_wait = driQueryOptionb(&instance->drirc.options, "vk_khr_present_wait");

@@ -66,7 +66,6 @@ struct radv_instance {
bool legacy_sparse_binding;
bool force_pstate_peak_gfx11_dgpu;
bool clear_lds;
bool enable_dgc;
bool enable_khr_present_wait;
bool report_llvm9_version_string;
bool vk_require_etc2;

@@ -747,10 +747,6 @@ radv_physical_device_get_supported_extensions(const struct radv_physical_device
.INTEL_shader_integer_functions2 = true,
.MESA_image_alignment_control = pdev->info.gfx_level >= GFX9 && pdev->info.gfx_level <= GFX11_5,
.NV_compute_shader_derivatives = true,
.NV_device_generated_commands =
pdev->info.gfx_level >= GFX8 && instance->drirc.enable_dgc && !(instance->debug_flags & RADV_DEBUG_NO_IBS),
.NV_device_generated_commands_compute =
pdev->info.gfx_level >= GFX8 && instance->drirc.enable_dgc && !(instance->debug_flags & RADV_DEBUG_NO_IBS),
/* Undocumented extension purely for vkd3d-proton. This check is to prevent anyone else from
* using it.
*/

@@ -1129,9 +1125,6 @@ radv_physical_device_get_features(const struct radv_physical_device *pdev, struc
.performanceCounterQueryPools = has_perf_query,
.performanceCounterMultipleQueryPools = has_perf_query,

/* VK_NV_device_generated_commands */
.deviceGeneratedCommandsNV = true,

/* VK_EXT_attachment_feedback_loop_layout */
.attachmentFeedbackLoopLayout = true,

@@ -1214,11 +1207,6 @@ radv_physical_device_get_features(const struct radv_physical_device *pdev, struc
/* VK_KHR_maintenance5 */
.maintenance5 = true,

/* VK_NV_device_generated_commands_compute */
.deviceGeneratedCompute = true,
.deviceGeneratedComputePipelines = true,
.deviceGeneratedComputeCaptureReplay = false,

/* VK_KHR_cooperative_matrix */
.cooperativeMatrix = pdev->info.gfx_level >= GFX11 && !pdev->use_llvm,
.cooperativeMatrixRobustBufferAccess = pdev->info.gfx_level >= GFX11 && !pdev->use_llvm,

@@ -1830,20 +1818,6 @@ radv_get_physical_device_properties(struct radv_physical_device *pdev)
/* VK_KHR_performance_query */
p->allowCommandBufferQueryCopies = false;

/* VK_NV_device_generated_commands */
p->maxIndirectCommandsStreamCount = 1;
p->maxIndirectCommandsStreamStride = UINT32_MAX;
p->maxIndirectCommandsTokenCount = 512;
p->maxIndirectCommandsTokenOffset = UINT16_MAX;
p->minIndirectCommandsBufferOffsetAlignment = 4;
p->minSequencesCountBufferOffsetAlignment = 4;
p->minSequencesIndexBufferOffsetAlignment = 4;
/* Don't support even a shader group count = 1 until we support shader
* overrides during pipeline creation. */
p->maxGraphicsShaderGroupCount = 0;
/* MSB reserved for signalling indirect count enablement. */
p->maxIndirectSequenceCount = UINT32_MAX >> 1;

/* VK_EXT_graphics_pipeline_library */
p->graphicsPipelineLibraryFastLinking = true;
p->graphicsPipelineLibraryIndependentInterpolationDecoration = true;

@@ -137,7 +137,7 @@ radv_pipeline_get_shader_key(const struct radv_device *device, const VkPipelineS
if (flags & VK_PIPELINE_CREATE_2_VIEW_INDEX_FROM_DEVICE_INDEX_BIT_KHR)
key.view_index_from_device_index = 1;

if (flags & VK_PIPELINE_CREATE_INDIRECT_BINDABLE_BIT_NV)
if (flags & VK_PIPELINE_CREATE_2_INDIRECT_BINDABLE_BIT_EXT)
key.indirect_bindable = 1;

if (stage->stage & RADV_GRAPHICS_STAGE_BITS) {

@@ -305,37 +305,6 @@ radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkC

radv_compute_pipeline_init(pipeline, pipeline_layout, pipeline->base.shaders[MESA_SHADER_COMPUTE]);

if (pipeline->base.create_flags & VK_PIPELINE_CREATE_INDIRECT_BINDABLE_BIT_NV) {
const VkComputePipelineIndirectBufferInfoNV *indirect_buffer =
vk_find_struct_const(pCreateInfo->pNext, COMPUTE_PIPELINE_INDIRECT_BUFFER_INFO_NV);
struct radv_shader *shader = pipeline->base.shaders[MESA_SHADER_COMPUTE];
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = &pipeline->indirect.cs;

cs->reserved_dw = cs->max_dw = 32;
cs->buf = malloc(cs->max_dw * 4);
if (!cs->buf) {
radv_pipeline_destroy(device, &pipeline->base, pAllocator);
return result;
}

radv_emit_compute_shader(pdev, cs, shader);

pipeline->indirect.va = indirect_buffer->deviceAddress;
pipeline->indirect.size = indirect_buffer->size;

/* vkCmdUpdatePipelineIndirectBufferNV() can be called on any queues supporting transfer
* operations and it's not required to call it on the same queue as the DGC execute. Because
* it's not possible to know if the compute shader uses scratch when DGC execute is called,
* the only solution is gather the max scratch size of all indirect pipelines.
*/
simple_mtx_lock(&device->compute_scratch_mtx);
device->compute_scratch_size_per_wave =
MAX2(device->compute_scratch_size_per_wave, shader->config.scratch_bytes_per_wave);
device->compute_scratch_waves = MAX2(device->compute_scratch_waves, radv_get_max_scratch_waves(device, shader));
simple_mtx_unlock(&device->compute_scratch_mtx);
}

*pPipeline = radv_pipeline_to_handle(&pipeline->base);
radv_rmv_log_compute_pipeline_create(device, &pipeline->base, pipeline->base.is_internal);
return VK_SUCCESS;

@@ -371,12 +340,8 @@ radv_create_compute_pipelines(VkDevice _device, VkPipelineCache pipelineCache, u
void
radv_destroy_compute_pipeline(struct radv_device *device, struct radv_compute_pipeline *pipeline)
{
struct radeon_cmdbuf *cs = &pipeline->indirect.cs;

if (pipeline->base.shaders[MESA_SHADER_COMPUTE])
radv_shader_unref(device, pipeline->base.shaders[MESA_SHADER_COMPUTE]);

free(cs->buf);
}

VKAPI_ATTR VkResult VKAPI_CALL

@@ -19,12 +19,6 @@ struct radv_shader_info;

struct radv_compute_pipeline {
struct radv_pipeline base;

struct {
struct radeon_cmdbuf cs;
uint64_t va;
uint64_t size;
} indirect;
};

RADV_DECL_PIPELINE_DOWNCAST(compute, RADV_PIPELINE_COMPUTE)

@@ -1268,7 +1268,6 @@ radv_update_preambles(struct radv_queue_state *queue, struct radv_device *device
bool *has_follower)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
bool has_indirect_pipeline_binds = false;

if (queue->qf != RADV_QUEUE_GENERAL && queue->qf != RADV_QUEUE_COMPUTE) {
for (uint32_t j = 0; j < cmd_buffer_count; j++) {

@@ -1308,16 +1307,6 @@ radv_update_preambles(struct radv_queue_state *queue, struct radv_device *device
needs.sample_positions |= cmd_buffer->sample_positions_needed;
*use_perf_counters |= cmd_buffer->state.uses_perf_counters;
*has_follower |= !!cmd_buffer->gang.cs;

has_indirect_pipeline_binds |= cmd_buffer->has_indirect_pipeline_binds;
}

if (has_indirect_pipeline_binds) {
/* Use the maximum possible scratch size for indirect compute pipelines with DGC. */
simple_mtx_lock(&device->compute_scratch_mtx);
needs.compute_scratch_size_per_wave = MAX2(needs.compute_scratch_waves, device->compute_scratch_size_per_wave);
needs.compute_scratch_waves = MAX2(needs.compute_scratch_waves, device->compute_scratch_waves);
simple_mtx_unlock(&device->compute_scratch_mtx);
}

/* Sanitize scratch size information. */

@@ -886,13 +886,6 @@ radv_create_shader_arena(struct radv_device *device, struct radv_shader_free_lis
if (replayable)
flags |= RADEON_FLAG_REPLAYABLE;

/* vkCmdUpdatePipelineIndirectBufferNV() can be called on any queue supporting transfer
* operations and it's not required to call it on the same queue as DGC execute. To make sure the
* compute shader BO is part of the DGC execute submission, force all shaders to be local BOs.
*/
if (device->vk.enabled_features.deviceGeneratedComputePipelines)
flags |= RADEON_FLAG_PREFER_LOCAL_BO;

VkResult result;
result = radv_bo_create(device, NULL, arena_size, RADV_SHADER_ALLOC_ALIGNMENT, RADEON_DOMAIN_VRAM, flags,
RADV_BO_PRIORITY_SHADER, replay_va, true, &arena->bo);

@@ -110,6 +110,9 @@ radv_shader_stage_init(const VkShaderCreateInfoEXT *sinfo, struct radv_shader_st
out_stage->key.subgroup_require_full = 1;
}

if (sinfo->flags & VK_SHADER_CREATE_INDIRECT_BINDABLE_BIT_EXT)
out_stage->key.indirect_bindable = 1;

if (out_stage->stage == MESA_SHADER_MESH) {
out_stage->key.has_task_shader = !(sinfo->flags & VK_SHADER_CREATE_NO_TASK_SHADER_BIT_EXT);
}

@@ -42,7 +42,6 @@ Application bugs worked around in this file:
<option name="radv_zero_vram" value="true" />
<option name="radv_disable_aniso_single_level" value="true" />
<option name="radv_disable_trunc_coord" value="true" />
<option name="radv_dgc" value="true" />
</engine>

<engine engine_name_match="DXVK">

@@ -708,10 +708,6 @@
DRI_CONF_OPT_B(radv_disable_depth_storage, def, \
"Hides support for storage access to depth formats")

#define DRI_CONF_RADV_DGC(def) \
DRI_CONF_OPT_B(radv_dgc, def, \
"Expose an experimental implementation of VK_NV_device_generated_commands on GFX8+")

#define DRI_CONF_RADV_FLUSH_BEFORE_QUERY_COPY(def) \
DRI_CONF_OPT_B( \
radv_flush_before_query_copy, def, \