radv: implement VK_EXT_device_generated_commands

The major differences compared to the NV extensions are:
- support for the sequence index as push constants
- support for draw with count tokens (note that DrawID is zero for
  normal draws)
- support for raytracing
- support for IES (indirect execution sets; only compute is supported for now)
- improved preprocessing support with the state command buffer param

The NV DGC extensions were only enabled for vkd3d-proton, which will
maintain both paths for a while, so they can eventually be replaced by
the EXT.
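
A rough C sketch (illustration only, not part of this commit) of how an
application might drive the new EXT entry points, including the explicit
preprocess step with its state command buffer parameter. The handles,
addresses and the barrier choice below are assumptions; the
VK_EXT_device_generated_commands spec has the authoritative valid-usage
rules, and a real application would fetch the entry points through
vkGetDeviceProcAddr.

#include <vulkan/vulkan.h>

/* Assumes `layout` was created with
 * VK_INDIRECT_COMMANDS_LAYOUT_USAGE_EXPLICIT_PREPROCESS_BIT_EXT and that
 * `execute_cmd` already has the graphics state bound.
 */
static void
record_dgc(VkCommandBuffer preprocess_cmd, VkCommandBuffer execute_cmd,
           VkIndirectCommandsLayoutEXT layout, VkPipeline pipeline,
           VkDeviceAddress stream_va, VkDeviceSize stream_size,
           VkDeviceAddress preprocess_va, VkDeviceSize preprocess_size,
           VkDeviceAddress seq_count_va, uint32_t max_sequences)
{
   const VkGeneratedCommandsPipelineInfoEXT pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_GENERATED_COMMANDS_PIPELINE_INFO_EXT,
      .pipeline = pipeline, /* or chain VkGeneratedCommandsShaderInfoEXT for shader objects */
   };
   const VkGeneratedCommandsInfoEXT info = {
      .sType = VK_STRUCTURE_TYPE_GENERATED_COMMANDS_INFO_EXT,
      .pNext = &pipeline_info,
      .shaderStages = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
      .indirectExecutionSet = VK_NULL_HANDLE, /* IES only needed for indirect binds */
      .indirectCommandsLayout = layout,
      .indirectAddress = stream_va,
      .indirectAddressSize = stream_size,
      .preprocessAddress = preprocess_va,
      .preprocessSize = preprocess_size,
      .maxSequenceCount = max_sequences,
      .sequenceCountAddress = seq_count_va, /* 0 when no count buffer is used */
   };

   /* New with the EXT: preprocessing takes the command buffer that holds the
    * bound state as a separate parameter.
    */
   vkCmdPreprocessGeneratedCommandsEXT(preprocess_cmd, &info, execute_cmd);

   /* Make the preprocessed commands visible to the indirect execution. */
   const VkMemoryBarrier2 barrier = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
      .srcStageMask = VK_PIPELINE_STAGE_2_COMMAND_PREPROCESS_BIT_EXT,
      .srcAccessMask = VK_ACCESS_2_COMMAND_PREPROCESS_WRITE_BIT_EXT,
      .dstStageMask = VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT,
      .dstAccessMask = VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT,
   };
   const VkDependencyInfo dep = {
      .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
      .memoryBarrierCount = 1,
      .pMemoryBarriers = &barrier,
   };
   vkCmdPipelineBarrier2(preprocess_cmd, &dep);

   vkCmdExecuteGeneratedCommandsEXT(execute_cmd, VK_TRUE /* isPreprocessed */, &info);
}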

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31383>
Author: Samuel Pitoiset
Date:   2024-08-30 22:19:52 +02:00
Committed-by: Marge Bot
Parent: 637a4b849a
Commit: 9f8684359f
22 changed files with 1626 additions and 1225 deletions

@@ -1263,11 +1263,12 @@ sqtt_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCou
}
VKAPI_ATTR void VKAPI_CALL
sqtt_CmdExecuteGeneratedCommandsNV(VkCommandBuffer commandBuffer, VkBool32 isPreprocessed,
const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo)
sqtt_CmdExecuteGeneratedCommandsEXT(VkCommandBuffer commandBuffer, VkBool32 isPreprocessed,
const VkGeneratedCommandsInfoEXT *pGeneratedCommandsInfo)
{
/* There is no ExecuteIndirect Vulkan event in RGP yet. */
API_MARKER_ALIAS(ExecuteGeneratedCommandsNV, ExecuteCommands, commandBuffer, isPreprocessed, pGeneratedCommandsInfo);
API_MARKER_ALIAS(ExecuteGeneratedCommandsEXT, ExecuteCommands, commandBuffer, isPreprocessed,
pGeneratedCommandsInfo);
}
VKAPI_ATTR void VKAPI_CALL

@@ -111,8 +111,8 @@ libradv_files = files(
'radv_device_memory.h',
'radv_descriptor_set.c',
'radv_descriptor_set.h',
'radv_device_generated_commands.c',
'radv_device_generated_commands.h',
'radv_dgc.c',
'radv_dgc.h',
'radv_event.c',
'radv_event.h',
'radv_formats.c',

@@ -511,7 +511,7 @@ radv_device_init_meta(struct radv_device *device)
if (result != VK_SUCCESS)
goto fail_astc_decode;
if (radv_uses_device_generated_commands(device)) {
if (device->vk.enabled_features.deviceGeneratedCommands) {
result = radv_device_init_dgc_prepare_state(device, on_demand);
if (result != VK_SUCCESS)
goto fail_dgc;

@@ -194,23 +194,11 @@ radv_get_buffer_memory_requirements(struct radv_device *device, VkDeviceSize siz
pMemoryRequirements->memoryRequirements.memoryTypeBits =
((1u << pdev->memory_properties.memoryTypeCount) - 1u) & ~pdev->memory_types_32bit;
/* Allow 32-bit address-space for DGC usage, as this buffer will contain
* cmd buffer upload buffers, and those get passed to shaders through 32-bit
* pointers.
*
* We only allow it with this usage set, to "protect" the 32-bit address space
* from being overused. The actual requirement is done as part of
* vkGetGeneratedCommandsMemoryRequirementsNV. (we have to make sure their
* intersection is non-zero at least)
*/
if ((usage & VK_BUFFER_USAGE_2_INDIRECT_BUFFER_BIT_KHR) && radv_uses_device_generated_commands(device))
pMemoryRequirements->memoryRequirements.memoryTypeBits |= pdev->memory_types_32bit;
/* Force 32-bit address-space for descriptor buffers usage because they are passed to shaders
* through 32-bit pointers.
*/
if (usage &
(VK_BUFFER_USAGE_2_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT | VK_BUFFER_USAGE_2_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT))
if (usage & (VK_BUFFER_USAGE_2_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT |
VK_BUFFER_USAGE_2_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT | VK_BUFFER_USAGE_2_PREPROCESS_BUFFER_BIT_EXT))
pMemoryRequirements->memoryRequirements.memoryTypeBits = pdev->memory_types_32bit;
if (flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)

@@ -13,7 +13,7 @@
#include "radv_cp_dma.h"
#include "radv_cs.h"
#include "radv_debug.h"
#include "radv_device_generated_commands.h"
#include "radv_dgc.h"
#include "radv_event.h"
#include "radv_pipeline_rt.h"
#include "radv_radeon_winsys.h"
@@ -477,7 +477,6 @@ radv_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer, UNUSED VkCommandB
cmd_buffer->gang.sem.emitted_leader_value = 0;
cmd_buffer->gang.sem.va = 0;
cmd_buffer->shader_upload_seq = 0;
cmd_buffer->has_indirect_pipeline_binds = false;
if (cmd_buffer->upload.upload_bo)
radv_cs_add_buffer(device->ws, cmd_buffer->cs, cmd_buffer->upload.upload_bo);
@@ -646,8 +645,8 @@ radv_gang_barrier(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 src_
/* Add stage flush only when necessary. */
if (src_stage_mask & (VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT | VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT |
VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT |
VK_PIPELINE_STAGE_2_COMMAND_PREPROCESS_BIT_NV))
VK_PIPELINE_STAGE_2_COMMAND_PREPROCESS_BIT_EXT | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT |
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT))
cmd_buffer->gang.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
/* Block task shaders when we have to wait for CP DMA on the GFX cmdbuf. */
@@ -6645,9 +6644,10 @@ radv_stage_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 src_s
if (src_stage_mask &
(VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT |
VK_PIPELINE_STAGE_2_COMMAND_PREPROCESS_BIT_NV | VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR |
VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR |
VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_COPY_BIT_KHR | VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR |
VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT)) {
VK_PIPELINE_STAGE_2_COMMAND_PREPROCESS_BIT_EXT | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT |
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT)) {
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
}
@@ -6719,7 +6719,7 @@ radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2
has_DB_meta = false;
}
if (src_flags & VK_ACCESS_2_COMMAND_PREPROCESS_WRITE_BIT_NV)
if (src_flags & VK_ACCESS_2_COMMAND_PREPROCESS_WRITE_BIT_EXT)
flush_bits |= RADV_CMD_FLAG_INV_L2;
if (src_flags & (VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT | VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR)) {
@@ -6808,9 +6808,8 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2
flush_bits |= RADV_CMD_FLAG_INV_SCACHE;
/* Ensure the DGC meta shader can read the commands. */
if (radv_uses_device_generated_commands(device)) {
if (device->vk.enabled_features.deviceGeneratedCommands) {
flush_bits |= RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE;
if (pdev->info.gfx_level < GFX9)
flush_bits |= RADV_CMD_FLAG_INV_L2;
}
@@ -6849,7 +6848,7 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2
flush_bits |= RADV_CMD_FLAG_INV_L2;
}
if (dst_flags & VK_ACCESS_2_COMMAND_PREPROCESS_READ_BIT_NV) {
if (dst_flags & VK_ACCESS_2_COMMAND_PREPROCESS_READ_BIT_EXT) {
flush_bits |= RADV_CMD_FLAG_INV_VCACHE;
if (pdev->info.gfx_level < GFX9)
flush_bits |= RADV_CMD_FLAG_INV_L2;
@@ -11558,52 +11557,31 @@ radv_CmdDrawMeshTasksIndirectCountEXT(VkCommandBuffer commandBuffer, VkBuffer _b
}
/* TODO: Use these functions with the normal dispatch path. */
static void radv_dgc_before_dispatch(struct radv_cmd_buffer *cmd_buffer);
static void radv_dgc_before_dispatch(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point);
static void radv_dgc_after_dispatch(struct radv_cmd_buffer *cmd_buffer);
VKAPI_ATTR void VKAPI_CALL
radv_CmdPreprocessGeneratedCommandsNV(VkCommandBuffer commandBuffer,
const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(radv_indirect_command_layout, layout, pGeneratedCommandsInfo->indirectCommandsLayout);
VK_FROM_HANDLE(radv_pipeline, pipeline, pGeneratedCommandsInfo->pipeline);
if (!radv_dgc_can_preprocess(layout, pipeline))
return;
/* VK_EXT_conditional_rendering says that copy commands should not be
* affected by conditional rendering.
*/
const bool old_predicating = cmd_buffer->state.predicating;
cmd_buffer->state.predicating = false;
radv_prepare_dgc(cmd_buffer, pGeneratedCommandsInfo, old_predicating);
/* Restore conditional rendering. */
cmd_buffer->state.predicating = old_predicating;
}
/* VK_EXT_device_generated_commands */
static void
radv_dgc_execute_ib(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo)
radv_dgc_execute_ib(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommandsInfoEXT *pGeneratedCommandsInfo)
{
VK_FROM_HANDLE(radv_buffer, prep_buffer, pGeneratedCommandsInfo->preprocessBuffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const bool has_task_shader = radv_dgc_with_task_shader(pGeneratedCommandsInfo);
const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const VkGeneratedCommandsPipelineInfoEXT *pipeline_info =
vk_find_struct_const(pGeneratedCommandsInfo->pNext, GENERATED_COMMANDS_PIPELINE_INFO_EXT);
const VkGeneratedCommandsShaderInfoEXT *eso_info =
vk_find_struct_const(pGeneratedCommandsInfo->pNext, GENERATED_COMMANDS_SHADER_INFO_EXT);
const struct radv_shader *task_shader = radv_dgc_get_shader(pipeline_info, eso_info, MESA_SHADER_TASK);
const uint32_t cmdbuf_size = radv_get_indirect_main_cmdbuf_size(pGeneratedCommandsInfo);
const uint64_t ib_va =
radv_buffer_get_va(prep_buffer->bo) + prep_buffer->offset + pGeneratedCommandsInfo->preprocessOffset;
const uint64_t main_trailer_va = ib_va + radv_get_indirect_main_trailer_offset(pGeneratedCommandsInfo);
const uint64_t ib_va = pGeneratedCommandsInfo->preprocessAddress;
const uint64_t main_ib_va = ib_va + radv_get_indirect_main_cmdbuf_offset(pGeneratedCommandsInfo);
const uint64_t main_trailer_va = ib_va + radv_get_indirect_main_trailer_offset(pGeneratedCommandsInfo);
device->ws->cs_chain_dgc_ib(cmd_buffer->cs, main_ib_va, cmdbuf_size >> 2, main_trailer_va,
cmd_buffer->state.predicating);
if (has_task_shader) {
if (task_shader) {
const uint32_t ace_cmdbuf_size = radv_get_indirect_ace_cmdbuf_size(pGeneratedCommandsInfo);
const uint64_t ace_trailer_va = ib_va + radv_get_indirect_ace_trailer_offset(pGeneratedCommandsInfo);
const uint64_t ace_ib_va = ib_va + radv_get_indirect_ace_cmdbuf_offset(pGeneratedCommandsInfo);
const uint64_t ace_trailer_va = ib_va + radv_get_indirect_ace_trailer_offset(pGeneratedCommandsInfo);
assert(cmd_buffer->gang.cs);
device->ws->cs_chain_dgc_ib(cmd_buffer->gang.cs, ace_ib_va, ace_cmdbuf_size >> 2, ace_trailer_va,
@@ -11612,82 +11590,82 @@ radv_dgc_execute_ib(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommand
}
VKAPI_ATTR void VKAPI_CALL
radv_CmdExecuteGeneratedCommandsNV(VkCommandBuffer commandBuffer, VkBool32 isPreprocessed,
const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo)
radv_CmdExecuteGeneratedCommandsEXT(VkCommandBuffer commandBuffer, VkBool32 isPreprocessed,
const VkGeneratedCommandsInfoEXT *pGeneratedCommandsInfo)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(radv_indirect_command_layout, layout, pGeneratedCommandsInfo->indirectCommandsLayout);
VK_FROM_HANDLE(radv_pipeline, pipeline, pGeneratedCommandsInfo->pipeline);
VK_FROM_HANDLE(radv_buffer, prep_buffer, pGeneratedCommandsInfo->preprocessBuffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const bool compute = layout->pipeline_bind_point == VK_PIPELINE_BIND_POINT_COMPUTE;
VK_FROM_HANDLE(radv_indirect_execution_set, ies, pGeneratedCommandsInfo->indirectExecutionSet);
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const bool use_predication = radv_use_dgc_predication(cmd_buffer, pGeneratedCommandsInfo);
const bool compute = !!(layout->vk.dgc_info & BITFIELD_BIT(MESA_VK_DGC_DISPATCH));
const bool rt = !!(layout->vk.dgc_info & BITFIELD_BIT(MESA_VK_DGC_RT));
const VkGeneratedCommandsPipelineInfoEXT *pipeline_info =
vk_find_struct_const(pGeneratedCommandsInfo->pNext, GENERATED_COMMANDS_PIPELINE_INFO_EXT);
const VkGeneratedCommandsShaderInfoEXT *eso_info =
vk_find_struct_const(pGeneratedCommandsInfo->pNext, GENERATED_COMMANDS_SHADER_INFO_EXT);
/* Secondary command buffers are needed for the full extension but can't use
* PKT3_INDIRECT_BUFFER.
*/
if (ies) {
radv_cs_add_buffer(device->ws, cmd_buffer->cs, ies->bo);
cmd_buffer->compute_scratch_size_per_wave_needed =
MAX2(cmd_buffer->compute_scratch_size_per_wave_needed, ies->compute_scratch_size_per_wave);
cmd_buffer->compute_scratch_waves_wanted =
MAX2(cmd_buffer->compute_scratch_waves_wanted, ies->compute_scratch_waves);
}
/* Secondary command buffers are banned. */
assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
if (use_predication) {
VK_FROM_HANDLE(radv_buffer, seq_count_buffer, pGeneratedCommandsInfo->sequencesCountBuffer);
const uint64_t va = radv_buffer_get_va(seq_count_buffer->bo) + seq_count_buffer->offset +
pGeneratedCommandsInfo->sequencesCountOffset;
const uint64_t va = pGeneratedCommandsInfo->sequenceCountAddress;
radv_begin_conditional_rendering(cmd_buffer, va, true);
}
if (!radv_dgc_can_preprocess(layout, pipeline)) {
if (!(layout->vk.usage & VK_INDIRECT_COMMANDS_LAYOUT_USAGE_EXPLICIT_PREPROCESS_BIT_EXT)) {
/* Suspend conditional rendering when the DGC execute is called on the compute queue to
* generate a cmdbuf which will skips dispatches when necessary. This is because the
* compute queue is missing IB2 which means it's not possible to skip the cmdbuf entirely.
* It should also be suspended when task shaders are used because the DGC ACE IB would be
* generate a cmdbuf which will skips dispatches when necessary. This is because the compute
* queue is missing IB2 which means it's not possible to skip the cmdbuf entirely. This
* should also be suspended when task shaders are used because the DGC ACE IB would be
* uninitialized otherwise.
*/
const bool suspend_cond_render =
(cmd_buffer->qf == RADV_QUEUE_COMPUTE || radv_dgc_with_task_shader(pGeneratedCommandsInfo));
const bool suspend_conditional_rendering =
(cmd_buffer->qf == RADV_QUEUE_COMPUTE || radv_dgc_get_shader(pipeline_info, eso_info, MESA_SHADER_TASK));
const bool old_predicating = cmd_buffer->state.predicating;
if (suspend_cond_render && cmd_buffer->state.predicating) {
if (suspend_conditional_rendering && cmd_buffer->state.predicating) {
cmd_buffer->state.predicating = false;
}
radv_prepare_dgc(cmd_buffer, pGeneratedCommandsInfo, old_predicating);
radv_prepare_dgc(cmd_buffer, pGeneratedCommandsInfo, cmd_buffer, old_predicating);
if (suspend_cond_render) {
if (suspend_conditional_rendering) {
cmd_buffer->state.predicating = old_predicating;
}
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2;
if (radv_dgc_with_task_shader(pGeneratedCommandsInfo)) {
/* Make sure the DGC ACE IB will wait for the DGC prepare shader before the execution
* starts.
*/
/* Make sure the DGC ACE IB will wait for the DGC prepare shader before the execution
* starts.
*/
if (radv_dgc_get_shader(pipeline_info, eso_info, MESA_SHADER_TASK)) {
radv_gang_barrier(cmd_buffer, VK_PIPELINE_STAGE_2_COMMAND_PREPROCESS_BIT_NV,
VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT);
}
}
if (compute) {
radv_dgc_before_dispatch(cmd_buffer);
if (!pGeneratedCommandsInfo->pipeline)
cmd_buffer->has_indirect_pipeline_binds = true;
if (rt) {
radv_dgc_before_dispatch(cmd_buffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR);
} else if (compute) {
radv_dgc_before_dispatch(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE);
} else {
struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
struct radv_draw_info info;
struct radv_draw_info info = {
.count = pGeneratedCommandsInfo->maxSequenceCount,
.indirect = (void *)&info,
.indexed = !!(layout->vk.dgc_info & BITFIELD_BIT(MESA_VK_DGC_DRAW_INDEXED)),
};
info.count = pGeneratedCommandsInfo->sequencesCount;
info.indirect = prep_buffer; /* We're not really going use it this way, but a good signal
that this is not direct. */
info.indirect_offset = 0;
info.stride = 0;
info.strmout_buffer = NULL;
info.count_buffer = NULL;
info.indexed = layout->indexed;
info.instance_count = 0;
if (radv_pipeline_has_stage(graphics_pipeline, MESA_SHADER_MESH)) {
if (layout->vk.dgc_info & BITFIELD_BIT(MESA_VK_DGC_DRAW_MESH)) {
if (!radv_before_taskmesh_draw(cmd_buffer, &info, 1, true))
return;
} else {
@@ -11696,46 +11674,63 @@ radv_CmdExecuteGeneratedCommandsNV(VkCommandBuffer commandBuffer, VkBool32 isPre
}
}
const uint32_t view_mask = cmd_buffer->state.render.view_mask;
if (!radv_cmd_buffer_uses_mec(cmd_buffer)) {
radeon_emit(cmd_buffer->cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating));
radeon_emit(cmd_buffer->cs, 0);
}
radv_cs_add_buffer(device->ws, cmd_buffer->cs, prep_buffer->bo);
if (compute || !view_mask) {
const uint32_t view_mask = cmd_buffer->state.render.view_mask;
if (rt || compute || !view_mask) {
radv_dgc_execute_ib(cmd_buffer, pGeneratedCommandsInfo);
} else {
u_foreach_bit (view, view_mask) {
radv_emit_view_index(&cmd_buffer->state, cmd_buffer->cs, view);
radv_dgc_execute_ib(cmd_buffer, pGeneratedCommandsInfo);
}
}
if (compute) {
if (rt) {
cmd_buffer->push_constant_stages |= RADV_RT_STAGE_BITS;
radv_dgc_after_dispatch(cmd_buffer);
} else if (compute) {
cmd_buffer->push_constant_stages |= VK_SHADER_STAGE_COMPUTE_BIT;
if (!pGeneratedCommandsInfo->pipeline)
if (ies)
radv_mark_descriptor_sets_dirty(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE);
radv_dgc_after_dispatch(cmd_buffer);
} else {
struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
if (layout->binds_index_buffer) {
if (layout->vk.dgc_info & BITFIELD_BIT(MESA_VK_DGC_IB)) {
cmd_buffer->state.last_index_type = -1;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_INDEX_BUFFER;
}
if (layout->bind_vbo_mask)
if (layout->vk.dgc_info & BITFIELD_BIT(MESA_VK_DGC_VB))
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_VERTEX_BUFFER;
cmd_buffer->push_constant_stages |= graphics_pipeline->active_stages;
if (pipeline_info) {
VK_FROM_HANDLE(radv_pipeline, pipeline, pipeline_info->pipeline);
struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
cmd_buffer->push_constant_stages |= graphics_pipeline->active_stages;
} else {
assert(eso_info);
for (unsigned i = 0; i < eso_info->shaderCount; ++i) {
VK_FROM_HANDLE(radv_shader_object, shader_object, eso_info->pShaders[i]);
cmd_buffer->push_constant_stages |= mesa_to_vk_shader_stage(shader_object->stage);
}
}
if (!(layout->vk.dgc_info & BITFIELD_BIT(MESA_VK_DGC_DRAW_INDEXED))) {
/* Non-indexed draws overwrite VGT_INDEX_TYPE, so the state must be
* re-emitted before the next indexed draw.
*/
cmd_buffer->state.last_index_type = -1;
}
cmd_buffer->state.last_index_type = -1;
cmd_buffer->state.last_num_instances = -1;
cmd_buffer->state.last_vertex_offset_valid = false;
cmd_buffer->state.last_first_instance = -1;
@@ -12102,12 +12097,16 @@ radv_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_inf
}
static void
radv_dgc_before_dispatch(struct radv_cmd_buffer *cmd_buffer)
radv_dgc_before_dispatch(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point)
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_compute_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
struct radv_shader *compute_shader = cmd_buffer->state.shaders[MESA_SHADER_COMPUTE];
struct radv_compute_pipeline *pipeline = bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR
? &cmd_buffer->state.rt_pipeline->base
: cmd_buffer->state.compute_pipeline;
struct radv_shader *compute_shader = bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR
? cmd_buffer->state.rt_pipeline->prolog
: cmd_buffer->state.shaders[MESA_SHADER_COMPUTE];
bool pipeline_is_dirty = pipeline != cmd_buffer->state.emitted_compute_pipeline;
/* We will have run the DGC patch shaders before, so we can assume that there is something to
@@ -12119,9 +12118,11 @@ radv_dgc_before_dispatch(struct radv_cmd_buffer *cmd_buffer)
if (pipeline)
radv_emit_compute_pipeline(cmd_buffer, pipeline);
if (bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR)
radv_emit_rt_stack_size(cmd_buffer);
radv_emit_cache_flush(cmd_buffer);
radv_upload_compute_shader_descriptors(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE);
radv_upload_compute_shader_descriptors(cmd_buffer, bind_point);
if (pipeline_is_dirty) {
const bool has_prefetch = pdev->info.gfx_level >= GFX7;
@@ -12136,7 +12137,9 @@ radv_dgc_before_dispatch(struct radv_cmd_buffer *cmd_buffer)
* We only need to do this when the pipeline is dirty because when we switch between
* the two we always need to switch pipelines.
*/
radv_mark_descriptor_sets_dirty(cmd_buffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR);
radv_mark_descriptor_sets_dirty(cmd_buffer, bind_point == VK_PIPELINE_BIND_POINT_COMPUTE
? VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR
: VK_PIPELINE_BIND_POINT_COMPUTE);
}
}
@@ -13672,42 +13675,6 @@ radv_CmdWriteBufferMarker2AMD(VkCommandBuffer commandBuffer, VkPipelineStageFlag
assert(cmd_buffer->cs->cdw <= cdw_max);
}
VKAPI_ATTR void VKAPI_CALL
radv_CmdBindPipelineShaderGroupNV(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
VkPipeline pipeline, uint32_t groupIndex)
{
fprintf(stderr, "radv: unimplemented vkCmdBindPipelineShaderGroupNV\n");
abort();
}
/* VK_NV_device_generated_commands_compute */
VKAPI_ATTR void VKAPI_CALL
radv_CmdUpdatePipelineIndirectBufferNV(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
VkPipeline _pipeline)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline);
const struct radeon_cmdbuf *cs = &compute_pipeline->indirect.cs;
const uint64_t va = compute_pipeline->indirect.va;
struct radv_compute_pipeline_metadata metadata;
uint32_t offset = 0;
radv_get_compute_shader_metadata(device, compute_pipeline->base.shaders[MESA_SHADER_COMPUTE], &metadata);
radv_write_data(cmd_buffer, V_370_ME, va + offset, sizeof(metadata) / 4, (const uint32_t *)&metadata, false);
offset += sizeof(metadata);
radv_write_data(cmd_buffer, V_370_ME, va + offset, 1, (const uint32_t *)&cs->cdw, false);
offset += sizeof(uint32_t);
radv_write_data(cmd_buffer, V_370_ME, va + offset, cs->cdw, (const uint32_t *)cs->buf, false);
offset += cs->cdw * sizeof(uint32_t);
assert(offset < compute_pipeline->indirect.size);
}
/* VK_EXT_descriptor_buffer */
VKAPI_ATTR void VKAPI_CALL
radv_CmdBindDescriptorBuffersEXT(VkCommandBuffer commandBuffer, uint32_t bufferCount,

@@ -537,7 +537,6 @@ struct radv_cmd_buffer {
bool gds_needed; /* for GFX10 streamout and NGG GS queries */
bool gds_oa_needed; /* for GFX10 streamout */
bool sample_positions_needed;
bool has_indirect_pipeline_binds;
uint64_t gfx9_fence_va;
uint32_t gfx9_fence_idx;

@@ -1100,7 +1100,6 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
simple_mtx_init(&device->trace_mtx, mtx_plain);
simple_mtx_init(&device->pstate_mtx, mtx_plain);
simple_mtx_init(&device->rt_handles_mtx, mtx_plain);
simple_mtx_init(&device->compute_scratch_mtx, mtx_plain);
simple_mtx_init(&device->pso_cache_stats_mtx, mtx_plain);
device->rt_handles = _mesa_hash_table_create(NULL, _mesa_hash_u32, _mesa_key_u32_equal);
@@ -1359,7 +1358,6 @@ fail_queue:
simple_mtx_destroy(&device->pstate_mtx);
simple_mtx_destroy(&device->trace_mtx);
simple_mtx_destroy(&device->rt_handles_mtx);
simple_mtx_destroy(&device->compute_scratch_mtx);
simple_mtx_destroy(&device->pso_cache_stats_mtx);
mtx_destroy(&device->overallocation_mutex);
@@ -1417,7 +1415,6 @@ radv_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
simple_mtx_destroy(&device->pstate_mtx);
simple_mtx_destroy(&device->trace_mtx);
simple_mtx_destroy(&device->rt_handles_mtx);
simple_mtx_destroy(&device->compute_scratch_mtx);
simple_mtx_destroy(&device->pso_cache_stats_mtx);
radv_destroy_shader_arenas(device);

@@ -541,11 +541,6 @@ struct radv_device {
/* Not NULL if a GPU hang report has been generated for VK_EXT_device_fault. */
char *gpu_hang_report;
/* For indirect compute pipeline binds with DGC only. */
simple_mtx_t compute_scratch_mtx;
uint32_t compute_scratch_size_per_wave;
uint32_t compute_scratch_waves;
/* PSO cache stats */
simple_mtx_t pso_cache_stats_mtx;
struct radv_pso_cache_stats pso_cache_stats[RADV_PIPELINE_TYPE_COUNT];
@@ -559,12 +554,6 @@ radv_device_physical(const struct radv_device *dev)
return (struct radv_physical_device *)dev->vk.physical;
}
static inline bool
radv_uses_device_generated_commands(const struct radv_device *device)
{
return device->vk.enabled_features.deviceGeneratedCommandsNV || device->vk.enabled_features.deviceGeneratedCompute;
}
static inline bool
radv_uses_primitives_generated_query(const struct radv_device *device)
{

@@ -1,82 +0,0 @@
/*
* Copyright © 2016 Red Hat.
* Copyright © 2016 Bas Nieuwenhuizen
*
* based in part on anv driver which is:
* Copyright © 2015 Intel Corporation
*
* SPDX-License-Identifier: MIT
*/
#ifndef RADV_DEVICE_GENERATED_COMMANDS_H
#define RADV_DEVICE_GENERATED_COMMANDS_H
#include "vk_object.h"
#include "radv_constants.h"
struct radv_cmd_buffer;
struct radv_pipeline;
struct radv_indirect_command_layout {
struct vk_object_base base;
VkIndirectCommandsLayoutUsageFlagsNV flags;
VkPipelineBindPoint pipeline_bind_point;
uint32_t input_stride;
uint32_t token_count;
bool indexed;
bool binds_index_buffer;
bool draw_mesh_tasks;
uint16_t draw_params_offset;
uint16_t index_buffer_offset;
uint16_t dispatch_params_offset;
bool bind_pipeline;
uint16_t pipeline_params_offset;
bool vertex_dynamic_stride;
uint32_t bind_vbo_mask;
uint32_t vbo_offsets[MAX_VBS];
uint64_t push_constant_mask;
uint32_t push_constant_offsets[MAX_PUSH_CONSTANTS_SIZE / 4];
uint32_t push_constant_size;
uint32_t ibo_type_32;
uint32_t ibo_type_8;
VkPipeline pipeline;
VkIndirectCommandsLayoutTokenNV tokens[0];
};
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_indirect_command_layout, base, VkIndirectCommandsLayoutNV,
VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NV)
uint32_t radv_get_indirect_main_cmdbuf_size(const VkGeneratedCommandsInfoNV *cmd_info);
uint32_t radv_get_indirect_ace_cmdbuf_size(const VkGeneratedCommandsInfoNV *cmd_info);
uint32_t radv_get_indirect_main_cmdbuf_offset(const VkGeneratedCommandsInfoNV *cmd_info);
uint32_t radv_get_indirect_ace_cmdbuf_offset(const VkGeneratedCommandsInfoNV *cmd_info);
uint32_t radv_get_indirect_main_trailer_offset(const VkGeneratedCommandsInfoNV *cmd_info);
uint32_t radv_get_indirect_ace_trailer_offset(const VkGeneratedCommandsInfoNV *cmd_info);
bool radv_use_dgc_predication(struct radv_cmd_buffer *cmd_buffer,
const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo);
bool radv_dgc_can_preprocess(const struct radv_indirect_command_layout *layout, struct radv_pipeline *pipeline);
bool radv_dgc_with_task_shader(const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo);
void radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo,
bool cond_render_enabled);
#endif /* RADV_DEVICE_GENERATED_COMMANDS_H */

src/amd/vulkan/radv_dgc.h (new file, 66 lines)

@@ -0,0 +1,66 @@
/*
* Copyright © 2024 Valve Corporation
*
* SPDX-License-Identifier: MIT
*/
#ifndef RADV_DGC_H
#define RADV_DGC_H
#include "compiler/shader_enums.h"
#include "radv_constants.h"
#include "vk_device_generated_commands.h"
struct radv_cmd_buffer;
enum radv_queue_family;
struct radv_indirect_command_layout {
struct vk_indirect_command_layout vk;
uint64_t push_constant_mask;
uint32_t push_constant_offsets[MAX_PUSH_CONSTANTS_SIZE / 4];
uint64_t sequence_index_mask;
VkPipeline pipeline;
};
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_indirect_command_layout, vk.base, VkIndirectCommandsLayoutEXT,
VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_EXT)
struct radv_indirect_execution_set {
struct vk_object_base base;
struct radeon_winsys_bo *bo;
uint64_t va;
uint8_t *mapped_ptr;
uint32_t stride;
uint32_t compute_scratch_size_per_wave;
uint32_t compute_scratch_waves;
};
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_indirect_execution_set, base, VkIndirectExecutionSetEXT,
VK_OBJECT_TYPE_INDIRECT_EXECUTION_SET_EXT);
uint32_t radv_get_indirect_main_cmdbuf_offset(const VkGeneratedCommandsInfoEXT *pGeneratedCommandsInfo);
uint32_t radv_get_indirect_ace_cmdbuf_offset(const VkGeneratedCommandsInfoEXT *pGeneratedCommandsInfo);
uint32_t radv_get_indirect_main_cmdbuf_size(const VkGeneratedCommandsInfoEXT *pGeneratedCommandsInfo);
uint32_t radv_get_indirect_ace_cmdbuf_size(const VkGeneratedCommandsInfoEXT *pGeneratedCommandsInfo);
uint32_t radv_get_indirect_main_trailer_offset(const VkGeneratedCommandsInfoEXT *pGeneratedCommandsInfo);
uint32_t radv_get_indirect_ace_trailer_offset(const VkGeneratedCommandsInfoEXT *pGeneratedCommandsInfo);
void radv_prepare_dgc(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCommandsInfoEXT *pGeneratedCommandsInfo,
struct radv_cmd_buffer *state_cmd_buffer, bool cond_render_enabled);
bool radv_use_dgc_predication(struct radv_cmd_buffer *cmd_buffer,
const VkGeneratedCommandsInfoEXT *pGeneratedCommandsInfo);
struct radv_shader *radv_dgc_get_shader(const VkGeneratedCommandsPipelineInfoEXT *pipeline_info,
const VkGeneratedCommandsShaderInfoEXT *eso_info, gl_shader_stage stage);
#endif /* RADV_DGC_H */

@@ -148,7 +148,6 @@ static const driOptionDescription radv_dri_options[] = {
DRI_CONF_RADV_DISABLE_TRUNC_COORD(false)
DRI_CONF_RADV_DISABLE_SINKING_LOAD_INPUT_FS(false)
DRI_CONF_RADV_DISABLE_DEPTH_STORAGE(false)
DRI_CONF_RADV_DGC(false)
DRI_CONF_RADV_FLUSH_BEFORE_QUERY_COPY(false)
DRI_CONF_RADV_ENABLE_UNIFIED_HEAP_ON_APU(false)
DRI_CONF_RADV_TEX_NON_UNIFORM(false)
@@ -243,8 +242,6 @@ radv_init_dri_options(struct radv_instance *instance)
instance->drirc.override_ray_tracing_shader_version =
driQueryOptioni(&instance->drirc.options, "radv_override_ray_tracing_shader_version");
instance->drirc.enable_dgc = driQueryOptionb(&instance->drirc.options, "radv_dgc");
instance->drirc.override_vram_size = driQueryOptioni(&instance->drirc.options, "override_vram_size");
instance->drirc.enable_khr_present_wait = driQueryOptionb(&instance->drirc.options, "vk_khr_present_wait");

@@ -66,7 +66,6 @@ struct radv_instance {
bool legacy_sparse_binding;
bool force_pstate_peak_gfx11_dgpu;
bool clear_lds;
bool enable_dgc;
bool enable_khr_present_wait;
bool report_llvm9_version_string;
bool vk_require_etc2;

@@ -747,10 +747,6 @@ radv_physical_device_get_supported_extensions(const struct radv_physical_device
.INTEL_shader_integer_functions2 = true,
.MESA_image_alignment_control = pdev->info.gfx_level >= GFX9 && pdev->info.gfx_level <= GFX11_5,
.NV_compute_shader_derivatives = true,
.NV_device_generated_commands =
pdev->info.gfx_level >= GFX8 && instance->drirc.enable_dgc && !(instance->debug_flags & RADV_DEBUG_NO_IBS),
.NV_device_generated_commands_compute =
pdev->info.gfx_level >= GFX8 && instance->drirc.enable_dgc && !(instance->debug_flags & RADV_DEBUG_NO_IBS),
/* Undocumented extension purely for vkd3d-proton. This check is to prevent anyone else from
* using it.
*/
@@ -1129,9 +1125,6 @@ radv_physical_device_get_features(const struct radv_physical_device *pdev, struc
.performanceCounterQueryPools = has_perf_query,
.performanceCounterMultipleQueryPools = has_perf_query,
/* VK_NV_device_generated_commands */
.deviceGeneratedCommandsNV = true,
/* VK_EXT_attachment_feedback_loop_layout */
.attachmentFeedbackLoopLayout = true,
@@ -1214,11 +1207,6 @@ radv_physical_device_get_features(const struct radv_physical_device *pdev, struc
/* VK_KHR_maintenance5 */
.maintenance5 = true,
/* VK_NV_device_generated_commands_compute */
.deviceGeneratedCompute = true,
.deviceGeneratedComputePipelines = true,
.deviceGeneratedComputeCaptureReplay = false,
/* VK_KHR_cooperative_matrix */
.cooperativeMatrix = pdev->info.gfx_level >= GFX11 && !pdev->use_llvm,
.cooperativeMatrixRobustBufferAccess = pdev->info.gfx_level >= GFX11 && !pdev->use_llvm,
@@ -1830,20 +1818,6 @@ radv_get_physical_device_properties(struct radv_physical_device *pdev)
/* VK_KHR_performance_query */
p->allowCommandBufferQueryCopies = false;
/* VK_NV_device_generated_commands */
p->maxIndirectCommandsStreamCount = 1;
p->maxIndirectCommandsStreamStride = UINT32_MAX;
p->maxIndirectCommandsTokenCount = 512;
p->maxIndirectCommandsTokenOffset = UINT16_MAX;
p->minIndirectCommandsBufferOffsetAlignment = 4;
p->minSequencesCountBufferOffsetAlignment = 4;
p->minSequencesIndexBufferOffsetAlignment = 4;
/* Don't support even a shader group count = 1 until we support shader
* overrides during pipeline creation. */
p->maxGraphicsShaderGroupCount = 0;
/* MSB reserved for signalling indirect count enablement. */
p->maxIndirectSequenceCount = UINT32_MAX >> 1;
/* VK_EXT_graphics_pipeline_library */
p->graphicsPipelineLibraryFastLinking = true;
p->graphicsPipelineLibraryIndependentInterpolationDecoration = true;

@@ -137,7 +137,7 @@ radv_pipeline_get_shader_key(const struct radv_device *device, const VkPipelineS
if (flags & VK_PIPELINE_CREATE_2_VIEW_INDEX_FROM_DEVICE_INDEX_BIT_KHR)
key.view_index_from_device_index = 1;
if (flags & VK_PIPELINE_CREATE_INDIRECT_BINDABLE_BIT_NV)
if (flags & VK_PIPELINE_CREATE_2_INDIRECT_BINDABLE_BIT_EXT)
key.indirect_bindable = 1;
if (stage->stage & RADV_GRAPHICS_STAGE_BITS) {

@@ -305,37 +305,6 @@ radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkC
radv_compute_pipeline_init(pipeline, pipeline_layout, pipeline->base.shaders[MESA_SHADER_COMPUTE]);
if (pipeline->base.create_flags & VK_PIPELINE_CREATE_INDIRECT_BINDABLE_BIT_NV) {
const VkComputePipelineIndirectBufferInfoNV *indirect_buffer =
vk_find_struct_const(pCreateInfo->pNext, COMPUTE_PIPELINE_INDIRECT_BUFFER_INFO_NV);
struct radv_shader *shader = pipeline->base.shaders[MESA_SHADER_COMPUTE];
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = &pipeline->indirect.cs;
cs->reserved_dw = cs->max_dw = 32;
cs->buf = malloc(cs->max_dw * 4);
if (!cs->buf) {
radv_pipeline_destroy(device, &pipeline->base, pAllocator);
return result;
}
radv_emit_compute_shader(pdev, cs, shader);
pipeline->indirect.va = indirect_buffer->deviceAddress;
pipeline->indirect.size = indirect_buffer->size;
/* vkCmdUpdatePipelineIndirectBufferNV() can be called on any queues supporting transfer
* operations and it's not required to call it on the same queue as the DGC execute. Because
* it's not possible to know if the compute shader uses scratch when DGC execute is called,
* the only solution is gather the max scratch size of all indirect pipelines.
*/
simple_mtx_lock(&device->compute_scratch_mtx);
device->compute_scratch_size_per_wave =
MAX2(device->compute_scratch_size_per_wave, shader->config.scratch_bytes_per_wave);
device->compute_scratch_waves = MAX2(device->compute_scratch_waves, radv_get_max_scratch_waves(device, shader));
simple_mtx_unlock(&device->compute_scratch_mtx);
}
*pPipeline = radv_pipeline_to_handle(&pipeline->base);
radv_rmv_log_compute_pipeline_create(device, &pipeline->base, pipeline->base.is_internal);
return VK_SUCCESS;
@@ -371,12 +340,8 @@ radv_create_compute_pipelines(VkDevice _device, VkPipelineCache pipelineCache, u
void
radv_destroy_compute_pipeline(struct radv_device *device, struct radv_compute_pipeline *pipeline)
{
struct radeon_cmdbuf *cs = &pipeline->indirect.cs;
if (pipeline->base.shaders[MESA_SHADER_COMPUTE])
radv_shader_unref(device, pipeline->base.shaders[MESA_SHADER_COMPUTE]);
free(cs->buf);
}
VKAPI_ATTR VkResult VKAPI_CALL

@@ -19,12 +19,6 @@ struct radv_shader_info;
struct radv_compute_pipeline {
struct radv_pipeline base;
struct {
struct radeon_cmdbuf cs;
uint64_t va;
uint64_t size;
} indirect;
};
RADV_DECL_PIPELINE_DOWNCAST(compute, RADV_PIPELINE_COMPUTE)

@@ -1268,7 +1268,6 @@ radv_update_preambles(struct radv_queue_state *queue, struct radv_device *device
bool *has_follower)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
bool has_indirect_pipeline_binds = false;
if (queue->qf != RADV_QUEUE_GENERAL && queue->qf != RADV_QUEUE_COMPUTE) {
for (uint32_t j = 0; j < cmd_buffer_count; j++) {
@@ -1308,16 +1307,6 @@ radv_update_preambles(struct radv_queue_state *queue, struct radv_device *device
needs.sample_positions |= cmd_buffer->sample_positions_needed;
*use_perf_counters |= cmd_buffer->state.uses_perf_counters;
*has_follower |= !!cmd_buffer->gang.cs;
has_indirect_pipeline_binds |= cmd_buffer->has_indirect_pipeline_binds;
}
if (has_indirect_pipeline_binds) {
/* Use the maximum possible scratch size for indirect compute pipelines with DGC. */
simple_mtx_lock(&device->compute_scratch_mtx);
needs.compute_scratch_size_per_wave = MAX2(needs.compute_scratch_waves, device->compute_scratch_size_per_wave);
needs.compute_scratch_waves = MAX2(needs.compute_scratch_waves, device->compute_scratch_waves);
simple_mtx_unlock(&device->compute_scratch_mtx);
}
/* Sanitize scratch size information. */

@@ -886,13 +886,6 @@ radv_create_shader_arena(struct radv_device *device, struct radv_shader_free_lis
if (replayable)
flags |= RADEON_FLAG_REPLAYABLE;
/* vkCmdUpdatePipelineIndirectBufferNV() can be called on any queue supporting transfer
* operations and it's not required to call it on the same queue as DGC execute. To make sure the
* compute shader BO is part of the DGC execute submission, force all shaders to be local BOs.
*/
if (device->vk.enabled_features.deviceGeneratedComputePipelines)
flags |= RADEON_FLAG_PREFER_LOCAL_BO;
VkResult result;
result = radv_bo_create(device, NULL, arena_size, RADV_SHADER_ALLOC_ALIGNMENT, RADEON_DOMAIN_VRAM, flags,
RADV_BO_PRIORITY_SHADER, replay_va, true, &arena->bo);

@@ -110,6 +110,9 @@ radv_shader_stage_init(const VkShaderCreateInfoEXT *sinfo, struct radv_shader_st
out_stage->key.subgroup_require_full = 1;
}
if (sinfo->flags & VK_SHADER_CREATE_INDIRECT_BINDABLE_BIT_EXT)
out_stage->key.indirect_bindable = 1;
if (out_stage->stage == MESA_SHADER_MESH) {
out_stage->key.has_task_shader = !(sinfo->flags & VK_SHADER_CREATE_NO_TASK_SHADER_BIT_EXT);
}

@@ -42,7 +42,6 @@ Application bugs worked around in this file:
<option name="radv_zero_vram" value="true" />
<option name="radv_disable_aniso_single_level" value="true" />
<option name="radv_disable_trunc_coord" value="true" />
<option name="radv_dgc" value="true" />
</engine>
<engine engine_name_match="DXVK">

@@ -708,10 +708,6 @@
DRI_CONF_OPT_B(radv_disable_depth_storage, def, \
"Hides support for storage access to depth formats")
#define DRI_CONF_RADV_DGC(def) \
DRI_CONF_OPT_B(radv_dgc, def, \
"Expose an experimental implementation of VK_NV_device_generated_commands on GFX8+")
#define DRI_CONF_RADV_FLUSH_BEFORE_QUERY_COPY(def) \
DRI_CONF_OPT_B( \
radv_flush_before_query_copy, def, \