anv: enable ray queries
Only on platforms that support it.

v3: Split out code setting up ray query shadow buffer (Caio)

    Don't forget to set up ray query globals even when no shadow buffer
    is used (Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13719>
committed by Marge Bot

parent c78be5da30
commit 5d3e419378
src/intel/dev/intel_device_info.h

@@ -136,6 +136,7 @@ struct intel_device_info
    bool has_aux_map;
    bool has_tiling_uapi;
    bool has_ray_tracing;
+   bool has_ray_query;
    bool has_local_mem;
    bool has_lsc;
    bool has_mesh_shading;
src/intel/vulkan/anv_cmd_buffer.c

@@ -483,6 +483,78 @@ set_dirty_for_bind_map(struct anv_cmd_buffer *cmd_buffer,
    cmd_buffer->state.push_constants_dirty |= mesa_to_vk_shader_stage(stage);
 }
 
+static inline uint32_t
+ilog2_round_up(uint32_t value)
+{
+   assert(value != 0);
+   return 32 - __builtin_clz(value - 1);
+}
+
+static void
+anv_cmd_buffer_set_ray_query_buffer(struct anv_cmd_buffer *cmd_buffer,
+                                    struct anv_cmd_pipeline_state *pipeline_state,
+                                    struct anv_pipeline *pipeline,
+                                    VkShaderStageFlags stages)
+{
+   struct anv_device *device = cmd_buffer->device;
+
+   uint64_t ray_shadow_size =
+      align_u64(brw_rt_ray_queries_shadow_stacks_size(&device->info,
+                                                      pipeline->ray_queries),
+                4096);
+   if (ray_shadow_size > 0 &&
+       (!cmd_buffer->state.ray_query_shadow_bo ||
+        cmd_buffer->state.ray_query_shadow_bo->size < ray_shadow_size)) {
+      unsigned shadow_size_log2 = MAX2(ilog2_round_up(ray_shadow_size), 16);
+      unsigned bucket = shadow_size_log2 - 16;
+      assert(bucket < ARRAY_SIZE(device->ray_query_shadow_bos));
+
+      struct anv_bo *bo = p_atomic_read(&device->ray_query_shadow_bos[bucket]);
+      if (bo == NULL) {
+         struct anv_bo *new_bo;
+         VkResult result = anv_device_alloc_bo(device, "RT queries shadow",
+                                               ray_shadow_size,
+                                               ANV_BO_ALLOC_LOCAL_MEM, /* alloc_flags */
+                                               0, /* explicit_address */
+                                               &new_bo);
+         if (result != VK_SUCCESS) {
+            anv_batch_set_error(&cmd_buffer->batch, result);
+            return;
+         }
+
+         bo = p_atomic_cmpxchg(&device->ray_query_shadow_bos[bucket], NULL, new_bo);
+         if (bo != NULL) {
+            /* Another thread won the race: drop our redundant allocation,
+             * not the shared BO. */
+            anv_device_release_bo(device, new_bo);
+         } else {
+            bo = new_bo;
+         }
+      }
+      cmd_buffer->state.ray_query_shadow_bo = bo;
+
+      /* Add the ray query buffers to the batch list. */
+      anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
+                            cmd_buffer->batch.alloc,
+                            cmd_buffer->state.ray_query_shadow_bo);
+   }
+
+   /* Add the HW buffer to the list of BOs used. */
+   anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
+                         cmd_buffer->batch.alloc,
+                         device->ray_query_bo);
+
+   /* Fill the push constants & mark them dirty. */
+   struct anv_state ray_query_global_state =
+      anv_genX(&device->info, cmd_buffer_ray_query_globals)(cmd_buffer);
+
+   struct anv_address ray_query_globals_addr = (struct anv_address) {
+      .bo = device->dynamic_state_pool.block_pool.bo,
+      .offset = ray_query_global_state.offset,
+   };
+   pipeline_state->push_constants.ray_query_globals =
+      anv_address_physical(ray_query_globals_addr);
+   cmd_buffer->state.push_constants_dirty |= stages;
+}
+
 void anv_CmdBindPipeline(
     VkCommandBuffer commandBuffer,
     VkPipelineBindPoint pipelineBindPoint,

@@ -490,6 +562,8 @@ void anv_CmdBindPipeline(
 {
    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
    ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
+   struct anv_cmd_pipeline_state *state;
+   VkShaderStageFlags stages = 0;
 
    switch (pipelineBindPoint) {
    case VK_PIPELINE_BIND_POINT_COMPUTE: {

@@ -502,6 +576,9 @@ void anv_CmdBindPipeline(
       cmd_buffer->state.compute.pipeline_dirty = true;
       set_dirty_for_bind_map(cmd_buffer, MESA_SHADER_COMPUTE,
                              &compute_pipeline->cs->bind_map);
+
+      state = &cmd_buffer->state.compute.base;
+      stages = VK_SHADER_STAGE_COMPUTE_BIT;
       break;
    }
 
@@ -525,6 +602,9 @@ void anv_CmdBindPipeline(
       anv_dynamic_state_copy(&cmd_buffer->state.gfx.dynamic,
                              &gfx_pipeline->dynamic_state,
                              gfx_pipeline->dynamic_state_mask);
+
+      state = &cmd_buffer->state.gfx.base;
+      stages = gfx_pipeline->active_stages;
       break;
    }
 
@@ -541,6 +621,8 @@ void anv_CmdBindPipeline(
          anv_CmdSetRayTracingPipelineStackSizeKHR(commandBuffer,
                                                   rt_pipeline->stack_size);
       }
+
+      state = &cmd_buffer->state.rt.base;
       break;
    }
 
@@ -548,6 +630,9 @@ void anv_CmdBindPipeline(
       assert(!"invalid bind point");
       break;
    }
+
+   if (pipeline->ray_queries > 0)
+      anv_cmd_buffer_set_ray_query_buffer(cmd_buffer, state, pipeline, stages);
 }
 
 void anv_CmdSetRasterizerDiscardEnableEXT(

@@ -1675,13 +1760,6 @@ void anv_CmdSetFragmentShadingRateKHR(
    }
 }
 
-static inline uint32_t
-ilog2_round_up(uint32_t value)
-{
-   assert(value != 0);
-   return 32 - __builtin_clz(value - 1);
-}
-
 void anv_CmdSetRayTracingPipelineStackSizeKHR(
     VkCommandBuffer commandBuffer,
     uint32_t pipelineStackSize)
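The shadow-BO cache above keys buffers by power-of-two size buckets, with bucket 0 at 64KB (1 << 16). A minimal standalone sketch of the bucket math (the sizes are made up; the driver allocates the exact page-aligned size and uses the bucket only as a cache key):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Same helper as in the patch: ceil(log2(value)) for value > 0. */
    static inline uint32_t
    ilog2_round_up(uint32_t value)
    {
       assert(value != 0);
       return 32 - __builtin_clz(value - 1);
    }

    int main(void)
    {
       /* Buckets start at 1 << 16 (64KB); each subsequent bucket doubles. */
       const uint32_t sizes[] = { 4096, 65536, 65537, 1u << 20 };
       for (unsigned i = 0; i < 4; i++) {
          uint32_t log2_size = ilog2_round_up(sizes[i]);
          uint32_t bucket = (log2_size < 16 ? 16 : log2_size) - 16;
          printf("size %7u -> bucket %u (granularity up to %u bytes)\n",
                 (unsigned)sizes[i], (unsigned)bucket, 1u << (bucket + 16));
       }
       return 0;
    }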
src/intel/vulkan/anv_device.c

@@ -221,6 +221,7 @@ get_device_extensions(const struct anv_physical_device *device,
          device->use_call_secondary,
       .KHR_pipeline_executable_properties = true,
       .KHR_push_descriptor = true,
+      .KHR_ray_query = device->info.has_ray_tracing,
       .KHR_relaxed_block_layout = true,
       .KHR_sampler_mirror_clamp_to_edge = true,
       .KHR_sampler_ycbcr_conversion = true,

@@ -1640,6 +1641,12 @@ void anv_GetPhysicalDeviceFeatures2(
          break;
       }
 
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR: {
+         VkPhysicalDeviceRayQueryFeaturesKHR *features = (void *)ext;
+         features->rayQuery = pdevice->info.has_ray_tracing;
+         break;
+      }
+
       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
          VkPhysicalDeviceRobustness2FeaturesEXT *features = (void *)ext;
         features->robustBufferAccess2 = true;

@@ -3331,9 +3338,22 @@ VkResult anv_CreateDevice(
                       device->workaround_bo->size,
                       INTEL_DEBUG_BLOCK_TYPE_FRAME);
 
-   result = anv_device_init_trivial_batch(device);
+   if (device->vk.enabled_extensions.KHR_ray_query) {
+      uint32_t ray_queries_size =
+         align_u32(brw_rt_ray_queries_hw_stacks_size(&device->info), 4096);
+
+      result = anv_device_alloc_bo(device, "ray queries",
+                                   ray_queries_size,
+                                   ANV_BO_ALLOC_LOCAL_MEM,
+                                   0 /* explicit_address */,
+                                   &device->ray_query_bo);
+      if (result != VK_SUCCESS)
+         goto fail_workaround_bo;
+   }
+
+   result = anv_device_init_trivial_batch(device);
    if (result != VK_SUCCESS)
-      goto fail_workaround_bo;
+      goto fail_ray_query_bo;
 
    if (device->info.ver >= 12 &&
        device->vk.enabled_extensions.KHR_fragment_shading_rate) {

@@ -3403,6 +3423,9 @@ VkResult anv_CreateDevice(
    anv_scratch_pool_finish(device, &device->scratch_pool);
 fail_trivial_batch:
    anv_device_release_bo(device, device->trivial_batch_bo);
+fail_ray_query_bo:
+   if (device->ray_query_bo)
+      anv_device_release_bo(device, device->ray_query_bo);
 fail_workaround_bo:
    anv_device_release_bo(device, device->workaround_bo);
 fail_surface_aux_map_pool:

@@ -3487,6 +3510,13 @@ void anv_DestroyDevice(
 
    anv_scratch_pool_finish(device, &device->scratch_pool);
 
+   if (device->vk.enabled_extensions.KHR_ray_query) {
+      for (unsigned i = 0; i < ARRAY_SIZE(device->ray_query_shadow_bos); i++) {
+         if (device->ray_query_shadow_bos[i] != NULL)
+            anv_device_release_bo(device, device->ray_query_shadow_bos[i]);
+      }
+      anv_device_release_bo(device, device->ray_query_bo);
+   }
    anv_device_release_bo(device, device->workaround_bo);
    anv_device_release_bo(device, device->trivial_batch_bo);
 
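The reworked error paths in anv_CreateDevice follow the usual C unwind idiom: cleanup labels sit in reverse order of acquisition, so a failure at step N releases only what was acquired before it. A minimal sketch of the pattern, with illustrative names rather than driver code:

    #include <stdbool.h>
    #include <stdio.h>

    static bool acquire(const char *what) { printf("acquire %s\n", what); return true; }
    static void release(const char *what) { printf("release %s\n", what); }

    static int device_init(bool with_ray_query)
    {
       if (!acquire("workaround_bo"))
          goto fail;
       if (with_ray_query && !acquire("ray_query_bo"))
          goto fail_workaround_bo;
       if (!acquire("trivial_batch"))
          goto fail_ray_query_bo;
       return 0;

     fail_ray_query_bo:
       /* Mirrors the "if (device->ray_query_bo)" guard in the patch. */
       if (with_ray_query)
          release("ray_query_bo");
     fail_workaround_bo:
       release("workaround_bo");
     fail:
       return -1;
    }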
src/intel/vulkan/anv_genX.h

@@ -119,6 +119,8 @@ void genX(cmd_buffer_mark_image_written)(struct anv_cmd_buffer *cmd_buffer,
 
 void genX(cmd_emit_conditional_render_predicate)(struct anv_cmd_buffer *cmd_buffer);
 
+struct anv_state genX(cmd_buffer_ray_query_globals)(struct anv_cmd_buffer *cmd_buffer);
+
 void
 genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
                      const struct intel_l3_config *l3_config,
src/intel/vulkan/anv_nir_apply_pipeline_layout.c

@@ -34,6 +34,8 @@
 #define MAX_SAMPLER_TABLE_SIZE 128
 #define BINDLESS_OFFSET 255
 
+#define sizeof_field(type, field) sizeof(((type *)0)->field)
+
 struct apply_pipeline_layout_state {
    const struct anv_physical_device *pdevice;
 

@@ -1322,6 +1324,21 @@ lower_tex(nir_builder *b, nir_tex_instr *tex,
    return true;
 }
 
+static bool
+lower_ray_query_globals(nir_builder *b, nir_intrinsic_instr *intrin,
+                        struct apply_pipeline_layout_state *state)
+{
+   b->cursor = nir_instr_remove(&intrin->instr);
+
+   nir_ssa_def *rq_globals =
+      nir_load_push_constant(b, 1, 64, nir_imm_int(b, 0),
+                             .base = offsetof(struct anv_push_constants, ray_query_globals),
+                             .range = sizeof_field(struct anv_push_constants, ray_query_globals));
+   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, rq_globals);
+
+   return true;
+}
+
 static bool
 apply_pipeline_layout(nir_builder *b, nir_instr *instr, void *_state)
 {

@@ -1360,6 +1377,8 @@ apply_pipeline_layout(nir_builder *b, nir_instr *instr, void *_state)
       return lower_image_intrinsic(b, intrin, state);
    case nir_intrinsic_load_constant:
       return lower_load_constant(b, intrin, state);
+   case nir_intrinsic_load_ray_query_global_intel:
+      return lower_ray_query_globals(b, intrin, state);
    default:
       return false;
    }
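lower_ray_query_globals rewrites the intrinsic into a push-constant load whose byte window comes from offsetof plus the new sizeof_field macro. A self-contained illustration of that pairing (the struct below is a stand-in, not the real anv_push_constants layout):

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    #define sizeof_field(type, field) sizeof(((type *)0)->field)

    /* Stand-in struct; only the base/range idea matters. */
    struct push_constants {
       uint32_t pad[2];
       uint64_t ray_query_globals;
    };

    int main(void)
    {
       /* The NIR lowering emits a load covering [base, base + range). */
       printf("base = %zu, range = %zu\n",
              offsetof(struct push_constants, ray_query_globals),
              sizeof_field(struct push_constants, ray_query_globals));
       return 0;
    }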
src/intel/vulkan/anv_pipeline.c

@@ -142,6 +142,7 @@ anv_shader_compile_to_nir(struct anv_device *device,
       .post_depth_coverage = pdevice->info.ver >= 9,
       .runtime_descriptor_array = true,
       .float_controls = pdevice->info.ver >= 8,
+      .ray_query = pdevice->info.has_ray_tracing,
       .ray_tracing = pdevice->info.has_ray_tracing,
       .shader_clock = true,
       .shader_viewport_index_layer = true,

@@ -871,6 +872,8 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
    NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_push_const,
               nir_address_format_32bit_offset);
 
+   NIR_PASS_V(nir, brw_nir_lower_ray_queries, &pdevice->info);
+
    /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
    anv_nir_apply_pipeline_layout(pdevice,
                                  pipeline->device->robust_buffer_access,

@@ -1485,6 +1488,8 @@ anv_pipeline_add_executables(struct anv_pipeline *pipeline,
    } else {
       anv_pipeline_add_executable(pipeline, stage, bin->stats, 0);
    }
+
+   pipeline->ray_queries = MAX2(pipeline->ray_queries, bin->prog_data->ray_queries);
 }
 
 static uint32_t
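anv_pipeline_add_executables tracks the worst case across stages: the pipeline-wide count is the maximum of every stage's prog_data->ray_queries. In sketch form, with made-up per-stage counts:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t max_u32(uint32_t a, uint32_t b) { return a > b ? a : b; }

    int main(void)
    {
       /* Per-stage ray query counts reported by the compiler (illustrative). */
       const uint32_t per_stage[] = { 0, 2, 1 };
       uint32_t pipeline_ray_queries = 0;
       for (unsigned i = 0; i < 3; i++)
          pipeline_ray_queries = max_u32(pipeline_ray_queries, per_stage[i]);
       printf("pipeline->ray_queries = %u\n", (unsigned)pipeline_ray_queries);
       return 0;
    }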
src/intel/vulkan/anv_private.h

@@ -1210,6 +1210,21 @@ struct anv_device {
    struct anv_scratch_pool scratch_pool;
    struct anv_bo *rt_scratch_bos[16];
 
+   /** Shadow ray query BO
+    *
+    * The ray_query_bo only holds the current ray being traced. When using
+    * more than 1 ray query per thread, we cannot fit all the queries in
+    * there, so we need another buffer to hold query data that is not
+    * currently being used by the HW for tracing, similar to a scratch
+    * space.
+    *
+    * The size of the shadow buffer depends on the number of queries per
+    * shader.
+    */
+   struct anv_bo *ray_query_shadow_bos[16];
+
+   /** Ray query buffer used to communicate with the HW unit. */
+   struct anv_bo *ray_query_bo;
 
    struct anv_shader_bin *rt_trampoline;
    struct anv_shader_bin *rt_trivial_return;

@@ -2618,8 +2633,8 @@ struct anv_push_constants {
    /* Robust access pushed registers. */
    uint64_t push_reg_mask[MESA_SHADER_STAGES];
 
-   /** Pad out to a multiple of 32 bytes */
-   uint32_t pad[2];
+   /** Ray query globals (RT_DISPATCH_GLOBALS) */
+   uint64_t ray_query_globals;
 
    /* Base addresses for descriptor sets */
    uint64_t desc_sets[MAX_SETS];

@@ -3105,6 +3120,11 @@ struct anv_cmd_state {
    struct anv_state null_surface_state;
 
    struct anv_dynamic_render_pass dynamic_render_pass;
+
+   /**
+    * A buffer used for spill/fill of ray queries.
+    */
+   struct anv_bo * ray_query_shadow_bo;
 };
 
 struct anv_cmd_pool {

@@ -3463,6 +3483,8 @@ struct anv_pipeline {
    enum anv_pipeline_type type;
    VkPipelineCreateFlags flags;
 
+   uint32_t ray_queries;
+
    struct util_dynarray executables;
 
    const struct intel_l3_config * l3_config;
src/intel/vulkan/genX_cmd_buffer.c

@@ -5461,6 +5461,47 @@ void genX(CmdDispatchIndirect)(
    trace_intel_end_compute(&cmd_buffer->trace, cmd_buffer, 0, 0, 0);
 }
 
+struct anv_state
+genX(cmd_buffer_ray_query_globals)(struct anv_cmd_buffer *cmd_buffer)
+{
+#if GFX_VERx10 >= 125
+   struct anv_device *device = cmd_buffer->device;
+
+   struct anv_state state =
+      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
+                                         BRW_RT_DISPATCH_GLOBALS_SIZE,
+                                         64);
+   struct brw_rt_scratch_layout layout;
+   uint32_t stack_ids_per_dss = 2048; /* TODO: can we use a lower value in
+                                       * some cases?
+                                       */
+   brw_rt_compute_scratch_layout(&layout, &device->info,
+                                 stack_ids_per_dss, 1 << 10);
+
+   struct GFX_RT_DISPATCH_GLOBALS rtdg = {
+      .MemBaseAddress = (struct anv_address) {
+         /* The ray query HW computes offsets from the top of the buffer, so
+          * set the address to the end of the buffer.
+          */
+         .bo = device->ray_query_bo,
+         .offset = device->ray_query_bo->size,
+      },
+      .AsyncRTStackSize = layout.ray_stack_stride / 64,
+      .NumDSSRTStacks = layout.stack_ids_per_dss,
+      .MaxBVHLevels = BRW_RT_MAX_BVH_LEVELS,
+      .Flags = RT_DEPTH_TEST_LESS_EQUAL,
+      .ResumeShaderTable = (struct anv_address) {
+         .bo = cmd_buffer->state.ray_query_shadow_bo,
+      },
+   };
+   GFX_RT_DISPATCH_GLOBALS_pack(NULL, state.map, &rtdg);
+
+   return state;
+#else
+   unreachable("Not supported");
+#endif
+}
+
 #if GFX_VERx10 >= 125
 static void
 calc_local_trace_size(uint8_t local_shift[3], const uint32_t global[3])
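The MemBaseAddress comment says the ray query hardware computes offsets downward from the supplied address, which is why the code passes the end of ray_query_bo. A toy model of that assumption (illustrative only, not HW documentation):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
       const uint64_t bo_start = 0x100000, bo_size = 0x10000;
       const uint64_t base = bo_start + bo_size; /* end of BO, as in the patch */
       const uint64_t stride = 64;               /* hypothetical stack stride */

       /* If slot N lives at base - (N + 1) * stride, every slot stays in the BO. */
       for (uint64_t n = 0; n < 3; n++) {
          uint64_t slot = base - (n + 1) * stride;
          printf("slot %u at 0x%llx (in BO: %s)\n", (unsigned)n,
                 (unsigned long long)slot,
                 (slot >= bo_start && slot < base) ? "yes" : "no");
       }
       return 0;
    }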