nir: Add nir_lower_robust_access pass

Add a pass for bounds checking UBOs, SSBOs, and images to implement robustness.
This pass is based on v3d_nir_lower_robust_access.c, with significant
modifications to be appropriate for common code. Notably:

* v3d-isms are removed.
* Stop generating invalid imageSize() instructions for cube maps; these blow
  up nir_validate with Asahi's lowerings.
* Logic to wrap an intrinsic in an if-statement is extracted in anticipation of
  future robustness2 support that will reuse that code path for buffers.
* Misc cleanups to follow modern NIR best practice. This pass is noticeably
  shorter than the original v3d version.

For future support of robustness2, I envision the booleans turning into tristate
enums.
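
A rough sketch of what that might look like (the names here are hypothetical
and not part of this commit):

   /* Hypothetical future option type, replacing the booleans below. */
   enum nir_robust_access_level {
      nir_robust_access_none,    /* no lowering */
      nir_robust_access_robust,  /* robustBufferAccess / robustImageAccess */
      nir_robust_access_robust2, /* robustBufferAccess2 / robustImageAccess2 */
   };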

There are a few more knobs added for Asahi's benefit. Apple hardware can do
imageLoad and imageStore to non-buffer images (only), and there is no hardware
support for image atomics. To handle these cases, Asahi implements software
lowerings for buffer images and for image atomics. While the hardware paths are
robust, the software paths are not, so we would like to use this pass to add
robustness checks to the software paths but not the hardware paths.
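
As a sketch only (not code from the actual driver; `progress` and `nir` are
assumed to be the usual pass-loop locals), a driver in Asahi's position might
configure the pass roughly like this:

   nir_lower_robust_access_options opts = {
      /* Buffer access robustness as required by the API. */
      .lower_ubo = true,
      .lower_ssbo = true,

      /* Hardware image access is already robust; only the software-lowered
       * paths (buffer images and image atomics) need bounds checks here.
       */
      .lower_image = false,
      .lower_buffer_image = true,
      .lower_image_atomic = true,
   };
   NIR_PASS(progress, nir, nir_lower_robust_access, &opts);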

Or maybe we want a filter callback?
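
If we went that route, the callback would presumably look something like this
(hypothetical, not part of this commit):

   /* Hypothetical: return true if this intrinsic should be lowered. */
   typedef bool (*nir_robust_access_filter_cb)(const nir_intrinsic_instr *intr,
                                               const void *data);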

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23895>
Author: Alyssa Rosenzweig
Date:   2023-06-27 16:29:55 -04:00
Committed-by: Marge Bot
Parent: bafbfc57ea
Commit: f0fb8d05e3

3 changed files with 262 additions and 0 deletions

src/compiler/nir/meson.build

@@ -198,6 +198,7 @@ files_libnir = files(
  'nir_lower_regs_to_ssa.c',
  'nir_lower_readonly_images_to_tex.c',
  'nir_lower_returns.c',
  'nir_lower_robust_access.c',
  'nir_lower_samplers.c',
  'nir_lower_scratch.c',
  'nir_lower_shader_calls.c',

src/compiler/nir/nir.h

@@ -5181,6 +5181,47 @@ typedef struct {
bool nir_lower_mem_access_bit_sizes(nir_shader *shader,
                                    const nir_lower_mem_access_bit_sizes_options *options);

typedef struct {
   /* Lower load_ubo to be robust. Out-of-bounds loads will return UNDEFINED
    * values (not necessarily zero).
    */
   bool lower_ubo;

   /* Lower load_ssbo/store_ssbo/ssbo_atomic(_swap) to be robust. Out-of-bounds
    * loads and atomics will return UNDEFINED values (not necessarily zero).
    * Out-of-bounds stores and atomics CORRUPT the contents of the SSBO.
    *
    * This suffices for robustBufferAccess but not robustBufferAccess2.
    */
   bool lower_ssbo;

   /* Lower all image_load/image_store/image_atomic(_swap) instructions to be
    * robust. Out-of-bounds loads will return ZERO.
    *
    * This suffices for robustImageAccess but not robustImageAccess2.
    */
   bool lower_image;

   /* Lower all buffer image instructions as above. Implied by lower_image. */
   bool lower_buffer_image;

   /* Lower image_atomic(_swap) for all dimensions. Implied by lower_image. */
   bool lower_image_atomic;

   /* Subtract one from the UBO index */
   bool skip_ubo_0;

   /* Vulkan's robustBufferAccess feature is only concerned with buffers that
    * are bound through descriptor sets, so shared memory is not included, but
    * it may be useful to enable this for debugging.
    */
   bool lower_shared;
} nir_lower_robust_access_options;

bool nir_lower_robust_access(nir_shader *s,
                             const nir_lower_robust_access_options *opts);

typedef bool (*nir_should_vectorize_mem_func)(unsigned align_mul,
                                              unsigned align_offset,
                                              unsigned bit_size,

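As a usage sketch for the new API (the `device` feature struct here is an
assumed driver-side type, not part of this diff), a Vulkan driver could derive
the options from the robustness features it has enabled:

   nir_lower_robust_access_options opts = {
      .lower_ubo = device->features.robustBufferAccess,
      .lower_ssbo = device->features.robustBufferAccess,
      .lower_image = device->features.robustImageAccess,
   };

   if (opts.lower_ubo || opts.lower_ssbo || opts.lower_image)
      NIR_PASS(progress, nir, nir_lower_robust_access, &opts);
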
src/compiler/nir/nir_lower_robust_access.c (new file)

@@ -0,0 +1,220 @@
/*
 * Copyright 2023 Valve Corporation
 * Copyright 2020 Raspberry Pi Ltd
 * SPDX-License-Identifier: MIT
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_intrinsics_indices.h"

static void
rewrite_offset(nir_builder *b, nir_intrinsic_instr *instr,
               uint32_t type_sz, uint32_t offset_src, nir_ssa_def *size)
{
   /* Compute the maximum offset being accessed and if it is out of bounds
    * rewrite it to 0 to ensure the access is within bounds.
    */
   const uint32_t access_size = instr->num_components * type_sz;
   nir_ssa_def *max_access_offset =
      nir_iadd_imm(b, instr->src[offset_src].ssa, access_size - 1);
   nir_ssa_def *offset =
      nir_bcsel(b, nir_uge(b, max_access_offset, size), nir_imm_int(b, 0),
                instr->src[offset_src].ssa);

   /* Rewrite offset */
   nir_instr_rewrite_src_ssa(&instr->instr, &instr->src[offset_src], offset);
}

/*
 * Wrap an intrinsic in an if, predicated on a "valid" condition. If the
 * intrinsic produces a destination, it will be zero in the invalid case.
 */
static void
wrap_in_if(nir_builder *b, nir_intrinsic_instr *instr, nir_ssa_def *valid)
{
   bool has_dest = nir_intrinsic_infos[instr->intrinsic].has_dest;
   nir_ssa_def *res, *zero;

   if (has_dest) {
      zero = nir_imm_zero(b, instr->dest.ssa.num_components,
                          instr->dest.ssa.bit_size);
   }

   nir_push_if(b, valid);
   {
      nir_instr *orig = nir_instr_clone(b->shader, &instr->instr);
      nir_builder_instr_insert(b, orig);

      if (has_dest)
         res = &nir_instr_as_intrinsic(orig)->dest.ssa;
   }
   nir_pop_if(b, NULL);

   if (has_dest)
      nir_ssa_def_rewrite_uses(&instr->dest.ssa, nir_if_phi(b, res, zero));

   /* We've cloned and wrapped, so drop original instruction */
   nir_instr_remove(&instr->instr);
}

static void
lower_buffer_load(nir_builder *b,
                  nir_intrinsic_instr *instr,
                  const nir_lower_robust_access_options *opts)
{
   uint32_t type_sz = nir_dest_bit_size(instr->dest) / 8;
   nir_ssa_def *size;
   nir_ssa_def *index = instr->src[0].ssa;

   if (instr->intrinsic == nir_intrinsic_load_ubo) {
      if (opts->skip_ubo_0)
         index = nir_iadd_imm(b, index, -1);

      size = nir_get_ubo_size(b, 32, index);
   } else {
      size = nir_get_ssbo_size(b, index);
   }

   rewrite_offset(b, instr, type_sz, 1, size);
}

static void
lower_buffer_store(nir_builder *b, nir_intrinsic_instr *instr)
{
   uint32_t type_sz = nir_src_bit_size(instr->src[0]) / 8;
   rewrite_offset(b, instr, type_sz, 2,
                  nir_get_ssbo_size(b, instr->src[1].ssa));
}

static void
lower_buffer_atomic(nir_builder *b, nir_intrinsic_instr *instr)
{
   rewrite_offset(b, instr, 4, 1, nir_get_ssbo_size(b, instr->src[0].ssa));
}

static void
lower_buffer_shared(nir_builder *b, nir_intrinsic_instr *instr)
{
   uint32_t type_sz, offset_src;
   if (instr->intrinsic == nir_intrinsic_load_shared) {
      offset_src = 0;
      type_sz = nir_dest_bit_size(instr->dest) / 8;
   } else if (instr->intrinsic == nir_intrinsic_store_shared) {
      offset_src = 1;
      type_sz = nir_src_bit_size(instr->src[0]) / 8;
   } else {
      /* atomic */
      offset_src = 0;
      type_sz = 4;
   }

   rewrite_offset(b, instr, type_sz, offset_src,
                  nir_imm_int(b, b->shader->info.shared_size));
}

static bool
lower_image(nir_builder *b,
            nir_intrinsic_instr *instr,
            const nir_lower_robust_access_options *opts)
{
   enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
   bool atomic = (instr->intrinsic == nir_intrinsic_image_atomic ||
                  instr->intrinsic == nir_intrinsic_image_atomic_swap);

   if (!opts->lower_image &&
       !(opts->lower_buffer_image && dim == GLSL_SAMPLER_DIM_BUF) &&
       !(opts->lower_image_atomic && atomic))
      return false;

   uint32_t num_coords = nir_image_intrinsic_coord_components(instr);
   bool is_array = nir_intrinsic_image_array(instr);
   nir_ssa_def *coord = instr->src[1].ssa;

   /* Get image size. imageSize for cubes returns the size of a single face. */
   unsigned size_components = num_coords;
   if (dim == GLSL_SAMPLER_DIM_CUBE && !is_array)
      size_components -= 1;

   nir_ssa_def *size =
      nir_image_size(b, size_components, 32,
                     instr->src[0].ssa, nir_imm_int(b, 0),
                     .image_array = is_array, .image_dim = dim);

   if (dim == GLSL_SAMPLER_DIM_CUBE) {
      nir_ssa_def *z = is_array ? nir_imul_imm(b, nir_channel(b, size, 2), 6)
                                : nir_imm_int(b, 6);
      size = nir_vec3(b, nir_channel(b, size, 0), nir_channel(b, size, 1), z);
   }

   /* Only execute if coordinates are in-bounds. Otherwise, return zero. */
   wrap_in_if(b, instr, nir_ball(b, nir_ult(b, coord, size)));
   return true;
}

static bool
lower(nir_builder *b, nir_instr *instr, void *_opts)
{
   const nir_lower_robust_access_options *opts = _opts;
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   b->cursor = nir_before_instr(instr);

   switch (intr->intrinsic) {
   case nir_intrinsic_image_load:
   case nir_intrinsic_image_store:
   case nir_intrinsic_image_atomic:
   case nir_intrinsic_image_atomic_swap:
      return lower_image(b, intr, opts);

   case nir_intrinsic_load_ubo:
      if (opts->lower_ubo) {
         lower_buffer_load(b, intr, opts);
         return true;
      }
      return false;

   case nir_intrinsic_load_ssbo:
      if (opts->lower_ssbo) {
         lower_buffer_load(b, intr, opts);
         return true;
      }
      return false;

   case nir_intrinsic_store_ssbo:
      if (opts->lower_ssbo) {
         lower_buffer_store(b, intr);
         return true;
      }
      return false;

   case nir_intrinsic_ssbo_atomic:
   case nir_intrinsic_ssbo_atomic_swap:
      if (opts->lower_ssbo) {
         lower_buffer_atomic(b, intr);
         return true;
      }
      return false;

   case nir_intrinsic_store_shared:
   case nir_intrinsic_load_shared:
   case nir_intrinsic_shared_atomic:
   case nir_intrinsic_shared_atomic_swap:
      if (opts->lower_shared) {
         lower_buffer_shared(b, intr);
         return true;
      }
      return false;

   default:
      return false;
   }
}

bool
nir_lower_robust_access(nir_shader *s,
                        const nir_lower_robust_access_options *opts)
{
   return nir_shader_instructions_pass(s, lower, nir_metadata_block_index |
                                       nir_metadata_dominance,
                                       (void *)opts);
}