asahi: implement robustBufferAccess2 (rba2) semantics for VBOs
Different APIs have different robustness requirements for vertex buffer (VBO) fetch. Add a knob to select the desired robustness level so we can implement robustBufferAccess2 in Honeykrisp. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29742>
This commit is contained in:
@@ -61,6 +61,9 @@ struct agx_vs_prolog_key {
|
|||||||
|
|
||||||
/* If !hw and the draw call is indexed, the index size */
|
/* If !hw and the draw call is indexed, the index size */
|
||||||
uint8_t sw_index_size_B;
|
uint8_t sw_index_size_B;
|
||||||
|
|
||||||
|
/* Robustness settings for the vertex fetch */
|
||||||
|
struct agx_robustness robustness;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct agx_fs_prolog_key {
|
struct agx_fs_prolog_key {
|
||||||
|
@@ -11,6 +11,11 @@
|
|||||||
#include "util/u_math.h"
|
#include "util/u_math.h"
|
||||||
#include "shader_enums.h"
|
#include "shader_enums.h"
|
||||||
|
|
||||||
|
struct ctx {
|
||||||
|
struct agx_attribute *attribs;
|
||||||
|
struct agx_robustness rs;
|
||||||
|
};
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
is_rgb10_a2(const struct util_format_description *desc)
|
is_rgb10_a2(const struct util_format_description *desc)
|
||||||
{
|
{
|
||||||
@@ -109,7 +114,8 @@ pass(struct nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
|||||||
if (intr->intrinsic != nir_intrinsic_load_input)
|
if (intr->intrinsic != nir_intrinsic_load_input)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
struct agx_attribute *attribs = data;
|
struct ctx *ctx = data;
|
||||||
|
struct agx_attribute *attribs = ctx->attribs;
|
||||||
b->cursor = nir_instr_remove(&intr->instr);
|
b->cursor = nir_instr_remove(&intr->instr);
|
||||||
|
|
||||||
nir_src *offset_src = nir_get_io_offset_src(intr);
|
nir_src *offset_src = nir_get_io_offset_src(intr);
|
||||||
@@ -190,14 +196,12 @@ pass(struct nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
|||||||
* before the load. That is faster than the 4 cmpsel required after the load,
|
* before the load. That is faster than the 4 cmpsel required after the load,
|
||||||
* and it avoids waiting on the load which should help prolog performance.
|
* and it avoids waiting on the load which should help prolog performance.
|
||||||
*
|
*
|
||||||
* TODO: Plumb through soft fault information to skip this.
|
* TODO: Optimize.
|
||||||
*
|
*
|
||||||
* TODO: Add a knob for robustBufferAccess2 semantics.
|
* TODO: We always clamp to handle null descriptors. Maybe optimize?
|
||||||
*/
|
*/
|
||||||
bool robust = true;
|
nir_def *oob = nir_ult(b, bounds, el);
|
||||||
if (robust) {
|
el = nir_bcsel(b, oob, nir_imm_int(b, 0), el);
|
||||||
el = nir_umin(b, el, bounds);
|
|
||||||
}
|
|
||||||
|
|
||||||
nir_def *base = nir_load_vbo_base_agx(b, buf_handle);
|
nir_def *base = nir_load_vbo_base_agx(b, buf_handle);
|
||||||
|
|
||||||
@@ -228,6 +232,12 @@ pass(struct nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
|||||||
b, interchange_comps, interchange_register_size, base, stride_offset_el,
|
b, interchange_comps, interchange_register_size, base, stride_offset_el,
|
||||||
.format = interchange_format, .base = shift);
|
.format = interchange_format, .base = shift);
|
||||||
|
|
||||||
|
/* TODO: Optimize per above */
|
||||||
|
if (ctx->rs.level >= AGX_ROBUSTNESS_D3D) {
|
||||||
|
nir_def *zero = nir_imm_zero(b, memory->num_components, memory->bit_size);
|
||||||
|
memory = nir_bcsel(b, oob, zero, memory);
|
||||||
|
}
|
||||||
|
|
||||||
unsigned dest_size = intr->def.bit_size;
|
unsigned dest_size = intr->def.bit_size;
|
||||||
|
|
||||||
/* Unpack but do not convert non-native non-array formats */
|
/* Unpack but do not convert non-native non-array formats */
|
||||||
@@ -290,9 +300,12 @@ pass(struct nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
agx_nir_lower_vbo(nir_shader *shader, struct agx_attribute *attribs)
|
agx_nir_lower_vbo(nir_shader *shader, struct agx_attribute *attribs,
|
||||||
|
struct agx_robustness robustness)
|
||||||
{
|
{
|
||||||
assert(shader->info.stage == MESA_SHADER_VERTEX);
|
assert(shader->info.stage == MESA_SHADER_VERTEX);
|
||||||
|
|
||||||
|
struct ctx ctx = {.attribs = attribs, .rs = robustness};
|
||||||
return nir_shader_intrinsics_pass(
|
return nir_shader_intrinsics_pass(
|
||||||
shader, pass, nir_metadata_block_index | nir_metadata_dominance, attribs);
|
shader, pass, nir_metadata_block_index | nir_metadata_dominance, &ctx);
|
||||||
}
|
}
|
||||||
|
@@ -33,7 +33,30 @@ struct agx_attribute {
|
|||||||
bool instanced : 1;
|
bool instanced : 1;
|
||||||
};
|
};
|
||||||
|
|
||||||
bool agx_nir_lower_vbo(nir_shader *shader, struct agx_attribute *attribs);
|
/* Robustness guarantees for out-of-bounds buffer access, ordered from the
 * weakest to the strongest guarantee so levels may be compared with <
 * (e.g. `level >= AGX_ROBUSTNESS_GL`).
 */
enum agx_robustness_level {
   /* No robustness: out-of-bounds access may fault. */
   AGX_ROBUSTNESS_DISABLED,

   /* Invalid load/store must not fault, but undefined value/effect. */
   AGX_ROBUSTNESS_GLES,

   /* Invalid load/store access something from the array (or 0). */
   AGX_ROBUSTNESS_GL,

   /* Invalid loads return 0 and invalid stores are dropped
    * (robustBufferAccess2 semantics).
    */
   AGX_ROBUSTNESS_D3D,
};

/* Robustness configuration passed to the VBO lowering. */
struct agx_robustness {
   enum agx_robustness_level level;

   /* Whether hardware "soft fault" is enabled. */
   bool soft_fault;
};
|
||||||
|
|
||||||
|
bool agx_nir_lower_vbo(nir_shader *shader, struct agx_attribute *attribs,
|
||||||
|
struct agx_robustness rs);
|
||||||
|
|
||||||
bool agx_vbo_supports_format(enum pipe_format format);
|
bool agx_vbo_supports_format(enum pipe_format format);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
@@ -56,7 +56,8 @@ agx_nir_lower_poly_stipple(nir_shader *s)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
lower_vbo(nir_shader *s, const struct agx_velem_key *key)
|
lower_vbo(nir_shader *s, const struct agx_velem_key *key,
|
||||||
|
const struct agx_robustness rs)
|
||||||
{
|
{
|
||||||
struct agx_attribute out[AGX_MAX_VBUFS];
|
struct agx_attribute out[AGX_MAX_VBUFS];
|
||||||
|
|
||||||
@@ -69,7 +70,7 @@ lower_vbo(nir_shader *s, const struct agx_velem_key *key)
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
return agx_nir_lower_vbo(s, out);
|
return agx_nir_lower_vbo(s, out, rs);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
@@ -166,7 +167,7 @@ agx_nir_vs_prolog(nir_builder *b, const void *key_)
|
|||||||
nir_export_agx(b, nir_load_instance_id(b), .base = 6 * 2);
|
nir_export_agx(b, nir_load_instance_id(b), .base = 6 * 2);
|
||||||
|
|
||||||
/* Now lower the resulting program using the key */
|
/* Now lower the resulting program using the key */
|
||||||
lower_vbo(b->shader, key->attribs);
|
lower_vbo(b->shader, key->attribs, key->robustness);
|
||||||
|
|
||||||
if (!key->hw) {
|
if (!key->hw) {
|
||||||
agx_nir_lower_index_buffer(b->shader, key->sw_index_size_B, false);
|
agx_nir_lower_index_buffer(b->shader, key->sw_index_size_B, false);
|
||||||
|
@@ -2213,6 +2213,14 @@ agx_update_vs(struct agx_context *ctx, unsigned index_size_B)
|
|||||||
struct agx_fast_link_key link_key = {
|
struct agx_fast_link_key link_key = {
|
||||||
.prolog.vs.hw = key.hw,
|
.prolog.vs.hw = key.hw,
|
||||||
.prolog.vs.sw_index_size_B = key.hw ? 0 : index_size_B,
|
.prolog.vs.sw_index_size_B = key.hw ? 0 : index_size_B,
|
||||||
|
|
||||||
|
/* TODO: We could optimize this */
|
||||||
|
.prolog.vs.robustness =
|
||||||
|
{
|
||||||
|
.level = AGX_ROBUSTNESS_GL,
|
||||||
|
.soft_fault = false,
|
||||||
|
},
|
||||||
|
|
||||||
.main = ctx->vs,
|
.main = ctx->vs,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user