agx: Allow drivers to lower texture handles

Rather than hardcoding u0_u1, this lets drivers map texture handles in whatever
way is convenient. In particular, this makes textures work properly with merged
shader stages (provided one of the stages is forced to use bindless access), by
giving each stage an independent texture heap.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26056>
Alyssa Rosenzweig authored on 2023-10-03 15:38:23 -04:00; committed by Marge Bot
commit 111e526f19, parent b69ab37bdc
8 changed files with 89 additions and 94 deletions
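For reference, the contract the diff below establishes: a driver (or an internal pass such as the lower_tex_handle_to_u0 added in agx_meta.c) rewrites nir_intrinsic_load_texture_handle_agx into a vec2 of (uniform register holding the 64-bit descriptor heap base, byte offset into that heap), and the new lower_load_from_texture_handle in agx_compile.c then folds nir_intrinsic_load_from_texture_handle_agx into heap base plus offset. Below is a minimal sketch of a driver-side lowering under that contract; the per-stage uniform slots (HEAP_BASE_UNIFORM_VS/HEAP_BASE_UNIFORM_FS) are hypothetical names invented for illustration of how merged stages could get independent heaps, while the 24-byte stride matches AGX_TEXTURE_DESC_STRIDE in the hunks below.

#include "nir_builder.h"

/* Hypothetical uniform slots holding each stage's texture heap base; a real
 * driver picks whatever layout is convenient, which is the point of this
 * change. */
#define HEAP_BASE_UNIFORM_VS 0
#define HEAP_BASE_UNIFORM_FS 4

static bool
lower_tex_handle_per_stage(nir_builder *b, nir_intrinsic_instr *intr,
                           void *data)
{
   if (intr->intrinsic != nir_intrinsic_load_texture_handle_agx)
      return false;

   /* Give each stage its own heap so merged stages do not collide */
   unsigned base_uniform = (b->shader->info.stage == MESA_SHADER_VERTEX)
                              ? HEAP_BASE_UNIFORM_VS
                              : HEAP_BASE_UNIFORM_FS;

   b->cursor = nir_instr_remove(&intr->instr);

   /* Handle = (uniform register holding the 64-bit heap base, byte offset of
    * the descriptor), using the 24-byte AGX texture descriptor stride */
   nir_def *handle =
      nir_vec2(b, nir_imm_int(b, base_uniform),
               nir_imul_imm(b, intr->src[0].ssa, 24));

   nir_def_rewrite_uses(&intr->def, handle);
   return true;
}

Such a pass would run via nir_shader_intrinsics_pass(shader, lower_tex_handle_per_stage, nir_metadata_block_index | nir_metadata_dominance, NULL). Note the handle's uniform index must resolve to a constant by the time the backend lowering runs, which is why agx_nir_layout_uniforms now constant-folds the shader after laying out uniforms.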


@@ -15,6 +15,7 @@
#include "agx_debug.h"
#include "agx_internal_formats.h"
#include "agx_nir.h"
#include "nir.h"
#include "nir_intrinsics.h"
/* Alignment for shader programs. I'm not sure what the optimal value is. */
@@ -2629,6 +2630,27 @@ lower_bit_size_callback(const nir_instr *instr, UNUSED void *_)
return 0;
}
static bool
lower_load_from_texture_handle(nir_builder *b, nir_intrinsic_instr *intr,
void *data)
{
if (intr->intrinsic != nir_intrinsic_load_from_texture_handle_agx)
return false;
/* Bindless handles are a vec2, where the first source is the (constant)
* uniform register number and the second source is the byte offset.
*/
nir_scalar uniform = nir_scalar_resolved(intr->src[0].ssa, 0);
unsigned uniform_idx = nir_scalar_as_uint(uniform);
b->cursor = nir_instr_remove(&intr->instr);
nir_def *base = nir_load_preamble(b, 1, 64, uniform_idx);
nir_def *offset = nir_u2u64(b, nir_channel(b, intr->src[0].ssa, 1));
nir_def_rewrite_uses(&intr->def, nir_iadd(b, base, offset));
return true;
}
static bool
agx_should_dump(nir_shader *nir, unsigned agx_dbg_bit)
{
@@ -2956,6 +2978,10 @@ agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key,
NIR_PASS_V(nir, agx_nir_lower_layer);
}
NIR_PASS_V(nir, nir_opt_constant_folding);
NIR_PASS_V(nir, nir_shader_intrinsics_pass, lower_load_from_texture_handle,
nir_metadata_block_index | nir_metadata_dominance, NULL);
out->push_count = key->reserved_preamble;
agx_optimize_nir(nir, &out->push_count);


@@ -17,27 +17,12 @@
#include "nir_intrinsics.h"
#include "nir_intrinsics_indices.h"
static nir_def *
texture_descriptor_ptr_for_handle(nir_builder *b, nir_def *handle)
{
/* Bindless handles are a vec2, where the first source is the (constant)
* uniform register number and the second source is the byte offset.
*/
nir_scalar uniform = nir_scalar_resolved(handle, 0);
unsigned uniform_idx = nir_scalar_as_uint(uniform);
nir_def *base = nir_load_preamble(b, 1, 64, uniform_idx);
nir_def *offset = nir_u2u64(b, nir_channel(b, handle, 1));
return nir_iadd(b, base, offset);
}
static nir_def *
texture_descriptor_ptr(nir_builder *b, nir_tex_instr *tex)
{
int handle_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
assert(handle_idx >= 0 && "must be bindless");
return texture_descriptor_ptr_for_handle(b, tex->src[handle_idx].src.ssa);
return nir_load_from_texture_handle_agx(b, tex->src[handle_idx].src.ssa);
}
static bool
@@ -430,7 +415,7 @@ image_texel_address(nir_builder *b, nir_intrinsic_instr *intr,
{
/* First, calculate the address of the PBE descriptor */
nir_def *desc_address =
texture_descriptor_ptr_for_handle(b, intr->src[0].ssa);
nir_load_from_texture_handle_agx(b, intr->src[0].ssa);
nir_def *coord = intr->src[1].ssa;
enum pipe_format format = nir_intrinsic_format(intr);


@@ -7,7 +7,23 @@
#include "agx_compile.h"
#include "agx_device.h" /* for AGX_MEMORY_TYPE_SHADER */
#include "agx_tilebuffer.h"
#include "nir.h"
#include "nir_builder.h"
#include "nir_intrinsics.h"
static bool
lower_tex_handle_to_u0(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
if (intr->intrinsic != nir_intrinsic_load_texture_handle_agx)
return false;
b->cursor = nir_instr_remove(&intr->instr);
nir_def_rewrite_uses(
&intr->def,
nir_vec2(b, nir_imm_int(b, 0), nir_imul_imm(b, intr->src[0].ssa, 24)));
return true;
}
static struct agx_meta_shader *
agx_compile_meta_shader(struct agx_meta_cache *cache, nir_shader *shader,
@@ -23,6 +39,10 @@ agx_compile_meta_shader(struct agx_meta_cache *cache, nir_shader *shader,
agx_nir_lower_tilebuffer(shader, tib, NULL, &bindless_base, NULL, true);
agx_nir_lower_monolithic_msaa(
shader, &(struct agx_msaa_state){.nr_samples = tib->nr_samples});
nir_shader_intrinsics_pass(
shader, lower_tex_handle_to_u0,
nir_metadata_dominance | nir_metadata_block_index, NULL);
}
key->libagx = cache->dev->libagx;


@@ -117,10 +117,6 @@ load_tilebuffer(nir_builder *b, struct agx_tilebuffer_layout *tib,
*
* Note that this lower happens after driver bindings are lowered, so the
* bindless handle is in the AGX-specific format.
*
* Assumes that texture states are mapped to a bindless table whose base is in u0_u1 and
* texture/PBE descriptors are alternated for each render target. This is
* ABI. If we need to make this more flexible for Vulkan later, we can.
*/
static nir_def *
handle_for_rt(nir_builder *b, unsigned base, unsigned rt, bool pbe,
@@ -129,13 +125,10 @@ handle_for_rt(nir_builder *b, unsigned base, unsigned rt, bool pbe,
unsigned index = base + (2 * rt) + (pbe ? 1 : 0);
*bindless = (*bindless) || (index >= AGX_NUM_TEXTURE_STATE_REGS);
if (*bindless) {
unsigned table = 0 * 2;
unsigned offset_B = index * AGX_TEXTURE_LENGTH;
return nir_imm_ivec2(b, table, offset_B);
} else {
if (*bindless)
return nir_load_texture_handle_agx(b, nir_imm_int(b, index));
else
return nir_imm_intN_t(b, index, 16);
}
}
static enum glsl_sampler_dim


@@ -9,23 +9,6 @@
#include "agx_state.h"
#include "nir_intrinsics_indices.h"
#define AGX_TEXTURE_DESC_STRIDE 24
/*
* Construct a bindless handle corresponding to an index into the binding
* tables. Our driver ABI maps everything to a table addressed by u0_u1, with
* indices mapped 1:1 with the binding table. So we want the bindless handle
* (u0_u1, index) which is encoded in NIR as (0, index).
*/
static nir_def *
index_to_handle(nir_builder *b, nir_def *index)
{
nir_def *table = nir_imm_int(b, 0);
nir_def *offset = nir_imul_imm(b, index, AGX_TEXTURE_DESC_STRIDE);
return nir_vec2(b, table, offset);
}
/*
* Lower binding table textures and images to texture state registers and (if
* necessary) bindless access into an internal table mapped like additional
@@ -37,7 +20,6 @@ index_to_handle(nir_builder *b, nir_def *index)
static bool
lower(nir_builder *b, nir_instr *instr, void *data)
{
bool *internal_bindless = data;
bool force_bindless = agx_nir_needs_texture_crawl(instr);
b->cursor = nir_before_instr(instr);
@@ -96,10 +78,8 @@ lower(nir_builder *b, nir_instr *instr, void *data)
if (nir_intrinsic_has_atomic_op(intr))
nir_intrinsic_set_atomic_op(intr, op);
*internal_bindless = true;
index = nir_iadd_imm(b, nir_imul_imm(b, index, 2), offset);
nir_src_rewrite(&intr->src[0], index_to_handle(b, index));
nir_src_rewrite(&intr->src[0], nir_load_texture_handle_agx(b, index));
} else if (instr->type == nir_instr_type_tex) {
nir_tex_instr *tex = nir_instr_as_tex(instr);
@@ -120,16 +100,15 @@ lower(nir_builder *b, nir_instr *instr, void *data)
if (!index)
index = nir_imm_int(b, tex->texture_index);
*internal_bindless = true;
nir_tex_instr_add_src(tex, nir_tex_src_texture_handle,
index_to_handle(b, index));
nir_load_texture_handle_agx(b, index));
}
return false;
}
bool
agx_nir_lower_bindings(nir_shader *shader, bool *internal_bindless)
agx_nir_lower_bindings(nir_shader *shader, bool *uses_bindless_samplers)
{
/* First lower index to offset so we can lower more naturally */
bool progress = nir_lower_tex(
@@ -142,6 +121,6 @@ agx_nir_lower_bindings(nir_shader *shader, bool *internal_bindless)
progress |= nir_shader_instructions_pass(
shader, lower, nir_metadata_block_index | nir_metadata_dominance,
internal_bindless);
uses_bindless_samplers);
return progress;
}


@@ -12,6 +12,8 @@
#include "nir_intrinsics.h"
#include "nir_intrinsics_indices.h"
#define AGX_TEXTURE_DESC_STRIDE 24
/*
* Lower all system values to uniform loads. This pass tries to compact ranges
* of contiguous uploaded uniforms to reduce the draw-time overhead of uploading
@@ -103,6 +105,17 @@ load_ubo(nir_builder *b, nir_intrinsic_instr *intr, void *bases)
intr->num_components, intr->def.bit_size);
}
static nir_def *
load_texture_handle(nir_builder *b, nir_intrinsic_instr *intr, void *base)
{
nir_def *uniform =
nir_load_sysval_agx(b, 1, 64, .desc_set = stage_table(b),
.binding = (uintptr_t)base, .flags = ~0);
return nir_vec2(b, nir_u2u32(b, uniform),
nir_imul_imm(b, intr->src[0].ssa, AGX_TEXTURE_DESC_STRIDE));
}
static nir_def *
lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr)
{
@@ -112,6 +125,8 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr)
switch (intr->intrinsic) {
case nir_intrinsic_load_ubo:
return load_ubo(b, intr, s->ubo_base);
case nir_intrinsic_load_texture_handle_agx:
return load_texture_handle(b, intr, &s->texture_base);
case nir_intrinsic_load_vbo_base_agx:
return load_sysval_indirect(b, 1, 64, AGX_SYSVAL_TABLE_ROOT, &u->vbo_base,
intr->src[0].ssa);
@@ -289,29 +304,6 @@ lay_out_table(struct agx_compiled_shader *shader, struct table_state *state,
return uniform;
}
/* Reserve u0_u1 for the texture base if needed for internal bindless operation.
* When we have too many textures/images for the available texture state
* registers, an early lowering pass in the driver spills some textures/images
* out of texture state registers and instead accesses them as bindless
* internally. That pass assumes u0_u1 points to the texture descriptors
* otherwise bound to texture state registers.
*/
static void
reserve_internal_bindless(struct state *state, enum pipe_shader_type stage)
{
struct table_state *table = &state->tables[AGX_SYSVAL_STAGE(stage)];
struct agx_stage_uniforms *s = NULL;
const unsigned len_words = sizeof(s->texture_base) / sizeof(uint16_t);
static_assert(offsetof(struct agx_stage_uniforms, texture_base) == 0, "ABI");
static_assert(sizeof(s->texture_base) == 8, "64-bit pointer");
BITSET_SET_RANGE(table->pushed, 0, len_words - 1);
for (unsigned i = 0; i < len_words; ++i)
table->element_size[i] = len_words;
}
static unsigned
lay_out_uniforms(struct agx_compiled_shader *shader, struct state *state)
{
@@ -328,15 +320,21 @@ lay_out_uniforms(struct agx_compiled_shader *shader, struct state *state)
nir_intrinsic_instr *intr = *intr_;
uint8_t table = nir_intrinsic_desc_set(intr);
uint16_t offset = nir_intrinsic_binding(intr);
bool load_uniform_location = nir_intrinsic_flags(intr);
struct agx_push_range *range =
find_push_range_containing(shader, table, offset);
unsigned base = range->uniform + ((offset - range->offset) / 2);
nir_builder b = nir_builder_at(nir_instr_remove(&(intr->instr)));
nir_def *repl;
nir_def *repl = nir_load_preamble(
&b, intr->def.num_components, intr->def.bit_size,
.base = range->uniform + ((offset - range->offset) / 2));
if (load_uniform_location) {
repl = nir_imm_int(&b, base);
} else {
repl = nir_load_preamble(&b, intr->def.num_components,
intr->def.bit_size, .base = base);
}
nir_def_rewrite_uses(&intr->def, repl);
}
@@ -353,7 +351,7 @@ agx_nir_lower_sysvals(nir_shader *shader)
}
bool
agx_nir_layout_uniforms(nir_shader *shader, bool internal_bindless,
agx_nir_layout_uniforms(nir_shader *shader,
struct agx_compiled_shader *compiled,
unsigned *push_size)
{
@@ -362,12 +360,12 @@ agx_nir_layout_uniforms(nir_shader *shader, bool internal_bindless,
nir_metadata_block_index | nir_metadata_dominance,
&state);
if (internal_bindless)
reserve_internal_bindless(&state, shader->info.stage);
*push_size = lay_out_uniforms(compiled, &state);
util_dynarray_fini(&state.loads);
/* Make sure texture handles have constants associated */
nir_opt_constant_folding(shader);
return true;
}


@@ -1655,9 +1655,6 @@ agx_compile_variant(struct agx_device *dev, struct agx_uncompiled_shader *so,
NIR_PASS_V(nir, agx_nir_lower_tilebuffer, &tib, colormasks, &rt_spill,
&force_translucent, false);
/* If anything spilled, we have bindless texture */
so->internal_bindless |= (rt_spill != rt_spill_base);
NIR_PASS_V(nir, agx_nir_lower_sample_intrinsics);
NIR_PASS_V(nir, agx_nir_lower_monolithic_msaa,
&(struct agx_msaa_state){
@@ -1690,7 +1687,7 @@ agx_compile_variant(struct agx_device *dev, struct agx_uncompiled_shader *so,
}
NIR_PASS_V(nir, agx_nir_lower_sysvals);
NIR_PASS_V(nir, agx_nir_layout_uniforms, so->internal_bindless, compiled,
NIR_PASS_V(nir, agx_nir_layout_uniforms, compiled,
&base_key.reserved_preamble);
agx_compile_shader_nir(nir, &base_key, debug, &binary, &compiled->info);
@@ -1781,7 +1778,7 @@ agx_shader_initialize(struct agx_device *dev, struct agx_uncompiled_shader *so,
/* We need to lower binding tables before calling agx_preprocess_nir, since
* that does texture lowering that needs to know the binding model.
*/
NIR_PASS_V(nir, agx_nir_lower_bindings, &so->internal_bindless);
NIR_PASS_V(nir, agx_nir_lower_bindings, &so->uses_bindless_samplers);
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
/* Lower to maximum colour buffers, the excess stores will get cleaned up


@@ -203,11 +203,8 @@ struct agx_uncompiled_shader {
struct hash_table *variants;
bool has_xfb_info;
/* If set, we need to pass the address of the texture/image table as uniform
* u0_u1 due to binding tables that were lowered to be internally bindless
* with that base address.
*/
bool internal_bindless;
/* Whether the shader accesses indexed samplers via the bindless heap */
bool uses_bindless_samplers;
/* Set on VS, passed to FS for linkage */
unsigned base_varying;
@@ -736,11 +733,11 @@ uint64_t agx_upload_stage_uniforms(struct agx_batch *batch, uint64_t textures,
bool agx_nir_lower_sysvals(nir_shader *shader);
bool agx_nir_layout_uniforms(nir_shader *shader, bool internal_bindless,
bool agx_nir_layout_uniforms(nir_shader *shader,
struct agx_compiled_shader *compiled,
unsigned *push_size);
bool agx_nir_lower_bindings(nir_shader *shader, bool *internal_bindless);
bool agx_nir_lower_bindings(nir_shader *shader, bool *uses_bindless_samplers);
bool agx_batch_is_active(struct agx_batch *batch);
bool agx_batch_is_submitted(struct agx_batch *batch);