agx: Allow drivers to lower texture handles
Rather than hardcoding u0_u1, this lets drivers map texture handles in
whatever way is convenient. In particular, this makes textures work properly
with merged shader stages (provided one of the stages is forced to use
bindless access), by giving each stage an independent texture heap.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26056>
committed by Marge Bot
parent b69ab37bdc
commit 111e526f19
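For context before the diff: the commit splits handle lowering in two. The
driver now emits load_texture_handle_agx, a vec2 of (uniform register holding
the heap base, byte offset into the heap), in whatever way suits it, and a new
backend pass, lower_load_from_texture_handle, turns that pair into a 64-bit
descriptor address. Below is a minimal sketch of a driver-side lowering under
this scheme, written in the style of the lower_tex_handle_to_u0 pass added in
agx_meta.c; the pass name and the per-stage uniform base parameter are
hypothetical, illustrating the flexibility the commit adds rather than code it
contains.

#include "nir.h"
#include "nir_builder.h"

#define AGX_TEXTURE_DESC_STRIDE 24 /* descriptor stride used throughout the diff */

static bool
lower_tex_handle_per_stage(nir_builder *b, nir_intrinsic_instr *intr,
                           void *data)
{
   if (intr->intrinsic != nir_intrinsic_load_texture_handle_agx)
      return false;

   /* Hypothetical convention: each merged stage supplies the uniform
    * register pair holding its own heap pointer; 0 reproduces the old
    * hardcoded u0_u1 ABI.
    */
   unsigned base_uniform = *(unsigned *)data;

   b->cursor = nir_instr_remove(&intr->instr);
   nir_def_rewrite_uses(
      &intr->def,
      nir_vec2(b, nir_imm_int(b, base_uniform),
               nir_imul_imm(b, intr->src[0].ssa, AGX_TEXTURE_DESC_STRIDE)));
   return true;
}

Running this once per stage with a distinct base, e.g.
nir_shader_intrinsics_pass(shader, lower_tex_handle_per_stage,
nir_metadata_block_index | nir_metadata_dominance, &base), is one way each
merged stage could get the independent texture heap described above.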
@@ -15,6 +15,7 @@
 #include "agx_debug.h"
 #include "agx_internal_formats.h"
 #include "agx_nir.h"
+#include "nir.h"
 #include "nir_intrinsics.h"
 
 /* Alignment for shader programs. I'm not sure what the optimal value is. */
@@ -2629,6 +2630,27 @@ lower_bit_size_callback(const nir_instr *instr, UNUSED void *_)
    return 0;
 }
 
+static bool
+lower_load_from_texture_handle(nir_builder *b, nir_intrinsic_instr *intr,
+                               void *data)
+{
+   if (intr->intrinsic != nir_intrinsic_load_from_texture_handle_agx)
+      return false;
+
+   /* Bindless handles are a vec2, where the first source is the (constant)
+    * uniform register number and the second source is the byte offset.
+    */
+   nir_scalar uniform = nir_scalar_resolved(intr->src[0].ssa, 0);
+   unsigned uniform_idx = nir_scalar_as_uint(uniform);
+
+   b->cursor = nir_instr_remove(&intr->instr);
+   nir_def *base = nir_load_preamble(b, 1, 64, uniform_idx);
+   nir_def *offset = nir_u2u64(b, nir_channel(b, intr->src[0].ssa, 1));
+
+   nir_def_rewrite_uses(&intr->def, nir_iadd(b, base, offset));
+   return true;
+}
+
 static bool
 agx_should_dump(nir_shader *nir, unsigned agx_dbg_bit)
 {
@@ -2956,6 +2978,10 @@ agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key,
       NIR_PASS_V(nir, agx_nir_lower_layer);
    }
 
+   NIR_PASS_V(nir, nir_opt_constant_folding);
+   NIR_PASS_V(nir, nir_shader_intrinsics_pass, lower_load_from_texture_handle,
+              nir_metadata_block_index | nir_metadata_dominance, NULL);
+
    out->push_count = key->reserved_preamble;
    agx_optimize_nir(nir, &out->push_count);
 
@@ -17,27 +17,12 @@
 #include "nir_intrinsics.h"
 #include "nir_intrinsics_indices.h"
 
-static nir_def *
-texture_descriptor_ptr_for_handle(nir_builder *b, nir_def *handle)
-{
-   /* Bindless handles are a vec2, where the first source is the (constant)
-    * uniform register number and the second source is the byte offset.
-    */
-   nir_scalar uniform = nir_scalar_resolved(handle, 0);
-   unsigned uniform_idx = nir_scalar_as_uint(uniform);
-
-   nir_def *base = nir_load_preamble(b, 1, 64, uniform_idx);
-   nir_def *offset = nir_u2u64(b, nir_channel(b, handle, 1));
-
-   return nir_iadd(b, base, offset);
-}
-
 static nir_def *
 texture_descriptor_ptr(nir_builder *b, nir_tex_instr *tex)
 {
    int handle_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
    assert(handle_idx >= 0 && "must be bindless");
-   return texture_descriptor_ptr_for_handle(b, tex->src[handle_idx].src.ssa);
+   return nir_load_from_texture_handle_agx(b, tex->src[handle_idx].src.ssa);
 }
 
 static bool
@@ -430,7 +415,7 @@ image_texel_address(nir_builder *b, nir_intrinsic_instr *intr,
 {
    /* First, calculate the address of the PBE descriptor */
    nir_def *desc_address =
-      texture_descriptor_ptr_for_handle(b, intr->src[0].ssa);
+      nir_load_from_texture_handle_agx(b, intr->src[0].ssa);
 
    nir_def *coord = intr->src[1].ssa;
    enum pipe_format format = nir_intrinsic_format(intr);
@@ -7,7 +7,23 @@
 #include "agx_compile.h"
 #include "agx_device.h" /* for AGX_MEMORY_TYPE_SHADER */
 #include "agx_tilebuffer.h"
+#include "nir.h"
+#include "nir_builder.h"
 #include "nir_intrinsics.h"
 
+static bool
+lower_tex_handle_to_u0(nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{
+   if (intr->intrinsic != nir_intrinsic_load_texture_handle_agx)
+      return false;
+
+   b->cursor = nir_instr_remove(&intr->instr);
+   nir_def_rewrite_uses(
+      &intr->def,
+      nir_vec2(b, nir_imm_int(b, 0), nir_imul_imm(b, intr->src[0].ssa, 24)));
+
+   return true;
+}
+
 static struct agx_meta_shader *
 agx_compile_meta_shader(struct agx_meta_cache *cache, nir_shader *shader,
@@ -23,6 +39,10 @@ agx_compile_meta_shader(struct agx_meta_cache *cache, nir_shader *shader,
       agx_nir_lower_tilebuffer(shader, tib, NULL, &bindless_base, NULL, true);
       agx_nir_lower_monolithic_msaa(
         shader, &(struct agx_msaa_state){.nr_samples = tib->nr_samples});
+
+      nir_shader_intrinsics_pass(
+         shader, lower_tex_handle_to_u0,
+         nir_metadata_dominance | nir_metadata_block_index, NULL);
    }
 
    key->libagx = cache->dev->libagx;
@@ -117,10 +117,6 @@ load_tilebuffer(nir_builder *b, struct agx_tilebuffer_layout *tib,
  *
  * Note that this lower happens after driver bindings are lowered, so the
  * bindless handle is in the AGX-specific format.
- *
- * Assumes that texture states are mapped to a bindless table is in u0_u1 and
- * texture/PBE descriptors are alternated for each render target. This is
- * ABI. If we need to make this more flexible for Vulkan later, we can.
  */
 static nir_def *
 handle_for_rt(nir_builder *b, unsigned base, unsigned rt, bool pbe,
@@ -129,13 +125,10 @@
    unsigned index = base + (2 * rt) + (pbe ? 1 : 0);
    *bindless = (*bindless) || (index >= AGX_NUM_TEXTURE_STATE_REGS);
 
-   if (*bindless) {
-      unsigned table = 0 * 2;
-      unsigned offset_B = index * AGX_TEXTURE_LENGTH;
-      return nir_imm_ivec2(b, table, offset_B);
-   } else {
+   if (*bindless)
+      return nir_load_texture_handle_agx(b, nir_imm_int(b, index));
+   else
       return nir_imm_intN_t(b, index, 16);
-   }
 }
 
 static enum glsl_sampler_dim
@@ -9,23 +9,6 @@
 #include "agx_state.h"
 #include "nir_intrinsics_indices.h"
 
-#define AGX_TEXTURE_DESC_STRIDE 24
-
-/*
- * Construct a bindless handle corresponding to an index into the binding
- * tables. Our driver ABI maps everything to a table addressed by u0_u1, with
- * indices mapped 1:1 with the binding table. So we want the bindless handle
- * (u0_u1, index) which is encoded in NIR as (0, index).
- */
-static nir_def *
-index_to_handle(nir_builder *b, nir_def *index)
-{
-   nir_def *table = nir_imm_int(b, 0);
-   nir_def *offset = nir_imul_imm(b, index, AGX_TEXTURE_DESC_STRIDE);
-
-   return nir_vec2(b, table, offset);
-}
-
 /*
  * Lower binding table textures and images to texture state registers and (if
  * necessary) bindless access into an internal table mapped like additional
@@ -37,7 +20,6 @@ index_to_handle(nir_builder *b, nir_def *index)
 static bool
 lower(nir_builder *b, nir_instr *instr, void *data)
 {
-   bool *internal_bindless = data;
    bool force_bindless = agx_nir_needs_texture_crawl(instr);
    b->cursor = nir_before_instr(instr);
 
@@ -96,10 +78,8 @@ lower(nir_builder *b, nir_instr *instr, void *data)
       if (nir_intrinsic_has_atomic_op(intr))
          nir_intrinsic_set_atomic_op(intr, op);
 
-      *internal_bindless = true;
-
       index = nir_iadd_imm(b, nir_imul_imm(b, index, 2), offset);
-      nir_src_rewrite(&intr->src[0], index_to_handle(b, index));
+      nir_src_rewrite(&intr->src[0], nir_load_texture_handle_agx(b, index));
    } else if (instr->type == nir_instr_type_tex) {
       nir_tex_instr *tex = nir_instr_as_tex(instr);
 
@@ -120,16 +100,15 @@ lower(nir_builder *b, nir_instr *instr, void *data)
       if (!index)
          index = nir_imm_int(b, tex->texture_index);
 
-      *internal_bindless = true;
       nir_tex_instr_add_src(tex, nir_tex_src_texture_handle,
-                            index_to_handle(b, index));
+                            nir_load_texture_handle_agx(b, index));
    }
 
    return false;
 }
 
 bool
-agx_nir_lower_bindings(nir_shader *shader, bool *internal_bindless)
+agx_nir_lower_bindings(nir_shader *shader, bool *uses_bindless_samplers)
 {
    /* First lower index to offset so we can lower more naturally */
    bool progress = nir_lower_tex(
@@ -142,6 +121,6 @@ agx_nir_lower_bindings(nir_shader *shader, bool *internal_bindless)
 
    progress |= nir_shader_instructions_pass(
       shader, lower, nir_metadata_block_index | nir_metadata_dominance,
-      internal_bindless);
+      uses_bindless_samplers);
    return progress;
 }
@@ -12,6 +12,8 @@
 #include "nir_intrinsics.h"
 #include "nir_intrinsics_indices.h"
 
+#define AGX_TEXTURE_DESC_STRIDE 24
+
 /*
  * Lower all system values to uniform loads. This pass tries to compact ranges
  * of contiguous uploaded uniforms to reduce the draw-time overhead of uploading
@@ -103,6 +105,17 @@ load_ubo(nir_builder *b, nir_intrinsic_instr *intr, void *bases)
                               intr->num_components, intr->def.bit_size);
 }
 
+static nir_def *
+load_texture_handle(nir_builder *b, nir_intrinsic_instr *intr, void *base)
+{
+   nir_def *uniform =
+      nir_load_sysval_agx(b, 1, 64, .desc_set = stage_table(b),
+                          .binding = (uintptr_t)base, .flags = ~0);
+
+   return nir_vec2(b, nir_u2u32(b, uniform),
+                   nir_imul_imm(b, intr->src[0].ssa, AGX_TEXTURE_DESC_STRIDE));
+}
+
 static nir_def *
 lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr)
 {
@@ -112,6 +125,8 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr)
    switch (intr->intrinsic) {
    case nir_intrinsic_load_ubo:
       return load_ubo(b, intr, s->ubo_base);
+   case nir_intrinsic_load_texture_handle_agx:
+      return load_texture_handle(b, intr, &s->texture_base);
    case nir_intrinsic_load_vbo_base_agx:
       return load_sysval_indirect(b, 1, 64, AGX_SYSVAL_TABLE_ROOT, &u->vbo_base,
                                   intr->src[0].ssa);
@@ -289,29 +304,6 @@ lay_out_table(struct agx_compiled_shader *shader, struct table_state *state,
    return uniform;
 }
 
-/* Reserve u0_u1 for the texture base if needed for internal bindless operation.
- * When we have too many textures/images for the available texture state
- * registers, an early lowering pass in the driver spills some textures/images
- * out of texture state registers and instead accesses them as bindless
- * internally. That pass assumes u0_u1 points to the texture descriptors
- * otherwise bound to texture state registers.
- */
-static void
-reserve_internal_bindless(struct state *state, enum pipe_shader_type stage)
-{
-   struct table_state *table = &state->tables[AGX_SYSVAL_STAGE(stage)];
-   struct agx_stage_uniforms *s = NULL;
-   const unsigned len_words = sizeof(s->texture_base) / sizeof(uint16_t);
-
-   static_assert(offsetof(struct agx_stage_uniforms, texture_base) == 0, "ABI");
-   static_assert(sizeof(s->texture_base) == 8, "64-bit pointer");
-
-   BITSET_SET_RANGE(table->pushed, 0, len_words - 1);
-
-   for (unsigned i = 0; i < len_words; ++i)
-      table->element_size[i] = len_words;
-}
-
 static unsigned
 lay_out_uniforms(struct agx_compiled_shader *shader, struct state *state)
 {
@@ -328,15 +320,21 @@
       nir_intrinsic_instr *intr = *intr_;
       uint8_t table = nir_intrinsic_desc_set(intr);
       uint16_t offset = nir_intrinsic_binding(intr);
+      bool load_uniform_location = nir_intrinsic_flags(intr);
 
       struct agx_push_range *range =
          find_push_range_containing(shader, table, offset);
+      unsigned base = range->uniform + ((offset - range->offset) / 2);
 
       nir_builder b = nir_builder_at(nir_instr_remove(&(intr->instr)));
+      nir_def *repl;
 
-      nir_def *repl = nir_load_preamble(
-         &b, intr->def.num_components, intr->def.bit_size,
-         .base = range->uniform + ((offset - range->offset) / 2));
+      if (load_uniform_location) {
+         repl = nir_imm_int(&b, base);
+      } else {
+         repl = nir_load_preamble(&b, intr->def.num_components,
+                                  intr->def.bit_size, .base = base);
+      }
 
       nir_def_rewrite_uses(&intr->def, repl);
    }
@@ -353,7 +351,7 @@
 }
 
 bool
-agx_nir_layout_uniforms(nir_shader *shader, bool internal_bindless,
+agx_nir_layout_uniforms(nir_shader *shader,
                         struct agx_compiled_shader *compiled,
                         unsigned *push_size)
 {
@@ -362,12 +360,12 @@ agx_nir_layout_uniforms(nir_shader *shader, bool internal_bindless,
                            nir_metadata_block_index | nir_metadata_dominance,
                            &state);
 
-   if (internal_bindless)
-      reserve_internal_bindless(&state, shader->info.stage);
-
    *push_size = lay_out_uniforms(compiled, &state);
 
    util_dynarray_fini(&state.loads);
 
+   /* Make sure texture handles have constants associated */
+   nir_opt_constant_folding(shader);
+
    return true;
 }
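Putting the two halves together: the gallium lowering above composes with the
backend pass in agx_compile.c roughly as follows (an illustrative pseudo-NIR
trace, not literal compiler output; u stands for whichever uniform register
agx_nir_layout_uniforms assigned to texture_base):

/* Driver (this file): load_texture_handle_agx(index) becomes */
handle = vec2(u2u32(load_sysval_agx(.flags = ~0)), /* uniform location, not value */
              imul_imm(index, AGX_TEXTURE_DESC_STRIDE));

/* agx_nir_layout_uniforms replaces the flagged load with imm_int(u), and the
 * trailing nir_opt_constant_folding makes component 0 a constant: */
handle = vec2(imm_int(u), imul_imm(index, 24));

/* Backend (agx_compile.c): lower_load_from_texture_handle reads u with
 * nir_scalar_as_uint, which is why the constant fold must run first, then */
descriptor = iadd(load_preamble(u), u2u64(channel(handle, 1)));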
@@ -1655,9 +1655,6 @@ agx_compile_variant(struct agx_device *dev, struct agx_uncompiled_shader *so,
       NIR_PASS_V(nir, agx_nir_lower_tilebuffer, &tib, colormasks, &rt_spill,
                  &force_translucent, false);
 
-      /* If anything spilled, we have bindless texture */
-      so->internal_bindless |= (rt_spill != rt_spill_base);
-
       NIR_PASS_V(nir, agx_nir_lower_sample_intrinsics);
       NIR_PASS_V(nir, agx_nir_lower_monolithic_msaa,
                  &(struct agx_msaa_state){
@@ -1690,7 +1687,7 @@ agx_compile_variant(struct agx_device *dev, struct agx_uncompiled_shader *so,
    }
 
    NIR_PASS_V(nir, agx_nir_lower_sysvals);
-   NIR_PASS_V(nir, agx_nir_layout_uniforms, so->internal_bindless, compiled,
+   NIR_PASS_V(nir, agx_nir_layout_uniforms, compiled,
               &base_key.reserved_preamble);
 
    agx_compile_shader_nir(nir, &base_key, debug, &binary, &compiled->info);
@@ -1781,7 +1778,7 @@ agx_shader_initialize(struct agx_device *dev, struct agx_uncompiled_shader *so,
    /* We need to lower binding tables before calling agx_preprocess_nir, since
     * that does texture lowering that needs to know the binding model.
     */
-   NIR_PASS_V(nir, agx_nir_lower_bindings, &so->internal_bindless);
+   NIR_PASS_V(nir, agx_nir_lower_bindings, &so->uses_bindless_samplers);
 
    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
       /* Lower to maximum colour buffers, the excess stores will get cleaned up
@@ -203,11 +203,8 @@ struct agx_uncompiled_shader {
    struct hash_table *variants;
    bool has_xfb_info;
 
-   /* If set, we need to pass the address of the texture/image table as uniform
-    * u0_u1 due to binding tables that were lowered to be internally bindless
-    * with that base address.
-    */
-   bool internal_bindless;
+   /* Whether the shader accesses indexed samplers via the bindless heap */
+   bool uses_bindless_samplers;
 
    /* Set on VS, passed to FS for linkage */
    unsigned base_varying;
@@ -736,11 +733,11 @@ uint64_t agx_upload_stage_uniforms(struct agx_batch *batch, uint64_t textures,
 
 bool agx_nir_lower_sysvals(nir_shader *shader);
 
-bool agx_nir_layout_uniforms(nir_shader *shader, bool internal_bindless,
+bool agx_nir_layout_uniforms(nir_shader *shader,
                              struct agx_compiled_shader *compiled,
                              unsigned *push_size);
 
-bool agx_nir_lower_bindings(nir_shader *shader, bool *internal_bindless);
+bool agx_nir_lower_bindings(nir_shader *shader, bool *uses_bindless_samplers);
 
 bool agx_batch_is_active(struct agx_batch *batch);
 bool agx_batch_is_submitted(struct agx_batch *batch);