From 76e3bd56f6ecd157b509d4fe37939d59e6d7430b Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Fri, 26 Jul 2024 09:43:44 -0400 Subject: [PATCH] asahi: offset buffer images in software. This is needed for honeykrisp to implement uniformTexelBufferOffsetSingleTexelAlignment. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/asahi/genxml/cmdbuf.xml | 6 ++++-- src/asahi/lib/agx_nir_lower_texture.c | 31 ++++++++++++++++++++------- src/asahi/lib/shaders/texture.cl | 27 ++++++++++++++++++++--- src/gallium/drivers/asahi/agx_state.c | 5 ++--- 4 files changed, 53 insertions(+), 16 deletions(-) diff --git a/src/asahi/genxml/cmdbuf.xml b/src/asahi/genxml/cmdbuf.xml index 8757c614a5f..d937477b8b2 100644 --- a/src/asahi/genxml/cmdbuf.xml +++ b/src/asahi/genxml/cmdbuf.xml @@ -259,6 +259,7 @@ + @@ -301,8 +302,9 @@ - - + + + diff --git a/src/asahi/lib/agx_nir_lower_texture.c b/src/asahi/lib/agx_nir_lower_texture.c index e6fa26f9090..e6b40a27a73 100644 --- a/src/asahi/lib/agx_nir_lower_texture.c +++ b/src/asahi/lib/agx_nir_lower_texture.c @@ -134,15 +134,18 @@ static bool lower_buffer_texture(nir_builder *b, nir_tex_instr *tex) { nir_def *coord = nir_steal_tex_src(tex, nir_tex_src_coord); + nir_def *size = nir_get_texture_size(b, tex); + nir_def *oob = nir_uge(b, coord, size); + + /* Apply the buffer offset after calculating oob but before remapping */ + nir_def *desc = texture_descriptor_ptr(b, tex); + coord = libagx_buffer_texture_offset(b, desc, coord); /* Map out-of-bounds indices to out-of-bounds coordinates for robustness2 * semantics from the hardware. */ - nir_def *size = nir_get_texture_size(b, tex); - nir_def *oob = nir_uge(b, coord, size); coord = nir_bcsel(b, oob, nir_imm_int(b, -1), coord); - nir_def *desc = texture_descriptor_ptr(b, tex); bool is_float = nir_alu_type_get_base_type(tex->dest_type) == nir_type_float; /* Lower RGB32 reads if the format requires. 
If we are out-of-bounds, we use @@ -502,6 +505,15 @@ lower_buffer_image(nir_builder *b, nir_intrinsic_instr *intr) nir_def *coord_vector = intr->src[1].ssa; nir_def *coord = nir_channel(b, coord_vector, 0); + /* If we're not bindless, assume we don't need an offset (GL driver) */ + if (intr->intrinsic == nir_intrinsic_bindless_image_load) { + nir_def *desc = nir_load_from_texture_handle_agx(b, intr->src[0].ssa); + coord = libagx_buffer_texture_offset(b, desc, coord); + } else if (intr->intrinsic == nir_intrinsic_bindless_image_store) { + nir_def *desc = nir_load_from_texture_handle_agx(b, intr->src[0].ssa); + coord = libagx_buffer_image_offset(b, desc, coord); + } + /* Lower the buffer load/store to a 2D image load/store, matching the 2D * texture/PBE descriptor the driver supplies for buffer images. */ @@ -667,14 +679,17 @@ lower_robustness(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data) } /* Replace the last coordinate component with a large coordinate for - * out-of-bounds. We pick 65535 as it fits in 16-bit, and it is not signed as - * 32-bit so we won't get in-bounds coordinates for arrays due to two's - * complement wraparound. This ensures the resulting hardware coordinate is - * definitely out-of-bounds, giving hardware-level robustness2 behaviour. + * out-of-bounds. We pick 0xFFF0 as it fits in 16-bit, and it is not signed + * as 32-bit so we won't get in-bounds coordinates for arrays due to two's + * complement wraparound. Additionally it still meets this requirement after + * adding 0xF, the maximum tail offset. + * + * This ensures the resulting hardware coordinate is definitely + * out-of-bounds, giving hardware-level robustness2 behaviour. 
*/ unsigned c = size_components - 1; nir_def *r = - nir_bcsel(b, oob, nir_imm_int(b, 65535), nir_channel(b, coord, c)); + nir_bcsel(b, oob, nir_imm_int(b, 0xFFF0), nir_channel(b, coord, c)); nir_src_rewrite(&intr->src[1], nir_vector_insert_imm(b, coord, r, c)); return true; diff --git a/src/asahi/lib/shaders/texture.cl b/src/asahi/lib/shaders/texture.cl index 2940c5e04e6..1217ee641e5 100644 --- a/src/asahi/lib/shaders/texture.cl +++ b/src/asahi/lib/shaders/texture.cl @@ -24,7 +24,7 @@ libagx_txs(constant struct agx_texture_packed *ptr, uint16_t lod, * Instead, we stash it in the software-defined section. */ if (is_buffer) - return d.software_defined; + return d.buffer_size_sw; /* Load standard dimensions */ uint3 size = (uint3)(d.width, d.height, d.depth); @@ -108,7 +108,10 @@ libagx_lower_txf_robustness(constant struct agx_texture_packed *ptr, valid &= layer < (linear ? d.depth_linear : d.depth); } - return valid ? x : 0xFFFF; + /* The maximum tail offset is 0xF so by returning 0xFFF0 for out-of-bounds we + * stay under 0xFFFF and keep robustness after offsetting. + */ + return valid ? x : 0xFFF0; } static uint32_t @@ -196,7 +199,9 @@ libagx_buffer_texel_address(constant const struct agx_pbe_packed *ptr, uint4 coord, uint bytes_per_pixel_B) { agx_unpack(NULL, ptr, PBE, d); - return d.buffer + (uint64_t)(coord.x * bytes_per_pixel_B); + + uint32_t x_el = d.buffer_offset_sw + coord.x; + return d.buffer + (uint64_t)(x_el * bytes_per_pixel_B); } /* Buffer texture lowerings */ @@ -216,3 +221,19 @@ libagx_texture_load_rgb32(constant struct agx_texture_packed *ptr, uint coord, return (uint4)(*data, is_float ? 
as_uint(1.0f) : 1); } + +uint +libagx_buffer_texture_offset(constant struct agx_texture_packed *ptr, uint x) +{ + agx_unpack(NULL, ptr, TEXTURE, d); + + return x + d.buffer_offset_sw; +} + +uint +libagx_buffer_image_offset(constant struct agx_pbe_packed *ptr, uint x) +{ + agx_unpack(NULL, ptr, PBE, d); + + return x + d.buffer_offset_sw; +} diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c index 54efd709bd2..10c1296efe1 100644 --- a/src/gallium/drivers/asahi/agx_state.c +++ b/src/gallium/drivers/asahi/agx_state.c @@ -722,9 +722,8 @@ agx_pack_texture(void *out, struct agx_resource *rsrc, cfg.width = AGX_TEXTURE_BUFFER_WIDTH; cfg.height = DIV_ROUND_UP(size_el, cfg.width); cfg.first_level = cfg.last_level = 0; - - /* Stash the actual size in the software-defined section for txs */ - cfg.software_defined = size_el; + cfg.buffer_size_sw = size_el; + cfg.buffer_offset_sw = 0; } else { cfg.width = rsrc->base.width0; cfg.height = rsrc->base.height0;