diff --git a/src/asahi/compiler/agx_nir_lower_texture.c b/src/asahi/compiler/agx_nir_lower_texture.c
index 727c2fa37a6..e3f42e9f03d 100644
--- a/src/asahi/compiler/agx_nir_lower_texture.c
+++ b/src/asahi/compiler/agx_nir_lower_texture.c
@@ -26,6 +26,17 @@ texture_descriptor_ptr(nir_builder *b, nir_tex_instr *tex)
    return nir_load_from_texture_handle_agx(b, tex->src[handle_idx].src.ssa);
 }
 
+static bool
+has_nonzero_lod(nir_tex_instr *tex)
+{
+   int idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
+   if (idx < 0)
+      return false;
+
+   nir_src src = tex->src[idx].src;
+   return !(nir_src_is_const(src) && nir_src_as_uint(src) == 0);
+}
+
 static bool
 lower_tex_crawl(nir_builder *b, nir_instr *instr, UNUSED void *data)
 {
@@ -167,6 +178,21 @@ lower_regular_texture(nir_builder *b, nir_instr *instr, UNUSED void *data)
    nir_def *coord = nir_steal_tex_src(tex, nir_tex_src_coord);
    nir_def *ms_idx = nir_steal_tex_src(tex, nir_tex_src_ms_index);
 
+   /* Apply txf workaround, see libagx_lower_txf_robustness */
+   bool is_txf = ((tex->op == nir_texop_txf) || (tex->op == nir_texop_txf_ms));
+
+   if (is_txf && has_nonzero_lod(tex) &&
+       !(tex->backend_flags & AGX_TEXTURE_FLAG_NO_CLAMP)) {
+
+      int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
+
+      nir_def *replaced = libagx_lower_txf_robustness(
+         b, texture_descriptor_ptr(b, tex), tex->src[lod_idx].src.ssa,
+         nir_channel(b, coord, 0));
+
+      coord = nir_vector_insert_imm(b, coord, replaced, 0);
+   }
+
    /* The layer is always the last component of the NIR coordinate, split it off
     * because we'll need to swizzle.
     */
@@ -178,7 +204,7 @@ lower_regular_texture(nir_builder *b, nir_instr *instr, UNUSED void *data)
       coord = nir_trim_vector(b, coord, lidx);
 
       /* Round layer to nearest even */
-      if (tex->op != nir_texop_txf && tex->op != nir_texop_txf_ms)
+      if (!is_txf)
          unclamped_layer = nir_f2u32(b, nir_fround_even(b, unclamped_layer));
 
       /* For a cube array, the layer is zero-indexed component 3 of the
@@ -572,6 +598,11 @@ agx_nir_lower_texture(nir_shader *s)
             nir_metadata_block_index | nir_metadata_dominance, NULL);
    NIR_PASS(progress, s, nir_legalize_16bit_sampler_srcs, tex_constraints);
 
+   /* Fold constants after nir_legalize_16bit_sampler_srcs so a constant-zero
+    * LOD is detectable in lower_regular_texture. Required for correctness.
+    */
+   NIR_PASS(progress, s, nir_opt_constant_folding);
+
    /* Lower texture sources after legalizing types (as the lowering depends on
     * 16-bit multisample indices) but before lowering queries (as the lowering
     * generates txs for array textures).
@@ -661,9 +692,16 @@ agx_nir_needs_texture_crawl(nir_instr *instr)
    case nir_texop_query_levels:
       return true;
 
-   /* Buffer textures need their format read */
+   /* Buffer textures need their format read; txf needs its LOD bounds-checked
+    * unless absent or constant zero. Buffer textures are only read through txf.
+    */
+   case nir_texop_txf:
+   case nir_texop_txf_ms:
+      return has_nonzero_lod(tex) ||
+             tex->sampler_dim == GLSL_SAMPLER_DIM_BUF;
+
    default:
-      return tex->sampler_dim == GLSL_SAMPLER_DIM_BUF;
+      return false;
    }
 }
 
diff --git a/src/asahi/lib/shaders/texture.cl b/src/asahi/lib/shaders/texture.cl
index f82e79718b4..6211c9ca075 100644
--- a/src/asahi/lib/shaders/texture.cl
+++ b/src/asahi/lib/shaders/texture.cl
@@ -80,6 +80,27 @@ libagx_texture_levels(constant struct agx_texture_packed *ptr)
    return (d.last_level - d.first_level) + 1;
 }
 
+/*
+ * Fix robustness behaviour of txf with out-of-bounds LOD. The hardware
+ * returns the correct out-of-bounds colour for out-of-bounds coordinates,
+ * just not LODs. So translate out-of-bounds LOD into an out-of-bounds
+ * coordinate to get correct behaviour in 1 instruction.
+ *
+ * Returns the fixed X-coordinate.
+ *
+ * TODO: This looks like it might be an erratum workaround on G13 (Apple does
+ * it), maybe check if G15 is affected.
+ */
+uint
+libagx_lower_txf_robustness(constant struct agx_texture_packed *ptr, ushort lod,
+                            uint x)
+{
+   agx_unpack(NULL, ptr, TEXTURE, d);
+
+   bool oob = (lod > (d.last_level - d.first_level));
+   return oob ? 0xFFFF : x;
+}
+
 static uint32_t
 calculate_twiddled_coordinates(ushort2 coord, uint16_t tile_w_px,
                                uint16_t tile_h_px, uint32_t width_tl)