From 9578b47af3912e7f7267e5b4723c610ea187c921 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Fri, 16 Dec 2022 23:38:07 -0500 Subject: [PATCH] agx: Implement depth and stencil export Lower FRAG_RESULT_DEPTH and FRAG_RESULT_STENCIL writes to a combined zs_emit instruction with a multisampling index. To be used in the following commit. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/asahi/compiler/agx_compile.c | 42 ++++++++++ src/asahi/compiler/agx_compile.h | 3 + src/asahi/compiler/agx_compiler.h | 2 + src/asahi/compiler/agx_nir_lower_zs_emit.c | 89 ++++++++++++++++++++++ src/asahi/compiler/agx_opcodes.py | 5 ++ src/asahi/compiler/agx_optimizer.c | 2 + src/asahi/compiler/agx_pack.c | 26 +++++++ src/asahi/compiler/meson.build | 1 + 8 files changed, 170 insertions(+) create mode 100644 src/asahi/compiler/agx_nir_lower_zs_emit.c diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c index bd7373e6819..a2f44cb2428 100644 --- a/src/asahi/compiler/agx_compile.c +++ b/src/asahi/compiler/agx_compile.c @@ -494,6 +494,32 @@ agx_emit_local_store_pixel(agx_builder *b, nir_intrinsic_instr *instr) nir_intrinsic_base(instr)); } +static agx_instr * +agx_emit_store_zs(agx_builder *b, nir_intrinsic_instr *instr) +{ + unsigned base = nir_intrinsic_base(instr); + bool write_z = base & 1; + bool write_s = base & 2; + + /* TODO: Handle better */ + assert(!b->shader->key->fs.ignore_tib_dependencies && "not used"); + agx_writeout(b, 0x0001); + + agx_index z = agx_src_index(&instr->src[1]); + agx_index s = agx_src_index(&instr->src[2]); + + agx_index zs = (write_z && write_s) ? agx_vec2(b, z, s) : + write_z ? z : + s; + + /* Not necessarily a sample mask but overlapping hw mechanism... Should + * maybe rename this flag to something more general. 
+ */ + b->shader->out->writes_sample_mask = true; + + return agx_zs_emit(b, agx_src_index(&instr->src[0]), zs, base); +} + static void agx_emit_local_load_pixel(agx_builder *b, agx_index dest, nir_intrinsic_instr *instr) { @@ -713,6 +739,10 @@ agx_emit_intrinsic(agx_builder *b, nir_intrinsic_instr *instr) assert(stage == MESA_SHADER_VERTEX); return agx_emit_store_vary(b, instr); + case nir_intrinsic_store_zs_agx: + assert(stage == MESA_SHADER_FRAGMENT); + return agx_emit_store_zs(b, instr); + case nir_intrinsic_store_local_pixel_agx: assert(stage == MESA_SHADER_FRAGMENT); return agx_emit_local_store_pixel(b, instr); @@ -1880,6 +1910,8 @@ agx_preprocess_nir(nir_shader *nir) NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out, glsl_type_size, 0); if (nir->info.stage == MESA_SHADER_FRAGMENT) { + NIR_PASS_V(nir, agx_nir_lower_zs_emit); + /* Interpolate varyings at fp16 and write to the tilebuffer at fp16. As an * exception, interpolate flat shaded at fp32. This works around a * hardware limitation. 
The resulting code (with an extra f2f16 at the end @@ -1952,6 +1984,16 @@ agx_compile_shader_nir(nir_shader *nir, out->no_colour_output = !(nir->info.outputs_written >> FRAG_RESULT_DATA0); out->disable_tri_merging = nir->info.fs.needs_all_helper_invocations || nir->info.fs.needs_quad_helper_invocations; + + /* Report a canonical depth layout */ + enum gl_frag_depth_layout layout = nir->info.fs.depth_layout; + + if (!(nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_DEPTH))) + out->depth_layout = FRAG_DEPTH_LAYOUT_UNCHANGED; + else if (layout == FRAG_DEPTH_LAYOUT_NONE) + out->depth_layout = FRAG_DEPTH_LAYOUT_ANY; + else + out->depth_layout = layout; } agx_optimize_nir(nir, &out->push_count); diff --git a/src/asahi/compiler/agx_compile.h b/src/asahi/compiler/agx_compile.h index 2dabdb775a4..ad310e2de20 100644 --- a/src/asahi/compiler/agx_compile.h +++ b/src/asahi/compiler/agx_compile.h @@ -173,6 +173,9 @@ struct agx_shader_info { /* Does the shader control the sample mask? */ bool writes_sample_mask; + /* Depth layout, never equal to NONE */ + enum gl_frag_depth_layout depth_layout; + /* Is colour output omitted? 
*/ bool no_colour_output; diff --git a/src/asahi/compiler/agx_compiler.h b/src/asahi/compiler/agx_compiler.h index 61ba7075412..8a31f3b52e9 100644 --- a/src/asahi/compiler/agx_compiler.h +++ b/src/asahi/compiler/agx_compiler.h @@ -301,6 +301,7 @@ typedef struct { uint32_t channels; uint32_t bfi_mask; uint16_t pixel_offset; + uint16_t zs; enum agx_sr sr; enum agx_icond icond; enum agx_fcond fcond; @@ -806,6 +807,7 @@ void agx_compute_liveness(agx_context *ctx); void agx_liveness_ins_update(BITSET_WORD *live, agx_instr *I); bool agx_lower_resinfo(nir_shader *s); +bool agx_nir_lower_zs_emit(nir_shader *s); bool agx_nir_lower_array_texture(nir_shader *s); bool agx_nir_opt_preamble(nir_shader *s, unsigned *preamble_size); bool agx_nir_lower_load_mask(nir_shader *shader); diff --git a/src/asahi/compiler/agx_nir_lower_zs_emit.c b/src/asahi/compiler/agx_nir_lower_zs_emit.c new file mode 100644 index 00000000000..8cec781d7ea --- /dev/null +++ b/src/asahi/compiler/agx_nir_lower_zs_emit.c @@ -0,0 +1,89 @@ +/* + * Copyright 2022 Alyssa Rosenzweig + * SPDX-License-Identifier: MIT + */ + +#include "agx_compiler.h" +#include "compiler/nir/nir.h" +#include "compiler/nir/nir_builder.h" + +#define ALL_SAMPLES 0xFF +#define BASE_Z 1 +#define BASE_S 2 + +static bool +lower(nir_function_impl *impl, nir_block *block) +{ + nir_intrinsic_instr *zs_emit = NULL; + bool progress = false; + + nir_foreach_instr_reverse_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic != nir_intrinsic_store_output) + continue; + + nir_io_semantics sem = nir_intrinsic_io_semantics(intr); + if (sem.location != FRAG_RESULT_DEPTH && sem.location != FRAG_RESULT_STENCIL) + continue; + + if (zs_emit == NULL) { + nir_builder b; + nir_builder_init(&b, impl); + b.cursor = nir_before_instr(instr); + + /* Multisampling will get lowered later if needed, default to broadcast */ + nir_ssa_def *sample_mask = 
nir_imm_intN_t(&b, ALL_SAMPLES, 16); + zs_emit = nir_store_zs_agx(&b, sample_mask, + nir_ssa_undef(&b, 1, 32) /* depth */, + nir_ssa_undef(&b, 1, 16) /* stencil */); + } + + nir_ssa_def *value = intr->src[0].ssa; + + bool z = (sem.location == FRAG_RESULT_DEPTH); + unsigned src_idx = z ? 1 : 2; + unsigned base = z ? BASE_Z : BASE_S; + + assert((nir_intrinsic_base(zs_emit) & base) == 0 && + "each of depth/stencil may only be written once"); + + nir_instr_rewrite_src_ssa(&zs_emit->instr, &zs_emit->src[src_idx], value); + nir_intrinsic_set_base(zs_emit, nir_intrinsic_base(zs_emit) | base); + + nir_instr_remove(instr); + progress = true; + } + + return progress; +} + +bool +agx_nir_lower_zs_emit(nir_shader *s) +{ + bool any_progress = false; + + nir_foreach_function(function, s) { + if (!function->impl) + continue; + + bool progress = false; + + nir_foreach_block(block, function->impl) { + progress |= lower(function->impl, block); + } + + if (progress) { + nir_metadata_preserve(function->impl, nir_metadata_block_index | + nir_metadata_dominance); + } else { + nir_metadata_preserve(function->impl, nir_metadata_all); + } + + any_progress |= progress; + } + + return any_progress; +} diff --git a/src/asahi/compiler/agx_opcodes.py b/src/asahi/compiler/agx_opcodes.py index 75f7f6ff4d2..17c34c3ebfd 100644 --- a/src/asahi/compiler/agx_opcodes.py +++ b/src/asahi/compiler/agx_opcodes.py @@ -117,6 +117,7 @@ NEST = immediate("nest") INVERT_COND = immediate("invert_cond") NEST = immediate("nest") TARGET = immediate("target", "agx_block *") +ZS = immediate("zs") PERSPECTIVE = immediate("perspective", "bool") SR = enum("sr", { 0: 'threadgroup_position_in_grid.x', @@ -251,6 +252,10 @@ op("get_sr", (0x72, 0x7F | L, 4, _), dests = 1, imms = [SR]) op("sample_mask", (0x7fc1, 0xffff, 6, _), dests = 0, srcs = 1, can_eliminate = False) +# Sources: sample mask, combined depth/stencil +op("zs_emit", (0x41, 0xFF | L, 4, _), dests = 0, srcs = 2, + can_eliminate = False, imms = [ZS]) + # 
Essentially same encoding. Last source is the sample mask op("ld_tile", (0x49, 0x7F, 8, _), dests = 1, srcs = 1, imms = [FORMAT, MASK, PIXEL_OFFSET], can_reorder = False) diff --git a/src/asahi/compiler/agx_optimizer.c b/src/asahi/compiler/agx_optimizer.c index 24cc4786963..0deb128a402 100644 --- a/src/asahi/compiler/agx_optimizer.c +++ b/src/asahi/compiler/agx_optimizer.c @@ -123,6 +123,7 @@ agx_optimizer_inline_imm(agx_instr **defs, agx_instr *I, /* cmpselsrc takes integer immediates only */ if (s >= 2 && I->op == AGX_OPCODE_FCMPSEL) float_src = false; if (I->op == AGX_OPCODE_ST_TILE && s == 0) continue; + if (I->op == AGX_OPCODE_ZS_EMIT && s != 0) continue; if (float_src) { bool fp16 = (def->dest[0].size == AGX_SIZE_16); @@ -179,6 +180,7 @@ agx_optimizer_copyprop(agx_instr **defs, agx_instr *I) (I->op == AGX_OPCODE_DEVICE_LOAD && (s != 0 || def->src[0].value >= 256)) || I->op == AGX_OPCODE_PHI || + I->op == AGX_OPCODE_ZS_EMIT || I->op == AGX_OPCODE_ST_TILE || I->op == AGX_OPCODE_LD_TILE || I->op == AGX_OPCODE_BLOCK_IMAGE_STORE || diff --git a/src/asahi/compiler/agx_pack.c b/src/asahi/compiler/agx_pack.c index 6f1d321bad6..b19365b6432 100644 --- a/src/asahi/compiler/agx_pack.c +++ b/src/asahi/compiler/agx_pack.c @@ -719,6 +719,32 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, agx break; } + case AGX_OPCODE_ZS_EMIT: + { + agx_index S = I->src[0]; + if (S.type == AGX_INDEX_IMMEDIATE) + assert(S.value < BITFIELD_BIT(8)); + else + assert_register_is_aligned(S); + + agx_index T = I->src[1]; + assert_register_is_aligned(T); + + assert(I->zs >= 1 && I->zs <= 3); + + uint32_t word0 = + agx_opcodes_info[I->op].encoding.exact | + ((S.type == AGX_INDEX_IMMEDIATE) ? 
(1 << 8) : 0) | + ((S.value & BITFIELD_MASK(6)) << 9) | + ((T.value & BITFIELD_MASK(6)) << 16) | + ((T.value >> 6) << 26) | + ((S.value >> 6) << 24) | + (I->zs << 29); + + memcpy(util_dynarray_grow_bytes(emission, 1, 4), &word0, 4); + break; + } + case AGX_OPCODE_JMP_EXEC_ANY: case AGX_OPCODE_JMP_EXEC_NONE: { diff --git a/src/asahi/compiler/meson.build b/src/asahi/compiler/meson.build index 676bce4479e..69e555e62b6 100644 --- a/src/asahi/compiler/meson.build +++ b/src/asahi/compiler/meson.build @@ -23,6 +23,7 @@ libasahi_agx_files = files( 'agx_compile.c', 'agx_dce.c', 'agx_liveness.c', + 'agx_nir_lower_zs_emit.c', 'agx_nir_lower_texture.c', 'agx_nir_lower_load_mask.c', 'agx_nir_lower_ubo.c',