diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c index 43b536c81bd..6ab3fed0536 100644 --- a/src/asahi/compiler/agx_compile.c +++ b/src/asahi/compiler/agx_compile.c @@ -2018,7 +2018,8 @@ agx_optimize_nir(nir_shader *nir, unsigned *preamble_size) /* ABI: position first, then user, then psiz */ static void -agx_remap_varyings_vs(nir_shader *nir, struct agx_varyings_vs *varyings) +agx_remap_varyings_vs(nir_shader *nir, struct agx_varyings_vs *varyings, + struct agx_shader_key *key) { unsigned base = 0; @@ -2033,16 +2034,47 @@ agx_remap_varyings_vs(nir_shader *nir, struct agx_varyings_vs *varyings) varyings->slots[VARYING_SLOT_POS] = base; base += 4; - u_foreach_bit64(loc, nir->info.outputs_written) { + assert(!(key->vs.outputs_flat_shaded & key->vs.outputs_linear_shaded)); + + /* Smooth 32-bit user bindings go next */ + u_foreach_bit64(loc, nir->info.outputs_written & + ~key->vs.outputs_flat_shaded & + ~key->vs.outputs_linear_shaded) { if (loc == VARYING_SLOT_POS || loc == VARYING_SLOT_PSIZ) continue; varyings->slots[loc] = base; base += 4; + varyings->num_32_smooth += 4; + } + + /* Flat 32-bit user bindings go next */ + u_foreach_bit64(loc, + nir->info.outputs_written & key->vs.outputs_flat_shaded) { + if (loc == VARYING_SLOT_POS || loc == VARYING_SLOT_PSIZ) + continue; + + varyings->slots[loc] = base; + base += 4; + varyings->num_32_flat += 4; + } + + /* Linear 32-bit user bindings go next */ + u_foreach_bit64(loc, + nir->info.outputs_written & key->vs.outputs_linear_shaded) { + if (loc == VARYING_SLOT_POS || loc == VARYING_SLOT_PSIZ) + continue; + + varyings->slots[loc] = base; + base += 4; + varyings->num_32_linear += 4; } /* TODO: Link FP16 varyings */ varyings->base_index_fp16 = base; + varyings->num_16_smooth = 0; + varyings->num_16_flat = 0; + varyings->num_16_linear = 0; if (nir->info.outputs_written & VARYING_BIT_PSIZ) { varyings->slots[VARYING_SLOT_PSIZ] = base; @@ -2471,7 +2503,7 @@ agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key, /* Must be last since NIR passes can remap driver_location freely */ if (nir->info.stage == MESA_SHADER_VERTEX) - agx_remap_varyings_vs(nir, &out->varyings.vs); + agx_remap_varyings_vs(nir, &out->varyings.vs, key); if (agx_should_dump(nir, AGX_DBG_SHADERS)) nir_print_shader(nir, stdout); diff --git a/src/asahi/compiler/agx_compile.h b/src/asahi/compiler/agx_compile.h index a38aa0922b3..0ef4b9342b2 100644 --- a/src/asahi/compiler/agx_compile.h +++ b/src/asahi/compiler/agx_compile.h @@ -10,6 +10,17 @@ #include "util/u_dynarray.h" struct agx_varyings_vs { + /* The number of user varyings of each type. The varyings must be allocated + * in this order ({smooth, flat, linear} × {32, 16}), which may require + * remapping. + */ + unsigned num_32_smooth; + unsigned num_32_flat; + unsigned num_32_linear; + unsigned num_16_smooth; + unsigned num_16_flat; + unsigned num_16_linear; + /* The first index used for FP16 varyings. Indices less than this are treated * as FP32. This may require remapping slots to guarantee. */ diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c index 4ae696e1aa9..592ec625eac 100644 --- a/src/gallium/drivers/asahi/agx_state.c +++ b/src/gallium/drivers/asahi/agx_state.c @@ -2212,7 +2212,6 @@ agx_batch_init_state(struct agx_batch *batch) struct agx_ppp_update ppp = agx_new_ppp_update(&batch->pool, (struct AGX_PPP_HEADER){ .w_clamp = true, - .varying_counts_16 = true, .cull_2 = true, .occlusion_query_2 = true, .output_unknown = true, @@ -2221,7 +2220,6 @@ agx_batch_init_state(struct agx_batch *batch) /* clang-format off */ agx_ppp_push(&ppp, W_CLAMP, cfg) cfg.w_clamp = 1e-10; - agx_ppp_push(&ppp, VARYING_COUNTS, cfg); agx_ppp_push(&ppp, CULL_2, cfg); agx_ppp_push(&ppp, FRAGMENT_OCCLUSION_QUERY_2, cfg); agx_ppp_push(&ppp, OUTPUT_UNKNOWN, cfg); @@ -2416,6 +2414,7 @@ agx_encode_state(struct agx_batch *batch, uint8_t *out, bool is_lines, .fragment_back_stencil = IS_DIRTY(ZS), .output_select = IS_DIRTY(VS_PROG) || IS_DIRTY(FS_PROG), .varying_counts_32 = IS_DIRTY(VS_PROG), + .varying_counts_16 = IS_DIRTY(VS_PROG), .cull = IS_DIRTY(RS), .fragment_shader = IS_DIRTY(FS) || varyings_dirty || IS_DIRTY(SAMPLE_MASK), @@ -2506,9 +2505,19 @@ agx_encode_state(struct agx_batch *batch, uint8_t *out, bool is_lines, } } + assert(dirty.varying_counts_32 == dirty.varying_counts_16); + if (dirty.varying_counts_32) { agx_ppp_push(&ppp, VARYING_COUNTS, cfg) { - cfg.smooth = agx_num_general_outputs(&ctx->vs->info.varyings.vs); + cfg.smooth = vs->info.varyings.vs.num_32_smooth; + cfg.flat = vs->info.varyings.vs.num_32_flat; + cfg.linear = vs->info.varyings.vs.num_32_linear; + } + + agx_ppp_push(&ppp, VARYING_COUNTS, cfg) { + cfg.smooth = vs->info.varyings.vs.num_16_smooth; + cfg.flat = vs->info.varyings.vs.num_16_flat; + cfg.linear = vs->info.varyings.vs.num_16_linear; } }