nak: Fix TCS output reads
The hardware uses the lane index for per-vertex TCS output reads rather
than the vertex index. Fortunately, it's a pretty easy calculation to
go from one to the other.
Fixes: abe9c1fea2 ("nak: Add NIR lowering for attribute I/O")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27284>
This commit is contained in:

committed by
Marge Bot

parent
e44e57b4e7
commit
99ef70d8aa
@@ -6,11 +6,55 @@
|
||||
#include "nak_private.h"
|
||||
#include "nir_builder.h"
|
||||
|
||||
static nir_def *
|
||||
tess_ctrl_output_vtx(nir_builder *b, nir_def *vtx)
|
||||
{
|
||||
/* This is the pattern we see emitted by the blob driver:
|
||||
*
|
||||
* S2R R0, SR_LANEID
|
||||
* S2R R6, SR_INVOCATION_ID
|
||||
* # R3 is our vertex index
|
||||
* SULD.P.2D.CTA.R.IGN R3, [R2], 0x1d, 0x0
|
||||
* IMAD.IADD R5, R0, 0x1, -R6
|
||||
* IMAD.SHL.U32 R0, R3, 0x4, RZ
|
||||
* LEA.HI.SX32 R4, R0, R5, 0x1e
|
||||
* ALD.O R4, a[0x88], R4
|
||||
*
|
||||
* Translating the MADs and re-naming registers, this is
|
||||
*
|
||||
* %r0 = iadd %lane -%invoc
|
||||
* %r1 = imul %vtx 0x4
|
||||
* %r2 = lea.hi.sx32 %r1 %r0 0x1e
|
||||
* %out = ald.o a[%r2][0x88]
|
||||
*
|
||||
* But `lea.hi.sx32 %r1 %r0 0x1e` is just `(%r1 >> (32 - 0x1e)) + %r0`.
|
||||
* Since %r1 is just `%vtx * 4` and 0x1e is 30, the whole bit on the left
|
||||
* is `(%vtx * 4) >> 2 = %vtx`, assuming no overflow. So, this means
|
||||
*
|
||||
* %r0 = iadd %lane -%invoc
|
||||
* %r2 = iadd %vtx %r0
|
||||
* %out = ald.o a[%r2][0x88]
|
||||
*
|
||||
* In other words, the hardware actually indexes them by lane index with
|
||||
* all of the invocations for a given TCS dispatch going in a sequential
|
||||
* range of lanes. We have to compute the lane index of the requested
|
||||
* invocation from the invocation index.
|
||||
*/
|
||||
nir_def *lane = nir_load_sysval_nv(b, 32, .base = NAK_SV_LANE_ID,
|
||||
.access = ACCESS_CAN_REORDER);
|
||||
nir_def *invoc = nir_load_sysval_nv(b, 32, .base = NAK_SV_INVOCATION_ID,
|
||||
.access = ACCESS_CAN_REORDER);
|
||||
|
||||
return nir_iadd(b, lane, nir_iadd(b, vtx, nir_ineg(b, invoc)));
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_vtg_io_intrin(nir_builder *b,
|
||||
nir_intrinsic_instr *intrin,
|
||||
void *cb_data)
|
||||
{
|
||||
b->cursor = nir_before_instr(&intrin->instr);
|
||||
|
||||
nir_def *vtx = NULL, *offset = NULL, *data = NULL;
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_load_input:
|
||||
@@ -19,11 +63,18 @@ lower_vtg_io_intrin(nir_builder *b,
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_per_vertex_input:
|
||||
case nir_intrinsic_load_per_vertex_output:
|
||||
vtx = intrin->src[0].ssa;
|
||||
offset = intrin->src[1].ssa;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_per_vertex_output:
|
||||
if (b->shader->info.stage == MESA_SHADER_TESS_CTRL)
|
||||
vtx = tess_ctrl_output_vtx(b, intrin->src[0].ssa);
|
||||
else
|
||||
vtx = intrin->src[0].ssa;
|
||||
offset = intrin->src[1].ssa;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_store_output:
|
||||
data = intrin->src[0].ssa;
|
||||
offset = intrin->src[1].ssa;
|
||||
@@ -88,8 +139,6 @@ lower_vtg_io_intrin(nir_builder *b,
|
||||
else
|
||||
mask = nir_component_mask(intrin->num_components);
|
||||
|
||||
b->cursor = nir_before_instr(&intrin->instr);
|
||||
|
||||
if (vtx != NULL && !is_output) {
|
||||
nir_def *info = nir_load_sysval_nv(b, 32,
|
||||
.base = NAK_SV_INVOCATION_INFO,
|
||||
|
Reference in New Issue
Block a user