radv/llvm: fix GS shaders on GFX8/9

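6698753cdb switched our GS output stores to use MUBUF. For MUBUF with
ADD_TID_ENABLE=1, the descriptor's DATA_FORMAT field is interpreted as
STRIDE[14:17], so the ring descriptors now set it to 0 on GFX8/9.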

The stride doesn't matter for the ESGS descriptor (because idxen=false and
the index stride is 64), but this fixes it anyway.

This also changes ACO to use a MUBUF store, since MTBUF doesn't seem to
work correctly with an invalid data format in the descriptor.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Fixes: 6698753cdb ("ac/llvm: don't use tbuffer_store as a fallback for swizzled stores")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18885>
(cherry picked from commit a71d068fd0)
This commit is contained in:
Rhys Perry
2022-09-29 14:22:16 +01:00
committed by Dylan Baker
parent df8d107ab0
commit 46f2002521
3 changed files with 21 additions and 15 deletions

View File

@@ -193,7 +193,7 @@
"description": "radv/llvm: fix GS shaders on GFX8/9",
"nominated": true,
"nomination_type": 1,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": "6698753cdb6d001669f51e23d42fec65d74e6b58"
},

View File

@@ -7762,20 +7762,18 @@ visit_emit_vertex_with_counter(isel_context* ctx, nir_intrinsic_instr* instr)
const_offset %= 4096u;
}
aco_ptr<MTBUF_instruction> mtbuf{create_instruction<MTBUF_instruction>(
aco_opcode::tbuffer_store_format_x, Format::MTBUF, 4, 0)};
mtbuf->operands[0] = Operand(gsvs_ring);
mtbuf->operands[1] = vaddr_offset;
mtbuf->operands[2] = Operand(get_arg(ctx, ctx->args->ac.gs2vs_offset));
mtbuf->operands[3] = Operand(ctx->outputs.temps[i * 4u + j]);
mtbuf->offen = !vaddr_offset.isUndefined();
mtbuf->dfmt = V_008F0C_BUF_DATA_FORMAT_32;
mtbuf->nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
mtbuf->offset = const_offset;
mtbuf->glc = ctx->program->gfx_level < GFX11;
mtbuf->slc = true;
mtbuf->sync = memory_sync_info(storage_vmem_output, semantic_can_reorder);
bld.insert(std::move(mtbuf));
aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(
aco_opcode::buffer_store_dword, Format::MUBUF, 4, 0)};
mubuf->operands[0] = Operand(gsvs_ring);
mubuf->operands[1] = vaddr_offset;
mubuf->operands[2] = Operand(get_arg(ctx, ctx->args->ac.gs2vs_offset));
mubuf->operands[3] = Operand(ctx->outputs.temps[i * 4u + j]);
mubuf->offen = !vaddr_offset.isUndefined();
mubuf->offset = const_offset;
mubuf->glc = ctx->program->gfx_level < GFX11;
mubuf->slc = true;
mubuf->sync = memory_sync_info(storage_vmem_output, semantic_can_reorder);
bld.insert(std::move(mubuf));
}
offset += ctx->shader->info.gs.vertices_out;

View File

@@ -3877,6 +3877,10 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *map, bool add_sampl
} else if (device->physical_device->rad_info.gfx_level >= GFX10) {
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
} else if (device->physical_device->rad_info.gfx_level >= GFX8) {
/* DATA_FORMAT is STRIDE[14:17] for MUBUF with ADD_TID_ENABLE=1 */
desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
S_008F0C_DATA_FORMAT(0) | S_008F0C_ELEMENT_SIZE(1);
} else {
desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | S_008F0C_ELEMENT_SIZE(1);
@@ -3949,6 +3953,10 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *map, bool add_sampl
} else if (device->physical_device->rad_info.gfx_level >= GFX10) {
desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
} else if (device->physical_device->rad_info.gfx_level >= GFX8) {
/* DATA_FORMAT is STRIDE[14:17] for MUBUF with ADD_TID_ENABLE=1 */
desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
S_008F0C_DATA_FORMAT(0) | S_008F0C_ELEMENT_SIZE(1);
} else {
desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | S_008F0C_ELEMENT_SIZE(1);