radeonsi/gfx10: generate VS and TES as NGG merged ESGS shaders
This does not support geometry shading yet. Also missing are streamout and NGG-specific optimizations. Acked-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
This commit is contained in:

committed by
Marek Olšák

parent
e86256c512
commit
612489bd5d
@@ -6,6 +6,7 @@ C_SOURCES := \
|
||||
$(GENERATED_SOURCES) \
|
||||
cik_sdma.c \
|
||||
driinfo_radeonsi.h \
|
||||
gfx10_shader_ngg.c \
|
||||
si_blit.c \
|
||||
si_buffer.c \
|
||||
si_build_pm4.h \
|
||||
|
265
src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
Normal file
265
src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
Normal file
@@ -0,0 +1,265 @@
|
||||
/*
|
||||
* Copyright 2017 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "si_pipe.h"
|
||||
#include "si_shader_internal.h"
|
||||
|
||||
#include "sid.h"
|
||||
|
||||
#include "util/u_memory.h"
|
||||
|
||||
static LLVMValueRef get_wave_id_in_tg(struct si_shader_context *ctx)
|
||||
{
|
||||
return si_unpack_param(ctx, ctx->param_merged_wave_info, 24, 4);
|
||||
}
|
||||
|
||||
static LLVMValueRef ngg_get_vtx_cnt(struct si_shader_context *ctx)
|
||||
{
|
||||
return ac_build_bfe(&ctx->ac, ctx->gs_tg_info,
|
||||
LLVMConstInt(ctx->ac.i32, 12, false),
|
||||
LLVMConstInt(ctx->ac.i32, 9, false),
|
||||
false);
|
||||
}
|
||||
|
||||
static LLVMValueRef ngg_get_prim_cnt(struct si_shader_context *ctx)
|
||||
{
|
||||
return ac_build_bfe(&ctx->ac, ctx->gs_tg_info,
|
||||
LLVMConstInt(ctx->ac.i32, 22, false),
|
||||
LLVMConstInt(ctx->ac.i32, 9, false),
|
||||
false);
|
||||
}
|
||||
|
||||
/* Send GS Alloc Req message from the first wave of the group to SPI.
|
||||
* Message payload is:
|
||||
* - bits 0..10: vertices in group
|
||||
* - bits 12..22: primitives in group
|
||||
*/
|
||||
static void build_sendmsg_gs_alloc_req(struct si_shader_context *ctx,
|
||||
LLVMValueRef vtx_cnt,
|
||||
LLVMValueRef prim_cnt)
|
||||
{
|
||||
LLVMBuilderRef builder = ctx->ac.builder;
|
||||
LLVMValueRef tmp;
|
||||
|
||||
tmp = LLVMBuildICmp(builder, LLVMIntEQ, get_wave_id_in_tg(ctx), ctx->ac.i32_0, "");
|
||||
ac_build_ifcc(&ctx->ac, tmp, 5020);
|
||||
|
||||
tmp = LLVMBuildShl(builder, prim_cnt, LLVMConstInt(ctx->ac.i32, 12, false),"");
|
||||
tmp = LLVMBuildOr(builder, tmp, vtx_cnt, "");
|
||||
ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_ALLOC_REQ, tmp);
|
||||
|
||||
ac_build_endif(&ctx->ac, 5020);
|
||||
}
|
||||
|
||||
struct ngg_prim {
|
||||
unsigned num_vertices;
|
||||
LLVMValueRef isnull;
|
||||
LLVMValueRef index[3];
|
||||
LLVMValueRef edgeflag[3];
|
||||
};
|
||||
|
||||
static void build_export_prim(struct si_shader_context *ctx,
|
||||
const struct ngg_prim *prim)
|
||||
{
|
||||
LLVMBuilderRef builder = ctx->ac.builder;
|
||||
struct ac_export_args args;
|
||||
LLVMValueRef tmp;
|
||||
|
||||
tmp = LLVMBuildZExt(builder, prim->isnull, ctx->ac.i32, "");
|
||||
args.out[0] = LLVMBuildShl(builder, tmp, LLVMConstInt(ctx->ac.i32, 31, false), "");
|
||||
|
||||
for (unsigned i = 0; i < prim->num_vertices; ++i) {
|
||||
tmp = LLVMBuildShl(builder, prim->index[i],
|
||||
LLVMConstInt(ctx->ac.i32, 10 * i, false), "");
|
||||
args.out[0] = LLVMBuildOr(builder, args.out[0], tmp, "");
|
||||
tmp = LLVMBuildZExt(builder, prim->edgeflag[i], ctx->ac.i32, "");
|
||||
tmp = LLVMBuildShl(builder, tmp,
|
||||
LLVMConstInt(ctx->ac.i32, 10 * i + 9, false), "");
|
||||
args.out[0] = LLVMBuildOr(builder, args.out[0], tmp, "");
|
||||
}
|
||||
|
||||
args.out[0] = LLVMBuildBitCast(builder, args.out[0], ctx->ac.f32, "");
|
||||
args.out[1] = LLVMGetUndef(ctx->ac.f32);
|
||||
args.out[2] = LLVMGetUndef(ctx->ac.f32);
|
||||
args.out[3] = LLVMGetUndef(ctx->ac.f32);
|
||||
|
||||
args.target = V_008DFC_SQ_EXP_PRIM;
|
||||
args.enabled_channels = 1;
|
||||
args.done = true;
|
||||
args.valid_mask = false;
|
||||
args.compr = false;
|
||||
|
||||
ac_build_export(&ctx->ac, &args);
|
||||
}
|
||||
|
||||
/**
|
||||
* Emit the epilogue of an API VS or TES shader compiled as ESGS shader.
|
||||
*/
|
||||
void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi,
|
||||
unsigned max_outputs,
|
||||
LLVMValueRef *addrs)
|
||||
{
|
||||
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
||||
struct tgsi_shader_info *info = &ctx->shader->selector->info;
|
||||
struct si_shader_output_values *outputs = NULL;
|
||||
LLVMBuilderRef builder = ctx->ac.builder;
|
||||
struct lp_build_if_state if_state;
|
||||
LLVMValueRef tmp;
|
||||
|
||||
assert(!ctx->shader->is_gs_copy_shader);
|
||||
assert(info->num_outputs <= max_outputs);
|
||||
|
||||
outputs = MALLOC((info->num_outputs + 1) * sizeof(outputs[0]));
|
||||
|
||||
for (unsigned i = 0; i < info->num_outputs; i++) {
|
||||
outputs[i].semantic_name = info->output_semantic_name[i];
|
||||
outputs[i].semantic_index = info->output_semantic_index[i];
|
||||
|
||||
/* This is used only by streamout. */
|
||||
for (unsigned j = 0; j < 4; j++) {
|
||||
outputs[i].values[j] =
|
||||
LLVMBuildLoad(builder,
|
||||
addrs[4 * i + j],
|
||||
"");
|
||||
outputs[i].vertex_stream[j] =
|
||||
(info->output_streams[i] >> (2 * j)) & 3;
|
||||
}
|
||||
}
|
||||
|
||||
lp_build_endif(&ctx->merged_wrap_if_state);
|
||||
|
||||
LLVMValueRef prims_in_wave = si_unpack_param(ctx, ctx->param_merged_wave_info, 8, 8);
|
||||
LLVMValueRef vtx_in_wave = si_unpack_param(ctx, ctx->param_merged_wave_info, 0, 8);
|
||||
LLVMValueRef is_gs_thread = LLVMBuildICmp(builder, LLVMIntULT,
|
||||
ac_get_thread_id(&ctx->ac), prims_in_wave, "");
|
||||
LLVMValueRef is_es_thread = LLVMBuildICmp(builder, LLVMIntULT,
|
||||
ac_get_thread_id(&ctx->ac), vtx_in_wave, "");
|
||||
LLVMValueRef vtxindex[] = {
|
||||
si_unpack_param(ctx, ctx->param_gs_vtx01_offset, 0, 16),
|
||||
si_unpack_param(ctx, ctx->param_gs_vtx01_offset, 16, 16),
|
||||
si_unpack_param(ctx, ctx->param_gs_vtx23_offset, 0, 16),
|
||||
};
|
||||
|
||||
/* Determine the number of vertices per primitive. */
|
||||
unsigned num_vertices;
|
||||
LLVMValueRef num_vertices_val;
|
||||
|
||||
if (ctx->type == PIPE_SHADER_VERTEX) {
|
||||
if (info->properties[TGSI_PROPERTY_VS_BLIT_SGPRS]) {
|
||||
/* Blits always use axis-aligned rectangles with 3 vertices. */
|
||||
num_vertices = 3;
|
||||
num_vertices_val = LLVMConstInt(ctx->i32, 3, 0);
|
||||
} else {
|
||||
/* Extract OUTPRIM field. */
|
||||
tmp = si_unpack_param(ctx, ctx->param_vs_state_bits, 2, 2);
|
||||
num_vertices_val = LLVMBuildAdd(builder, tmp, ctx->i32_1, "");
|
||||
num_vertices = 3; /* TODO: optimize for points & lines */
|
||||
}
|
||||
} else {
|
||||
assert(ctx->type == PIPE_SHADER_TESS_EVAL);
|
||||
|
||||
if (info->properties[TGSI_PROPERTY_TES_POINT_MODE])
|
||||
num_vertices = 1;
|
||||
else if (info->properties[TGSI_PROPERTY_TES_PRIM_MODE] == PIPE_PRIM_LINES)
|
||||
num_vertices = 2;
|
||||
else
|
||||
num_vertices = 3;
|
||||
|
||||
num_vertices_val = LLVMConstInt(ctx->i32, num_vertices, false);
|
||||
}
|
||||
|
||||
/* TODO: streamout */
|
||||
|
||||
/* TODO: primitive culling */
|
||||
|
||||
build_sendmsg_gs_alloc_req(ctx, ngg_get_vtx_cnt(ctx), ngg_get_prim_cnt(ctx));
|
||||
|
||||
/* Export primitive data to the index buffer. Format is:
|
||||
* - bits 0..8: index 0
|
||||
* - bit 9: edge flag 0
|
||||
* - bits 10..18: index 1
|
||||
* - bit 19: edge flag 1
|
||||
* - bits 20..28: index 2
|
||||
* - bit 29: edge flag 2
|
||||
* - bit 31: null primitive (skip)
|
||||
*
|
||||
* For the first version, we will always build up all three indices
|
||||
* independent of the primitive type. The additional garbage data
|
||||
* shouldn't hurt.
|
||||
*
|
||||
* TODO: culling depends on the primitive type, so can have some
|
||||
* interaction here.
|
||||
*/
|
||||
lp_build_if(&if_state, &ctx->gallivm, is_gs_thread);
|
||||
{
|
||||
struct ngg_prim prim = {};
|
||||
|
||||
prim.num_vertices = num_vertices;
|
||||
prim.isnull = ctx->ac.i1false;
|
||||
memcpy(prim.index, vtxindex, sizeof(vtxindex[0]) * 3);
|
||||
|
||||
for (unsigned i = 0; i < num_vertices; ++i) {
|
||||
tmp = LLVMBuildLShr(builder, ctx->abi.gs_invocation_id,
|
||||
LLVMConstInt(ctx->ac.i32, 8 + i, false), "");
|
||||
prim.edgeflag[i] = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
|
||||
}
|
||||
|
||||
build_export_prim(ctx, &prim);
|
||||
}
|
||||
lp_build_endif(&if_state);
|
||||
|
||||
/* Export per-vertex data (positions and parameters). */
|
||||
lp_build_if(&if_state, &ctx->gallivm, is_es_thread);
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
/* Unconditionally (re-)load the values for proper SSA form. */
|
||||
for (i = 0; i < info->num_outputs; i++) {
|
||||
for (unsigned j = 0; j < 4; j++) {
|
||||
outputs[i].values[j] =
|
||||
LLVMBuildLoad(builder,
|
||||
addrs[4 * i + j],
|
||||
"");
|
||||
}
|
||||
}
|
||||
|
||||
/* TODO: Vertex shaders have to get PrimitiveID from GS VGPRs. */
|
||||
if (ctx->type == PIPE_SHADER_TESS_EVAL &&
|
||||
ctx->shader->key.mono.u.vs_export_prim_id) {
|
||||
outputs[i].semantic_name = TGSI_SEMANTIC_PRIMID;
|
||||
outputs[i].semantic_index = 0;
|
||||
outputs[i].values[0] = ac_to_float(&ctx->ac, si_get_primitive_id(ctx, 0));
|
||||
for (unsigned j = 1; j < 4; j++)
|
||||
outputs[i].values[j] = LLVMGetUndef(ctx->f32);
|
||||
|
||||
memset(outputs[i].vertex_stream, 0,
|
||||
sizeof(outputs[i].vertex_stream));
|
||||
i++;
|
||||
}
|
||||
|
||||
si_llvm_export_vs(ctx, outputs, i);
|
||||
}
|
||||
lp_build_endif(&if_state);
|
||||
|
||||
FREE(outputs);
|
||||
}
|
@@ -21,6 +21,7 @@
|
||||
files_libradeonsi = files(
|
||||
'cik_sdma.c',
|
||||
'driinfo_radeonsi.h',
|
||||
'gfx10_shader_ngg.c',
|
||||
'si_blit.c',
|
||||
'si_buffer.c',
|
||||
'si_build_pm4.h',
|
||||
|
@@ -4412,6 +4412,10 @@ static void declare_streamout_params(struct si_shader_context *ctx,
|
||||
static unsigned si_get_max_workgroup_size(const struct si_shader *shader)
|
||||
{
|
||||
switch (shader->selector->type) {
|
||||
case PIPE_SHADER_VERTEX:
|
||||
case PIPE_SHADER_TESS_EVAL:
|
||||
return shader->key.as_ngg ? 128 : 0;
|
||||
|
||||
case PIPE_SHADER_TESS_CTRL:
|
||||
/* Return this so that LLVM doesn't remove s_barrier
|
||||
* instructions on chips where we use s_barrier. */
|
||||
@@ -4582,7 +4586,7 @@ static void create_function(struct si_shader_context *ctx)
|
||||
if (ctx->screen->info.chip_class >= GFX9) {
|
||||
if (shader->key.as_ls || type == PIPE_SHADER_TESS_CTRL)
|
||||
type = SI_SHADER_MERGED_VERTEX_TESSCTRL; /* LS or HS */
|
||||
else if (shader->key.as_es || type == PIPE_SHADER_GEOMETRY)
|
||||
else if (shader->key.as_es || shader->key.as_ngg || type == PIPE_SHADER_GEOMETRY)
|
||||
type = SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY;
|
||||
}
|
||||
|
||||
@@ -4708,7 +4712,12 @@ static void create_function(struct si_shader_context *ctx)
|
||||
/* SPI_SHADER_USER_DATA_ADDR_LO/HI_GS */
|
||||
declare_per_stage_desc_pointers(ctx, &fninfo,
|
||||
ctx->type == PIPE_SHADER_GEOMETRY);
|
||||
ctx->param_gs2vs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
||||
|
||||
if (ctx->shader->key.as_ngg)
|
||||
add_arg_assign(&fninfo, ARG_SGPR, ctx->i32, &ctx->gs_tg_info);
|
||||
else
|
||||
ctx->param_gs2vs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
||||
|
||||
ctx->param_merged_wave_info = add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
||||
ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
||||
ctx->param_merged_scratch_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
||||
@@ -4716,11 +4725,17 @@ static void create_function(struct si_shader_context *ctx)
|
||||
add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused (SPI_SHADER_PGM_LO/HI_GS >> 24) */
|
||||
|
||||
declare_global_desc_pointers(ctx, &fninfo);
|
||||
declare_per_stage_desc_pointers(ctx, &fninfo,
|
||||
(ctx->type == PIPE_SHADER_VERTEX ||
|
||||
ctx->type == PIPE_SHADER_TESS_EVAL));
|
||||
if (ctx->type != PIPE_SHADER_VERTEX || !vs_blit_property) {
|
||||
declare_per_stage_desc_pointers(ctx, &fninfo,
|
||||
(ctx->type == PIPE_SHADER_VERTEX ||
|
||||
ctx->type == PIPE_SHADER_TESS_EVAL));
|
||||
}
|
||||
|
||||
if (ctx->type == PIPE_SHADER_VERTEX) {
|
||||
declare_vs_specific_input_sgprs(ctx, &fninfo);
|
||||
if (vs_blit_property)
|
||||
declare_vs_blit_inputs(ctx, &fninfo, vs_blit_property);
|
||||
else
|
||||
declare_vs_specific_input_sgprs(ctx, &fninfo);
|
||||
} else {
|
||||
ctx->param_vs_state_bits = add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
||||
ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
|
||||
@@ -4747,8 +4762,9 @@ static void create_function(struct si_shader_context *ctx)
|
||||
declare_tes_input_vgprs(ctx, &fninfo);
|
||||
}
|
||||
|
||||
if (ctx->type == PIPE_SHADER_VERTEX ||
|
||||
ctx->type == PIPE_SHADER_TESS_EVAL) {
|
||||
if (ctx->shader->key.as_es &&
|
||||
(ctx->type == PIPE_SHADER_VERTEX ||
|
||||
ctx->type == PIPE_SHADER_TESS_EVAL)) {
|
||||
unsigned num_user_sgprs;
|
||||
|
||||
if (ctx->type == PIPE_SHADER_VERTEX)
|
||||
@@ -5925,6 +5941,8 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx)
|
||||
ctx->abi.emit_outputs = si_llvm_emit_es_epilogue;
|
||||
else if (shader->key.opt.vs_as_prim_discard_cs)
|
||||
ctx->abi.emit_outputs = si_llvm_emit_prim_discard_cs_epilogue;
|
||||
else if (shader->key.as_ngg)
|
||||
ctx->abi.emit_outputs = gfx10_emit_ngg_epilogue;
|
||||
else
|
||||
ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
|
||||
bld_base->emit_epilogue = si_tgsi_emit_epilogue;
|
||||
@@ -5948,8 +5966,12 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx)
|
||||
ctx->abi.load_patch_vertices_in = si_load_patch_vertices_in;
|
||||
if (shader->key.as_es)
|
||||
ctx->abi.emit_outputs = si_llvm_emit_es_epilogue;
|
||||
else
|
||||
ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
|
||||
else {
|
||||
if (shader->key.as_ngg)
|
||||
ctx->abi.emit_outputs = gfx10_emit_ngg_epilogue;
|
||||
else
|
||||
ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
|
||||
}
|
||||
bld_base->emit_epilogue = si_tgsi_emit_epilogue;
|
||||
break;
|
||||
case PIPE_SHADER_GEOMETRY:
|
||||
@@ -5994,6 +6016,10 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx)
|
||||
*
|
||||
* For monolithic merged shaders, the first shader is wrapped in an
|
||||
* if-block together with its prolog in si_build_wrapper_function.
|
||||
*
|
||||
* NGG vertex and tess eval shaders running as the last
|
||||
* vertex/geometry stage handle execution explicitly using
|
||||
* if-statements.
|
||||
*/
|
||||
if (ctx->screen->info.chip_class >= GFX9) {
|
||||
if (!shader->is_monolithic &&
|
||||
@@ -6005,28 +6031,50 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx)
|
||||
si_init_exec_from_input(ctx,
|
||||
ctx->param_merged_wave_info, 0);
|
||||
} else if (ctx->type == PIPE_SHADER_TESS_CTRL ||
|
||||
ctx->type == PIPE_SHADER_GEOMETRY) {
|
||||
ctx->type == PIPE_SHADER_GEOMETRY ||
|
||||
shader->key.as_ngg) {
|
||||
LLVMValueRef num_threads;
|
||||
bool nested_barrier;
|
||||
|
||||
if (!shader->is_monolithic)
|
||||
ac_init_exec_full_mask(&ctx->ac);
|
||||
|
||||
LLVMValueRef num_threads = si_unpack_param(ctx, ctx->param_merged_wave_info, 8, 8);
|
||||
if (ctx->type == PIPE_SHADER_TESS_CTRL ||
|
||||
ctx->type == PIPE_SHADER_GEOMETRY) {
|
||||
/* Number of patches / primitives */
|
||||
num_threads = si_unpack_param(ctx, ctx->param_merged_wave_info, 8, 8);
|
||||
nested_barrier = true;
|
||||
} else {
|
||||
/* Number of vertices */
|
||||
num_threads = si_unpack_param(ctx, ctx->param_merged_wave_info, 0, 8);
|
||||
nested_barrier = false;
|
||||
}
|
||||
|
||||
LLVMValueRef ena =
|
||||
LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
|
||||
ac_get_thread_id(&ctx->ac), num_threads, "");
|
||||
lp_build_if(&ctx->merged_wrap_if_state, &ctx->gallivm, ena);
|
||||
|
||||
/* The barrier must execute for all shaders in a
|
||||
* threadgroup.
|
||||
*
|
||||
* Execute the barrier inside the conditional block,
|
||||
* so that empty waves can jump directly to s_endpgm,
|
||||
* which will also signal the barrier.
|
||||
*
|
||||
* If the shader is TCS and the TCS epilog is present
|
||||
* and contains a barrier, it will wait there and then
|
||||
* reach s_endpgm.
|
||||
*/
|
||||
si_llvm_emit_barrier(NULL, bld_base, NULL);
|
||||
if (nested_barrier) {
|
||||
/* Execute a barrier before the second shader in
|
||||
* a merged shader.
|
||||
*
|
||||
* Execute the barrier inside the conditional block,
|
||||
* so that empty waves can jump directly to s_endpgm,
|
||||
* which will also signal the barrier.
|
||||
*
|
||||
* This is possible in gfx9, because an empty wave
|
||||
* for the second shader does not participate in
|
||||
* the epilogue. With NGG, empty waves may still
|
||||
* be required to export data (e.g. GS output vertices),
|
||||
* so we cannot let them exit early.
|
||||
*
|
||||
* If the shader is TCS and the TCS epilog is present
|
||||
* and contains a barrier, it will wait there and then
|
||||
* reach s_endpgm.
|
||||
*/
|
||||
si_llvm_emit_barrier(NULL, bld_base, NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6099,6 +6147,8 @@ static void si_get_vs_prolog_key(const struct tgsi_shader_info *info,
|
||||
} else if (shader_out->selector->type == PIPE_SHADER_GEOMETRY) {
|
||||
key->vs_prolog.as_es = 1;
|
||||
key->vs_prolog.num_merged_next_stage_vgprs = 5;
|
||||
} else if (shader_out->key.as_ngg) {
|
||||
key->vs_prolog.num_merged_next_stage_vgprs = 5;
|
||||
}
|
||||
|
||||
/* Enable loading the InstanceID VGPR. */
|
||||
@@ -7227,6 +7277,21 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx,
|
||||
key->vs_prolog.num_input_sgprs + i, "");
|
||||
}
|
||||
|
||||
struct lp_build_if_state wrap_if_state;
|
||||
LLVMValueRef original_ret = ret;
|
||||
bool wrapped = false;
|
||||
|
||||
if (key->vs_prolog.is_monolithic && key->vs_prolog.as_ngg) {
|
||||
LLVMValueRef num_threads;
|
||||
LLVMValueRef ena;
|
||||
|
||||
num_threads = si_unpack_param(ctx, 3, 0, 8);
|
||||
ena = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
|
||||
ac_get_thread_id(&ctx->ac), num_threads, "");
|
||||
lp_build_if(&wrap_if_state, &ctx->gallivm, ena);
|
||||
wrapped = true;
|
||||
}
|
||||
|
||||
/* Compute vertex load indices from instance divisors. */
|
||||
LLVMValueRef instance_divisor_constbuf = NULL;
|
||||
|
||||
@@ -7282,6 +7347,20 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx,
|
||||
fninfo.num_params + i, "");
|
||||
}
|
||||
|
||||
if (wrapped) {
|
||||
lp_build_endif(&wrap_if_state);
|
||||
|
||||
LLVMValueRef values[2] = {
|
||||
ret,
|
||||
original_ret
|
||||
};
|
||||
LLVMBasicBlockRef bbs[2] = {
|
||||
wrap_if_state.true_block,
|
||||
wrap_if_state.entry_block
|
||||
};
|
||||
ret = ac_build_phi(&ctx->ac, LLVMTypeOf(ret), 2, values, bbs);
|
||||
}
|
||||
|
||||
si_llvm_build_ret(ctx, ret);
|
||||
}
|
||||
|
||||
|
@@ -186,6 +186,13 @@ struct si_shader_context {
|
||||
int param_tes_rel_patch_id;
|
||||
/* HW ES */
|
||||
int param_es2gs_offset;
|
||||
/* HW GS */
|
||||
/* On gfx10:
|
||||
* - bits 0..10: ordered_wave_id
|
||||
* - bits 12..20: number of vertices in group
|
||||
* - bits 22..30: number of primitives in group
|
||||
*/
|
||||
LLVMValueRef gs_tg_info;
|
||||
/* API GS */
|
||||
int param_gs2vs_offset;
|
||||
int param_gs_wave_id; /* GFX6 */
|
||||
@@ -372,4 +379,8 @@ LLVMValueRef si_unpack_param(struct si_shader_context *ctx,
|
||||
unsigned param, unsigned rshift,
|
||||
unsigned bitwidth);
|
||||
|
||||
void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi,
|
||||
unsigned max_outputs,
|
||||
LLVMValueRef *addrs);
|
||||
|
||||
#endif
|
||||
|
@@ -1128,7 +1128,7 @@ void si_llvm_create_func(struct si_shader_context *ctx,
|
||||
if (ctx->screen->info.chip_class >= GFX9) {
|
||||
if (ctx->shader->key.as_ls)
|
||||
real_shader_type = PIPE_SHADER_TESS_CTRL;
|
||||
else if (ctx->shader->key.as_es)
|
||||
else if (ctx->shader->key.as_es || ctx->shader->key.as_ngg)
|
||||
real_shader_type = PIPE_SHADER_GEOMETRY;
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user