radv: enable lowering of GS intrinsics for the LLVM backend
This replaces emit_vertex with:

    if (vertex_count < max_vertices) {
        emit_vertex_with_counter vertex_count ...
        vertex_count += 1
    }

which is exactly what NIR->LLVM was doing, but at the NIR level. This pass is already called by ACO.

pipeline-db changes on GFX10:
Totals from affected shaders:
SGPRS: 1952 -> 1912 (-2.05 %)
VGPRS: 2112 -> 2044 (-3.22 %)
Code Size: 189368 -> 185620 (-1.98 %) bytes
Max Waves: 494 -> 491 (-0.61 %)

No pipeline-db changes on other generations.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4182>
This commit is contained in:
@@ -3938,7 +3938,16 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
|
|||||||
case nir_intrinsic_emit_vertex:
|
case nir_intrinsic_emit_vertex:
|
||||||
ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs);
|
ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs);
|
||||||
break;
|
break;
|
||||||
|
case nir_intrinsic_emit_vertex_with_counter: {
|
||||||
|
unsigned stream = nir_intrinsic_stream_id(instr);
|
||||||
|
LLVMValueRef next_vertex = get_src(ctx, instr->src[0]);
|
||||||
|
ctx->abi->emit_vertex_with_counter(ctx->abi, stream,
|
||||||
|
next_vertex,
|
||||||
|
ctx->abi->outputs);
|
||||||
|
break;
|
||||||
|
}
|
||||||
case nir_intrinsic_end_primitive:
|
case nir_intrinsic_end_primitive:
|
||||||
|
case nir_intrinsic_end_primitive_with_counter:
|
||||||
ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr));
|
ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr));
|
||||||
break;
|
break;
|
||||||
case nir_intrinsic_load_tess_coord:
|
case nir_intrinsic_load_tess_coord:
|
||||||
|
@@ -80,6 +80,11 @@ struct ac_shader_abi {
|
|||||||
void (*emit_primitive)(struct ac_shader_abi *abi,
|
void (*emit_primitive)(struct ac_shader_abi *abi,
|
||||||
unsigned stream);
|
unsigned stream);
|
||||||
|
|
||||||
|
void (*emit_vertex_with_counter)(struct ac_shader_abi *abi,
|
||||||
|
unsigned stream,
|
||||||
|
LLVMValueRef vertexidx,
|
||||||
|
LLVMValueRef *addrs);
|
||||||
|
|
||||||
LLVMValueRef (*load_inputs)(struct ac_shader_abi *abi,
|
LLVMValueRef (*load_inputs)(struct ac_shader_abi *abi,
|
||||||
unsigned location,
|
unsigned location,
|
||||||
unsigned driver_location,
|
unsigned driver_location,
|
||||||
|
@@ -876,39 +876,21 @@ static LLVMValueRef load_sample_mask_in(struct ac_shader_abi *abi)
|
|||||||
|
|
||||||
static void gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx,
|
static void gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx,
|
||||||
unsigned stream,
|
unsigned stream,
|
||||||
|
LLVMValueRef vertexidx,
|
||||||
LLVMValueRef *addrs);
|
LLVMValueRef *addrs);
|
||||||
|
|
||||||
static void
|
static void
|
||||||
visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addrs)
|
visit_emit_vertex_with_counter(struct ac_shader_abi *abi, unsigned stream,
|
||||||
|
LLVMValueRef vertexidx, LLVMValueRef *addrs)
|
||||||
{
|
{
|
||||||
LLVMValueRef gs_next_vertex;
|
|
||||||
LLVMValueRef can_emit;
|
|
||||||
unsigned offset = 0;
|
unsigned offset = 0;
|
||||||
struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
|
struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
|
||||||
|
|
||||||
if (ctx->args->options->key.vs_common_out.as_ngg) {
|
if (ctx->args->options->key.vs_common_out.as_ngg) {
|
||||||
gfx10_ngg_gs_emit_vertex(ctx, stream, addrs);
|
gfx10_ngg_gs_emit_vertex(ctx, stream, vertexidx, addrs);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Write vertex attribute values to GSVS ring */
|
|
||||||
gs_next_vertex = LLVMBuildLoad(ctx->ac.builder,
|
|
||||||
ctx->gs_next_vertex[stream],
|
|
||||||
"");
|
|
||||||
|
|
||||||
/* If this thread has already emitted the declared maximum number of
|
|
||||||
* vertices, don't emit any more: excessive vertex emissions are not
|
|
||||||
* supposed to have any effect.
|
|
||||||
*/
|
|
||||||
can_emit = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, gs_next_vertex,
|
|
||||||
LLVMConstInt(ctx->ac.i32, ctx->shader->info.gs.vertices_out, false), "");
|
|
||||||
|
|
||||||
bool use_kill = !ctx->args->shader_info->gs.writes_memory;
|
|
||||||
if (use_kill)
|
|
||||||
ac_build_kill_if_false(&ctx->ac, can_emit);
|
|
||||||
else
|
|
||||||
ac_build_ifcc(&ctx->ac, can_emit, 6505);
|
|
||||||
|
|
||||||
for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
|
for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
|
||||||
unsigned output_usage_mask =
|
unsigned output_usage_mask =
|
||||||
ctx->args->shader_info->gs.output_usage_mask[i];
|
ctx->args->shader_info->gs.output_usage_mask[i];
|
||||||
@@ -933,7 +915,7 @@ visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addr
|
|||||||
|
|
||||||
offset++;
|
offset++;
|
||||||
|
|
||||||
voffset = LLVMBuildAdd(ctx->ac.builder, voffset, gs_next_vertex, "");
|
voffset = LLVMBuildAdd(ctx->ac.builder, voffset, vertexidx, "");
|
||||||
voffset = LLVMBuildMul(ctx->ac.builder, voffset, LLVMConstInt(ctx->ac.i32, 4, false), "");
|
voffset = LLVMBuildMul(ctx->ac.builder, voffset, LLVMConstInt(ctx->ac.i32, 4, false), "");
|
||||||
|
|
||||||
out_val = ac_to_integer(&ctx->ac, out_val);
|
out_val = ac_to_integer(&ctx->ac, out_val);
|
||||||
@@ -949,16 +931,9 @@ visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addr
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
gs_next_vertex = LLVMBuildAdd(ctx->ac.builder, gs_next_vertex,
|
|
||||||
ctx->ac.i32_1, "");
|
|
||||||
LLVMBuildStore(ctx->ac.builder, gs_next_vertex, ctx->gs_next_vertex[stream]);
|
|
||||||
|
|
||||||
ac_build_sendmsg(&ctx->ac,
|
ac_build_sendmsg(&ctx->ac,
|
||||||
AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),
|
AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),
|
||||||
ctx->gs_wave_id);
|
ctx->gs_wave_id);
|
||||||
|
|
||||||
if (!use_kill)
|
|
||||||
ac_build_endif(&ctx->ac, 6505);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@@ -3309,25 +3284,11 @@ static void gfx10_ngg_gs_emit_epilogue_2(struct radv_shader_context *ctx)
|
|||||||
|
|
||||||
static void gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx,
|
static void gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx,
|
||||||
unsigned stream,
|
unsigned stream,
|
||||||
|
LLVMValueRef vertexidx,
|
||||||
LLVMValueRef *addrs)
|
LLVMValueRef *addrs)
|
||||||
{
|
{
|
||||||
LLVMBuilderRef builder = ctx->ac.builder;
|
LLVMBuilderRef builder = ctx->ac.builder;
|
||||||
LLVMValueRef tmp;
|
LLVMValueRef tmp;
|
||||||
const LLVMValueRef vertexidx =
|
|
||||||
LLVMBuildLoad(builder, ctx->gs_next_vertex[stream], "");
|
|
||||||
|
|
||||||
/* If this thread has already emitted the declared maximum number of
|
|
||||||
* vertices, skip the write: excessive vertex emissions are not
|
|
||||||
* supposed to have any effect.
|
|
||||||
*/
|
|
||||||
const LLVMValueRef can_emit =
|
|
||||||
LLVMBuildICmp(builder, LLVMIntULT, vertexidx,
|
|
||||||
LLVMConstInt(ctx->ac.i32, ctx->shader->info.gs.vertices_out, false), "");
|
|
||||||
ac_build_ifcc(&ctx->ac, can_emit, 9001);
|
|
||||||
|
|
||||||
tmp = LLVMBuildAdd(builder, vertexidx, ctx->ac.i32_1, "");
|
|
||||||
tmp = LLVMBuildSelect(builder, can_emit, tmp, vertexidx, "");
|
|
||||||
LLVMBuildStore(builder, tmp, ctx->gs_next_vertex[stream]);
|
|
||||||
|
|
||||||
const LLVMValueRef vertexptr =
|
const LLVMValueRef vertexptr =
|
||||||
ngg_gs_emit_vertex_ptr(ctx, get_thread_id_in_tg(ctx), vertexidx);
|
ngg_gs_emit_vertex_ptr(ctx, get_thread_id_in_tg(ctx), vertexidx);
|
||||||
@@ -3359,6 +3320,13 @@ static void gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx,
|
|||||||
}
|
}
|
||||||
assert(out_idx * 4 <= ctx->args->shader_info->gs.gsvs_vertex_size);
|
assert(out_idx * 4 <= ctx->args->shader_info->gs.gsvs_vertex_size);
|
||||||
|
|
||||||
|
/* Store the current number of emitted vertices to zero out remaining
|
||||||
|
* primitive flags in case the geometry shader doesn't emit the maximum
|
||||||
|
* number of vertices.
|
||||||
|
*/
|
||||||
|
tmp = LLVMBuildAdd(builder, vertexidx, ctx->ac.i32_1, "");
|
||||||
|
LLVMBuildStore(builder, tmp, ctx->gs_next_vertex[stream]);
|
||||||
|
|
||||||
/* Determine and store whether this vertex completed a primitive. */
|
/* Determine and store whether this vertex completed a primitive. */
|
||||||
const LLVMValueRef curverts = LLVMBuildLoad(builder, ctx->gs_curprim_verts[stream], "");
|
const LLVMValueRef curverts = LLVMBuildLoad(builder, ctx->gs_curprim_verts[stream], "");
|
||||||
|
|
||||||
@@ -3395,8 +3363,6 @@ static void gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx,
|
|||||||
tmp = LLVMBuildLoad(builder, ctx->gs_generated_prims[stream], "");
|
tmp = LLVMBuildLoad(builder, ctx->gs_generated_prims[stream], "");
|
||||||
tmp = LLVMBuildAdd(builder, tmp, LLVMBuildZExt(builder, iscompleteprim, ctx->ac.i32, ""), "");
|
tmp = LLVMBuildAdd(builder, tmp, LLVMBuildZExt(builder, iscompleteprim, ctx->ac.i32, ""), "");
|
||||||
LLVMBuildStore(builder, tmp, ctx->gs_generated_prims[stream]);
|
LLVMBuildStore(builder, tmp, ctx->gs_generated_prims[stream]);
|
||||||
|
|
||||||
ac_build_endif(&ctx->ac, 9001);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@@ -3948,7 +3914,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
|
|||||||
|
|
||||||
ctx.abi.inputs = &ctx.inputs[0];
|
ctx.abi.inputs = &ctx.inputs[0];
|
||||||
ctx.abi.emit_outputs = handle_shader_outputs_post;
|
ctx.abi.emit_outputs = handle_shader_outputs_post;
|
||||||
ctx.abi.emit_vertex = visit_emit_vertex;
|
ctx.abi.emit_vertex_with_counter = visit_emit_vertex_with_counter;
|
||||||
ctx.abi.load_ubo = radv_load_ubo;
|
ctx.abi.load_ubo = radv_load_ubo;
|
||||||
ctx.abi.load_ssbo = radv_load_ssbo;
|
ctx.abi.load_ssbo = radv_load_ssbo;
|
||||||
ctx.abi.load_sampler_desc = radv_get_sampler_desc;
|
ctx.abi.load_sampler_desc = radv_get_sampler_desc;
|
||||||
|
@@ -453,8 +453,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
|
|||||||
|
|
||||||
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
|
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
|
||||||
|
|
||||||
if (nir->info.stage == MESA_SHADER_GEOMETRY &&
|
if (nir->info.stage == MESA_SHADER_GEOMETRY)
|
||||||
device->physical_device->use_aco)
|
|
||||||
nir_lower_gs_intrinsics(nir, true);
|
nir_lower_gs_intrinsics(nir, true);
|
||||||
|
|
||||||
static const nir_lower_tex_options tex_options = {
|
static const nir_lower_tex_options tex_options = {
|
||||||
|
Reference in New Issue
Block a user