ac/llvm,radeonsi: lower nir primitive counter add intrinsics

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18010>
This commit is contained in:
Qiang Yu
2022-08-11 10:19:47 +08:00
committed by Marge Bot
parent bb837bf6ef
commit daaa8ddb8e
6 changed files with 20 additions and 57 deletions

View File

@@ -4465,21 +4465,6 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
result = ac_build_gather_values(&ctx->ac, global_count, instr->num_components);
break;
}
case nir_intrinsic_atomic_add_gs_emit_prim_count_amd:
ctx->abi->atomic_add_prim_count(ctx->abi, ~0U, get_src(ctx, instr->src[0]),
ac_prim_count_gs_emit);
break;
case nir_intrinsic_atomic_add_gen_prim_count_amd:
case nir_intrinsic_atomic_add_xfb_prim_count_amd: {
LLVMValueRef prim_count = get_src(ctx, instr->src[0]);
unsigned stream = nir_intrinsic_stream_id(instr);
enum ac_prim_count count_type =
instr->intrinsic == nir_intrinsic_atomic_add_gen_prim_count_amd ?
ac_prim_count_gen : ac_prim_count_xfb;
ctx->abi->atomic_add_prim_count(ctx->abi, stream, prim_count, count_type);
break;
}
default:
fprintf(stderr, "Unknown intrinsic: ");
nir_print_instr(&instr->instr, stderr);

View File

@@ -34,12 +34,6 @@
#define AC_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1)
/* Selects which primitive counter an atomic_add_prim_count request targets. */
enum ac_prim_count {
   /* Primitives emitted by the GS (pipeline-statistics style counter). */
   ac_prim_count_gs_emit = 0,
   /* Per-stream "generated primitives" counter. */
   ac_prim_count_gen = 1,
   /* Per-stream transform-feedback "emitted primitives" counter. */
   ac_prim_count_xfb = 2,
};
/* Document the shader ABI during compilation. This is what allows radeonsi and
* radv to share a compiler backend.
*/
@@ -73,9 +67,6 @@ struct ac_shader_abi {
void (*emit_vertex_with_counter)(struct ac_shader_abi *abi, unsigned stream,
LLVMValueRef vertexidx, LLVMValueRef *addrs);
void (*atomic_add_prim_count)(struct ac_shader_abi *abi, unsigned stream,
LLVMValueRef prim_count, enum ac_prim_count count_type);
LLVMValueRef (*load_inputs)(struct ac_shader_abi *abi,
unsigned driver_location, unsigned component,
unsigned num_components, unsigned vertex_index,

View File

@@ -124,36 +124,6 @@ void gfx10_ngg_export_vertex(struct ac_shader_abi *abi)
si_llvm_build_vs_exports(ctx, outputs, num_outputs);
}
/*
 * Emit a 32-bit raw-buffer atomic add of prim_count into one of the NGG
 * query counters.
 *
 * abi:        shader ABI; the owning si_shader_context is recovered from it.
 * stream:     stream index; only used to pick the per-stream slot for
 *             ac_prim_count_gen / ac_prim_count_xfb.
 * prim_count: value to add.
 * count_type: selects the destination buffer and offset.
 */
void gfx10_ngg_atomic_add_prim_count(struct ac_shader_abi *abi, unsigned stream,
                                     LLVMValueRef prim_count, enum ac_prim_count count_type)
{
   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
   LLVMValueRef dst_buf;
   unsigned byte_offset;

   if (count_type != ac_prim_count_gs_emit) {
      /* Per-stream counters: generated vs. emitted primitives of the given stream. */
      if (count_type == ac_prim_count_gen)
         byte_offset = offsetof(struct gfx10_sh_query_buffer_mem, stream[stream].generated_primitives);
      else
         byte_offset = offsetof(struct gfx10_sh_query_buffer_mem, stream[stream].emitted_primitives);

      dst_buf = ngg_get_query_buf(ctx);
   } else {
      /* The helper's name suggests it returns a dword offset, hence the * 4
       * to get bytes (NOTE(review): confirm against the helper).
       */
      byte_offset = si_query_pipestat_end_dw_offset(ctx->screen, PIPE_STAT_QUERY_GS_PRIMITIVES) * 4;
      dst_buf = ngg_get_emulated_counters_buf(ctx);
   }

   LLVMValueRef offset_val = LLVMConstInt(ctx->ac.i32, byte_offset, false);

   LLVMValueRef atomic_args[] = {
      prim_count,
      dst_buf,
      offset_val,
      ctx->ac.i32_0, /* soffset */
      ctx->ac.i32_0, /* cachepolicy */
   };

   ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.raw.buffer.atomic.add.i32",
                      ctx->ac.i32, atomic_args, 5, 0);
}
void gfx10_ngg_gs_emit_begin(struct si_shader_context *ctx)
{
LLVMBuilderRef builder = ctx->ac.builder;

View File

@@ -233,6 +233,26 @@ static bool lower_abi_instr(nir_builder *b, nir_instr *instr, struct lower_abi_s
replacement = load_internal_binding(b, args, slot);
break;
}
case nir_intrinsic_atomic_add_gs_emit_prim_count_amd:
case nir_intrinsic_atomic_add_gen_prim_count_amd:
case nir_intrinsic_atomic_add_xfb_prim_count_amd: {
unsigned offset;
nir_ssa_def *buf;
if (intrin->intrinsic == nir_intrinsic_atomic_add_gs_emit_prim_count_amd) {
buf = load_internal_binding(b, args, SI_GS_QUERY_EMULATED_COUNTERS_BUF);
offset = si_query_pipestat_end_dw_offset(sel->screen, PIPE_STAT_QUERY_GS_PRIMITIVES) * 4;
} else {
unsigned stream = nir_intrinsic_stream_id(intrin);
buf = load_internal_binding(b, args, SI_GS_QUERY_BUF);
offset = intrin->intrinsic == nir_intrinsic_atomic_add_gen_prim_count_amd ?
offsetof(struct gfx10_sh_query_buffer_mem, stream[stream].generated_primitives) :
offsetof(struct gfx10_sh_query_buffer_mem, stream[stream].emitted_primitives);
}
nir_ssa_def *prim_count = intrin->src[0].ssa;
nir_buffer_atomic_add_amd(b, 32, buf, prim_count, .base = offset);
break;
}
default:
return false;
}

View File

@@ -183,8 +183,6 @@ LLVMValueRef gfx10_get_thread_id_in_tg(struct si_shader_context *ctx);
unsigned gfx10_ngg_get_vertices_per_prim(struct si_shader *shader);
bool gfx10_ngg_export_prim_early(struct si_shader *shader);
void gfx10_ngg_export_vertex(struct ac_shader_abi *abi);
/* Emits an atomic add of prim_count to the query counter selected by
 * count_type; stream picks the per-stream slot for the gen/xfb counters.
 */
void gfx10_ngg_atomic_add_prim_count(struct ac_shader_abi *abi, unsigned stream,
LLVMValueRef prim_count, enum ac_prim_count count_type);
void gfx10_ngg_gs_emit_begin(struct si_shader_context *ctx);
unsigned gfx10_ngg_get_scratch_dw_size(struct si_shader *shader);
bool gfx10_ngg_calculate_subgroup_info(struct si_shader *shader);

View File

@@ -778,7 +778,6 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
ctx->abi.intrinsic_load = si_llvm_load_intrinsic;
ctx->abi.export_vertex = gfx10_ngg_export_vertex;
ctx->abi.atomic_add_prim_count = gfx10_ngg_atomic_add_prim_count;
si_llvm_init_resource_callbacks(ctx);
si_llvm_create_main_func(ctx);