ac/llvm,radeonsi: lower nir primitive counter add intrinsics
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18010>
This commit is contained in:
@@ -4465,21 +4465,6 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
|
||||
result = ac_build_gather_values(&ctx->ac, global_count, instr->num_components);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_atomic_add_gs_emit_prim_count_amd:
|
||||
ctx->abi->atomic_add_prim_count(ctx->abi, ~0U, get_src(ctx, instr->src[0]),
|
||||
ac_prim_count_gs_emit);
|
||||
break;
|
||||
case nir_intrinsic_atomic_add_gen_prim_count_amd:
|
||||
case nir_intrinsic_atomic_add_xfb_prim_count_amd: {
|
||||
LLVMValueRef prim_count = get_src(ctx, instr->src[0]);
|
||||
unsigned stream = nir_intrinsic_stream_id(instr);
|
||||
enum ac_prim_count count_type =
|
||||
instr->intrinsic == nir_intrinsic_atomic_add_gen_prim_count_amd ?
|
||||
ac_prim_count_gen : ac_prim_count_xfb;
|
||||
|
||||
ctx->abi->atomic_add_prim_count(ctx->abi, stream, prim_count, count_type);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
fprintf(stderr, "Unknown intrinsic: ");
|
||||
nir_print_instr(&instr->instr, stderr);
|
||||
|
@@ -34,12 +34,6 @@
|
||||
|
||||
#define AC_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1)
|
||||
|
||||
enum ac_prim_count {
|
||||
ac_prim_count_gs_emit,
|
||||
ac_prim_count_gen,
|
||||
ac_prim_count_xfb,
|
||||
};
|
||||
|
||||
/* Document the shader ABI during compilation. This is what allows radeonsi and
|
||||
* radv to share a compiler backend.
|
||||
*/
|
||||
@@ -73,9 +67,6 @@ struct ac_shader_abi {
|
||||
void (*emit_vertex_with_counter)(struct ac_shader_abi *abi, unsigned stream,
|
||||
LLVMValueRef vertexidx, LLVMValueRef *addrs);
|
||||
|
||||
void (*atomic_add_prim_count)(struct ac_shader_abi *abi, unsigned stream,
|
||||
LLVMValueRef prim_count, enum ac_prim_count count_type);
|
||||
|
||||
LLVMValueRef (*load_inputs)(struct ac_shader_abi *abi,
|
||||
unsigned driver_location, unsigned component,
|
||||
unsigned num_components, unsigned vertex_index,
|
||||
|
@@ -124,36 +124,6 @@ void gfx10_ngg_export_vertex(struct ac_shader_abi *abi)
|
||||
si_llvm_build_vs_exports(ctx, outputs, num_outputs);
|
||||
}
|
||||
|
||||
void gfx10_ngg_atomic_add_prim_count(struct ac_shader_abi *abi, unsigned stream,
|
||||
LLVMValueRef prim_count, enum ac_prim_count count_type)
|
||||
{
|
||||
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
||||
|
||||
unsigned offset;
|
||||
LLVMValueRef query_buf;
|
||||
if (count_type == ac_prim_count_gs_emit) {
|
||||
offset = si_query_pipestat_end_dw_offset(ctx->screen, PIPE_STAT_QUERY_GS_PRIMITIVES) * 4;
|
||||
query_buf = ngg_get_emulated_counters_buf(ctx);
|
||||
} else {
|
||||
offset = count_type == ac_prim_count_gen ?
|
||||
offsetof(struct gfx10_sh_query_buffer_mem, stream[stream].generated_primitives) :
|
||||
offsetof(struct gfx10_sh_query_buffer_mem, stream[stream].emitted_primitives);
|
||||
|
||||
query_buf = ngg_get_query_buf(ctx);
|
||||
}
|
||||
|
||||
LLVMValueRef args[] = {
|
||||
prim_count,
|
||||
query_buf,
|
||||
LLVMConstInt(ctx->ac.i32, offset, false),
|
||||
ctx->ac.i32_0, /* soffset */
|
||||
ctx->ac.i32_0, /* cachepolicy */
|
||||
};
|
||||
|
||||
ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.raw.buffer.atomic.add.i32",
|
||||
ctx->ac.i32, args, 5, 0);
|
||||
}
|
||||
|
||||
void gfx10_ngg_gs_emit_begin(struct si_shader_context *ctx)
|
||||
{
|
||||
LLVMBuilderRef builder = ctx->ac.builder;
|
||||
|
@@ -233,6 +233,26 @@ static bool lower_abi_instr(nir_builder *b, nir_instr *instr, struct lower_abi_s
|
||||
replacement = load_internal_binding(b, args, slot);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_atomic_add_gs_emit_prim_count_amd:
|
||||
case nir_intrinsic_atomic_add_gen_prim_count_amd:
|
||||
case nir_intrinsic_atomic_add_xfb_prim_count_amd: {
|
||||
unsigned offset;
|
||||
nir_ssa_def *buf;
|
||||
if (intrin->intrinsic == nir_intrinsic_atomic_add_gs_emit_prim_count_amd) {
|
||||
buf = load_internal_binding(b, args, SI_GS_QUERY_EMULATED_COUNTERS_BUF);
|
||||
offset = si_query_pipestat_end_dw_offset(sel->screen, PIPE_STAT_QUERY_GS_PRIMITIVES) * 4;
|
||||
} else {
|
||||
unsigned stream = nir_intrinsic_stream_id(intrin);
|
||||
buf = load_internal_binding(b, args, SI_GS_QUERY_BUF);
|
||||
offset = intrin->intrinsic == nir_intrinsic_atomic_add_gen_prim_count_amd ?
|
||||
offsetof(struct gfx10_sh_query_buffer_mem, stream[stream].generated_primitives) :
|
||||
offsetof(struct gfx10_sh_query_buffer_mem, stream[stream].emitted_primitives);
|
||||
}
|
||||
|
||||
nir_ssa_def *prim_count = intrin->src[0].ssa;
|
||||
nir_buffer_atomic_add_amd(b, 32, buf, prim_count, .base = offset);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
@@ -183,8 +183,6 @@ LLVMValueRef gfx10_get_thread_id_in_tg(struct si_shader_context *ctx);
|
||||
unsigned gfx10_ngg_get_vertices_per_prim(struct si_shader *shader);
|
||||
bool gfx10_ngg_export_prim_early(struct si_shader *shader);
|
||||
void gfx10_ngg_export_vertex(struct ac_shader_abi *abi);
|
||||
void gfx10_ngg_atomic_add_prim_count(struct ac_shader_abi *abi, unsigned stream,
|
||||
LLVMValueRef prim_count, enum ac_prim_count count_type);
|
||||
void gfx10_ngg_gs_emit_begin(struct si_shader_context *ctx);
|
||||
unsigned gfx10_ngg_get_scratch_dw_size(struct si_shader *shader);
|
||||
bool gfx10_ngg_calculate_subgroup_info(struct si_shader *shader);
|
||||
|
@@ -778,7 +778,6 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
|
||||
|
||||
ctx->abi.intrinsic_load = si_llvm_load_intrinsic;
|
||||
ctx->abi.export_vertex = gfx10_ngg_export_vertex;
|
||||
ctx->abi.atomic_add_prim_count = gfx10_ngg_atomic_add_prim_count;
|
||||
|
||||
si_llvm_init_resource_callbacks(ctx);
|
||||
si_llvm_create_main_func(ctx);
|
||||
|
Reference in New Issue
Block a user