ac/llvm,radeonsi: lower nir primitive counter add intrinsics
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18010>
This commit is contained in:
@@ -4465,21 +4465,6 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
|
|||||||
result = ac_build_gather_values(&ctx->ac, global_count, instr->num_components);
|
result = ac_build_gather_values(&ctx->ac, global_count, instr->num_components);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case nir_intrinsic_atomic_add_gs_emit_prim_count_amd:
|
|
||||||
ctx->abi->atomic_add_prim_count(ctx->abi, ~0U, get_src(ctx, instr->src[0]),
|
|
||||||
ac_prim_count_gs_emit);
|
|
||||||
break;
|
|
||||||
case nir_intrinsic_atomic_add_gen_prim_count_amd:
|
|
||||||
case nir_intrinsic_atomic_add_xfb_prim_count_amd: {
|
|
||||||
LLVMValueRef prim_count = get_src(ctx, instr->src[0]);
|
|
||||||
unsigned stream = nir_intrinsic_stream_id(instr);
|
|
||||||
enum ac_prim_count count_type =
|
|
||||||
instr->intrinsic == nir_intrinsic_atomic_add_gen_prim_count_amd ?
|
|
||||||
ac_prim_count_gen : ac_prim_count_xfb;
|
|
||||||
|
|
||||||
ctx->abi->atomic_add_prim_count(ctx->abi, stream, prim_count, count_type);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
default:
|
default:
|
||||||
fprintf(stderr, "Unknown intrinsic: ");
|
fprintf(stderr, "Unknown intrinsic: ");
|
||||||
nir_print_instr(&instr->instr, stderr);
|
nir_print_instr(&instr->instr, stderr);
|
||||||
|
@@ -34,12 +34,6 @@
|
|||||||
|
|
||||||
#define AC_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1)
|
#define AC_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1)
|
||||||
|
|
||||||
enum ac_prim_count {
|
|
||||||
ac_prim_count_gs_emit,
|
|
||||||
ac_prim_count_gen,
|
|
||||||
ac_prim_count_xfb,
|
|
||||||
};
|
|
||||||
|
|
||||||
/* Document the shader ABI during compilation. This is what allows radeonsi and
|
/* Document the shader ABI during compilation. This is what allows radeonsi and
|
||||||
* radv to share a compiler backend.
|
* radv to share a compiler backend.
|
||||||
*/
|
*/
|
||||||
@@ -73,9 +67,6 @@ struct ac_shader_abi {
|
|||||||
void (*emit_vertex_with_counter)(struct ac_shader_abi *abi, unsigned stream,
|
void (*emit_vertex_with_counter)(struct ac_shader_abi *abi, unsigned stream,
|
||||||
LLVMValueRef vertexidx, LLVMValueRef *addrs);
|
LLVMValueRef vertexidx, LLVMValueRef *addrs);
|
||||||
|
|
||||||
void (*atomic_add_prim_count)(struct ac_shader_abi *abi, unsigned stream,
|
|
||||||
LLVMValueRef prim_count, enum ac_prim_count count_type);
|
|
||||||
|
|
||||||
LLVMValueRef (*load_inputs)(struct ac_shader_abi *abi,
|
LLVMValueRef (*load_inputs)(struct ac_shader_abi *abi,
|
||||||
unsigned driver_location, unsigned component,
|
unsigned driver_location, unsigned component,
|
||||||
unsigned num_components, unsigned vertex_index,
|
unsigned num_components, unsigned vertex_index,
|
||||||
|
@@ -124,36 +124,6 @@ void gfx10_ngg_export_vertex(struct ac_shader_abi *abi)
|
|||||||
si_llvm_build_vs_exports(ctx, outputs, num_outputs);
|
si_llvm_build_vs_exports(ctx, outputs, num_outputs);
|
||||||
}
|
}
|
||||||
|
|
||||||
void gfx10_ngg_atomic_add_prim_count(struct ac_shader_abi *abi, unsigned stream,
|
|
||||||
LLVMValueRef prim_count, enum ac_prim_count count_type)
|
|
||||||
{
|
|
||||||
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
|
||||||
|
|
||||||
unsigned offset;
|
|
||||||
LLVMValueRef query_buf;
|
|
||||||
if (count_type == ac_prim_count_gs_emit) {
|
|
||||||
offset = si_query_pipestat_end_dw_offset(ctx->screen, PIPE_STAT_QUERY_GS_PRIMITIVES) * 4;
|
|
||||||
query_buf = ngg_get_emulated_counters_buf(ctx);
|
|
||||||
} else {
|
|
||||||
offset = count_type == ac_prim_count_gen ?
|
|
||||||
offsetof(struct gfx10_sh_query_buffer_mem, stream[stream].generated_primitives) :
|
|
||||||
offsetof(struct gfx10_sh_query_buffer_mem, stream[stream].emitted_primitives);
|
|
||||||
|
|
||||||
query_buf = ngg_get_query_buf(ctx);
|
|
||||||
}
|
|
||||||
|
|
||||||
LLVMValueRef args[] = {
|
|
||||||
prim_count,
|
|
||||||
query_buf,
|
|
||||||
LLVMConstInt(ctx->ac.i32, offset, false),
|
|
||||||
ctx->ac.i32_0, /* soffset */
|
|
||||||
ctx->ac.i32_0, /* cachepolicy */
|
|
||||||
};
|
|
||||||
|
|
||||||
ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.raw.buffer.atomic.add.i32",
|
|
||||||
ctx->ac.i32, args, 5, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
void gfx10_ngg_gs_emit_begin(struct si_shader_context *ctx)
|
void gfx10_ngg_gs_emit_begin(struct si_shader_context *ctx)
|
||||||
{
|
{
|
||||||
LLVMBuilderRef builder = ctx->ac.builder;
|
LLVMBuilderRef builder = ctx->ac.builder;
|
||||||
|
@@ -233,6 +233,26 @@ static bool lower_abi_instr(nir_builder *b, nir_instr *instr, struct lower_abi_s
|
|||||||
replacement = load_internal_binding(b, args, slot);
|
replacement = load_internal_binding(b, args, slot);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case nir_intrinsic_atomic_add_gs_emit_prim_count_amd:
|
||||||
|
case nir_intrinsic_atomic_add_gen_prim_count_amd:
|
||||||
|
case nir_intrinsic_atomic_add_xfb_prim_count_amd: {
|
||||||
|
unsigned offset;
|
||||||
|
nir_ssa_def *buf;
|
||||||
|
if (intrin->intrinsic == nir_intrinsic_atomic_add_gs_emit_prim_count_amd) {
|
||||||
|
buf = load_internal_binding(b, args, SI_GS_QUERY_EMULATED_COUNTERS_BUF);
|
||||||
|
offset = si_query_pipestat_end_dw_offset(sel->screen, PIPE_STAT_QUERY_GS_PRIMITIVES) * 4;
|
||||||
|
} else {
|
||||||
|
unsigned stream = nir_intrinsic_stream_id(intrin);
|
||||||
|
buf = load_internal_binding(b, args, SI_GS_QUERY_BUF);
|
||||||
|
offset = intrin->intrinsic == nir_intrinsic_atomic_add_gen_prim_count_amd ?
|
||||||
|
offsetof(struct gfx10_sh_query_buffer_mem, stream[stream].generated_primitives) :
|
||||||
|
offsetof(struct gfx10_sh_query_buffer_mem, stream[stream].emitted_primitives);
|
||||||
|
}
|
||||||
|
|
||||||
|
nir_ssa_def *prim_count = intrin->src[0].ssa;
|
||||||
|
nir_buffer_atomic_add_amd(b, 32, buf, prim_count, .base = offset);
|
||||||
|
break;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@@ -183,8 +183,6 @@ LLVMValueRef gfx10_get_thread_id_in_tg(struct si_shader_context *ctx);
|
|||||||
unsigned gfx10_ngg_get_vertices_per_prim(struct si_shader *shader);
|
unsigned gfx10_ngg_get_vertices_per_prim(struct si_shader *shader);
|
||||||
bool gfx10_ngg_export_prim_early(struct si_shader *shader);
|
bool gfx10_ngg_export_prim_early(struct si_shader *shader);
|
||||||
void gfx10_ngg_export_vertex(struct ac_shader_abi *abi);
|
void gfx10_ngg_export_vertex(struct ac_shader_abi *abi);
|
||||||
void gfx10_ngg_atomic_add_prim_count(struct ac_shader_abi *abi, unsigned stream,
|
|
||||||
LLVMValueRef prim_count, enum ac_prim_count count_type);
|
|
||||||
void gfx10_ngg_gs_emit_begin(struct si_shader_context *ctx);
|
void gfx10_ngg_gs_emit_begin(struct si_shader_context *ctx);
|
||||||
unsigned gfx10_ngg_get_scratch_dw_size(struct si_shader *shader);
|
unsigned gfx10_ngg_get_scratch_dw_size(struct si_shader *shader);
|
||||||
bool gfx10_ngg_calculate_subgroup_info(struct si_shader *shader);
|
bool gfx10_ngg_calculate_subgroup_info(struct si_shader *shader);
|
||||||
|
@@ -778,7 +778,6 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
|
|||||||
|
|
||||||
ctx->abi.intrinsic_load = si_llvm_load_intrinsic;
|
ctx->abi.intrinsic_load = si_llvm_load_intrinsic;
|
||||||
ctx->abi.export_vertex = gfx10_ngg_export_vertex;
|
ctx->abi.export_vertex = gfx10_ngg_export_vertex;
|
||||||
ctx->abi.atomic_add_prim_count = gfx10_ngg_atomic_add_prim_count;
|
|
||||||
|
|
||||||
si_llvm_init_resource_callbacks(ctx);
|
si_llvm_init_resource_callbacks(ctx);
|
||||||
si_llvm_create_main_func(ctx);
|
si_llvm_create_main_func(ctx);
|
||||||
|
Reference in New Issue
Block a user