amd: Use inverse ballot intrinsic if available

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25123>
This commit is contained in:
Connor Abbott
2019-02-01 12:36:56 +01:00
committed by Marge Bot
parent 4282386311
commit c93bcb32fe
3 changed files with 30 additions and 1 deletion

View File

@@ -8306,6 +8306,20 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
set_wqm(ctx);
break;
}
case nir_intrinsic_inverse_ballot: {
/* Select inverse_ballot by moving the ballot value into the destination,
 * resizing it first when the two temporaries differ in size. The source is
 * routed through bld.as_uniform() before it is used. */
Temp src = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
Temp dst = get_ssa_temp(ctx, &instr->def);
/* The destination must be exactly as large as bld.lm
 * (presumably the wave's lane-mask register class -- TODO confirm). */
assert(dst.size() == bld.lm.size());
if (src.size() > dst.size()) {
/* Source is larger than the destination: extract index 0 of src into dst. */
emit_extract_vector(ctx, src, 0, dst);
} else if (src.size() < dst.size()) {
/* Source is smaller: build dst from src followed by a zero operand. */
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), src, Operand::zero());
} else {
/* Sizes already match: a plain copy is sufficient. */
bld.copy(Definition(dst), src);
}
break;
}
case nir_intrinsic_shuffle:
case nir_intrinsic_read_invocation: {
Temp src = get_ssa_temp(ctx, instr->src[0].ssa);

View File

@@ -3057,6 +3057,15 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
result = LLVMBuildZExt(ctx->ac.builder, result, dest_type, "");
}
break;
case nir_intrinsic_inverse_ballot: {
/* Translate inverse_ballot into a call to the llvm.amdgcn.inverse.ballot
 * intrinsic, which is built here with an i1 return type and the ballot
 * value as its single argument. */
LLVMValueRef src = get_src(ctx, instr->src[0]);
/* When the NIR source has more bits than the wave size, truncate it to an
 * integer that is exactly wave_size bits wide before passing it on.
 * NOTE(review): a source narrower than wave_size is passed through
 * unchanged -- confirm that case cannot reach this point. */
if (instr->src[0].ssa->bit_size > ctx->ac.wave_size) {
LLVMTypeRef src_type = LLVMIntTypeInContext(ctx->ac.context, ctx->ac.wave_size);
src = LLVMBuildTrunc(ctx->ac.builder, src, src_type, "");
}
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.inverse.ballot", ctx->ac.i1, &src, 1, 0);
break;
}
case nir_intrinsic_read_invocation:
result =
ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), get_src(ctx, instr->src[1]));

View File

@@ -623,7 +623,13 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
NIR_PASS(_, nir, nir_lower_global_vars_to_local);
NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
bool gfx7minus = device->physical_device->rad_info.gfx_level <= GFX7;
/* Decide whether the backend for this stage can take inverse_ballot directly.
 * Defaults to true. When LLVM support is compiled in, it stays true only if
 * this stage does not use LLVM or the LLVM major version is at least 17
 * (presumably the first release providing llvm.amdgcn.inverse.ballot --
 * TODO confirm). The result is negated below to set lower_inverse_ballot. */
bool has_inverse_ballot = true;
#if LLVM_AVAILABLE
has_inverse_ballot = !radv_use_llvm_for_stage(device, nir->info.stage) || LLVM_VERSION_MAJOR >= 17;
#endif
NIR_PASS(_, nir, nir_lower_subgroups,
&(struct nir_lower_subgroups_options){
.subgroup_size = subgroup_size,
@@ -638,7 +644,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
.lower_quad_broadcast_dynamic_to_const = gfx7minus,
.lower_shuffle_to_swizzle_amd = 1,
.lower_ballot_bit_count_to_mbcnt_amd = 1,
.lower_inverse_ballot = 1,
.lower_inverse_ballot = !has_inverse_ballot,
});
NIR_PASS(_, nir, nir_lower_load_const_to_scalar);