ac,ac/nir: use a better sync scope for shared atomics
https://reviews.llvm.org/rL356946 (present in LLVM 9 and later) changed the meaning of the "system" sync scope, making it no longer restricted to the memory operation's address space. So a single address space sync scope is needed for shared atomic operations (such as "system-one-as" or "workgroup-one-as") otherwise buffer_wbinvl1 and s_waitcnt instructions can be created at each shared atomic operation.

This mostly reimplements LLVMBuildAtomicRMW and LLVMBuildAtomicCmpXchg to allow for more sync scopes and uses the new functions in ac->nir with the "workgroup-one-as" or "workgroup" sync scopes.

F1 2017 (4K, Ultra High settings, TAA), avg FPS : 59 -> 59.67 (+1.14%)
Strange Brigade (4K, ~highest settings), avg FPS : 51.5 -> 51.6 (+0.19%)
RotTR/mountain (4K, VeryHigh settings, FXAA), avg FPS : 57.2 -> 57.2 (+0.0%)
RotTR/tomb (4K, VeryHigh settings, FXAA), avg FPS : 42.5 -> 43.0 (+1.17%)
RotTR/valley (4K, VeryHigh settings, FXAA), avg FPS : 40.7 -> 41.6 (+2.21%)
Warhammer II/fallen, avg FPS : 31.63 -> 31.83 (+0.63%)
Warhammer II/skaven, avg FPS : 37.77 -> 38.07 (+0.79%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
This commit is contained in:
@@ -26,7 +26,7 @@
|
||||
#define AC_LLVM_BUILD_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <llvm-c/TargetMachine.h>
|
||||
#include <llvm-c/Core.h>
|
||||
#include "compiler/nir/nir.h"
|
||||
#include "amd_family.h"
|
||||
|
||||
@@ -694,6 +694,14 @@ ac_build_ddxy_interp(struct ac_llvm_context *ctx, LLVMValueRef interp_ij);
|
||||
LLVMValueRef
|
||||
ac_build_load_helper_invocation(struct ac_llvm_context *ctx);
|
||||
|
||||
/* Build an atomic read-modify-write of kind `op` on `ptr` with operand `val`,
 * tagged with the sync scope named by `sync_scope` (e.g. "workgroup-one-as").
 * Returns the created instruction as an LLVMValueRef.
 */
LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
|
||||
LLVMValueRef ptr, LLVMValueRef val,
|
||||
const char *sync_scope);
|
||||
|
||||
/* Build an atomic compare-and-exchange on `ptr` (compare against `cmp`,
 * store `val`), tagged with the sync scope named by `sync_scope`.
 * Returns the created instruction as an LLVMValueRef; the caller in this
 * change extracts element 0 of the result to obtain the loaded value.
 */
LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
|
||||
LLVMValueRef cmp, LLVMValueRef val,
|
||||
const char *sync_scope);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@@ -31,6 +31,7 @@
|
||||
|
||||
#include "ac_binary.h"
|
||||
#include "ac_llvm_util.h"
|
||||
#include "ac_llvm_build.h"
|
||||
|
||||
#include <llvm-c/Core.h>
|
||||
#include <llvm/Target/TargetMachine.h>
|
||||
@@ -167,3 +168,61 @@ void ac_enable_global_isel(LLVMTargetMachineRef tm)
|
||||
{
|
||||
reinterpret_cast<llvm::TargetMachine*>(tm)->setGlobalISel(true);
|
||||
}
|
||||
|
||||
/**
 * Build an LLVM atomic read-modify-write instruction with a named sync scope.
 *
 * Variant of LLVMBuildAtomicRMW that accepts an arbitrary sync scope name
 * instead of only a single-thread flag. The ordering is always sequentially
 * consistent.
 *
 * \param ctx         supplies the LLVM context and the IR builder
 * \param op          C-API operation kind; translated to the C++ enum below
 * \param ptr         address operand
 * \param val         value operand
 * \param sync_scope  sync scope name (e.g. "workgroup-one-as"); looked up in,
 *                    or added to, the LLVM context
 * \return the created instruction wrapped as an LLVMValueRef
 */
LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
				 LLVMValueRef ptr, LLVMValueRef val,
				 const char *sync_scope) {
	/* Translate the C-API enum into the C++ one explicitly instead of
	 * relying on the two enums sharing numeric values. */
	llvm::AtomicRMWInst::BinOp binop;
	switch (op) {
	case LLVMAtomicRMWBinOpXchg:
		binop = llvm::AtomicRMWInst::Xchg;
		break;
	case LLVMAtomicRMWBinOpAdd:
		binop = llvm::AtomicRMWInst::Add;
		break;
	case LLVMAtomicRMWBinOpSub:
		binop = llvm::AtomicRMWInst::Sub;
		break;
	case LLVMAtomicRMWBinOpAnd:
		binop = llvm::AtomicRMWInst::And;
		break;
	case LLVMAtomicRMWBinOpNand:
		binop = llvm::AtomicRMWInst::Nand;
		break;
	case LLVMAtomicRMWBinOpOr:
		binop = llvm::AtomicRMWInst::Or;
		break;
	case LLVMAtomicRMWBinOpXor:
		binop = llvm::AtomicRMWInst::Xor;
		break;
	case LLVMAtomicRMWBinOpMax:
		binop = llvm::AtomicRMWInst::Max;
		break;
	case LLVMAtomicRMWBinOpMin:
		binop = llvm::AtomicRMWInst::Min;
		break;
	case LLVMAtomicRMWBinOpUMax:
		binop = llvm::AtomicRMWInst::UMax;
		break;
	case LLVMAtomicRMWBinOpUMin:
		binop = llvm::AtomicRMWInst::UMin;
		break;
	default:
		/* NOTE(review): the message is passed un-negated on the
		 * assumption that the project's unreachable(str) macro itself
		 * asserts !str; a leading '!' here would turn that into an
		 * always-true assertion. Confirm against the macro definition. */
		unreachable("invalid LLVMAtomicRMWBinOp");
		break;
	}

	unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
	return llvm::wrap(llvm::unwrap(ctx->builder)->CreateAtomicRMW(
				binop, llvm::unwrap(ptr), llvm::unwrap(val),
				llvm::AtomicOrdering::SequentiallyConsistent, SSID));
}
|
||||
|
||||
/**
 * Build an LLVM atomic compare-and-exchange instruction with a named sync
 * scope.
 *
 * Both the success and the failure ordering are sequentially consistent.
 *
 * \param ctx         supplies the LLVM context and the IR builder
 * \param ptr         address operand
 * \param cmp         value compared against the contents of \p ptr
 * \param val         replacement value
 * \param sync_scope  sync scope name; looked up in, or added to, the LLVM
 *                    context
 * \return the created instruction wrapped as an LLVMValueRef
 */
LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
				      LLVMValueRef cmp, LLVMValueRef val,
				      const char *sync_scope) {
	const llvm::AtomicOrdering seq_cst = llvm::AtomicOrdering::SequentiallyConsistent;

	/* Resolve the scope name to its numeric ID before emitting. */
	unsigned scope_id = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);

	auto *inst = llvm::unwrap(ctx->builder)->CreateAtomicCmpXchg(
		llvm::unwrap(ptr), llvm::unwrap(cmp), llvm::unwrap(val),
		seq_cst, seq_cst, scope_id);

	return llvm::wrap(inst);
}
|
||||
|
@@ -2897,14 +2897,12 @@ static LLVMValueRef visit_var_atomic(struct ac_nir_context *ctx,
|
||||
LLVMValueRef result;
|
||||
LLVMValueRef src = get_src(ctx, instr->src[src_idx]);
|
||||
|
||||
const char *sync_scope = HAVE_LLVM >= 0x0900 ? "workgroup-one-as" : "workgroup";
|
||||
|
||||
if (instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap ||
|
||||
instr->intrinsic == nir_intrinsic_deref_atomic_comp_swap) {
|
||||
LLVMValueRef src1 = get_src(ctx, instr->src[src_idx + 1]);
|
||||
result = LLVMBuildAtomicCmpXchg(ctx->ac.builder,
|
||||
ptr, src, src1,
|
||||
LLVMAtomicOrderingSequentiallyConsistent,
|
||||
LLVMAtomicOrderingSequentiallyConsistent,
|
||||
false);
|
||||
result = ac_build_atomic_cmp_xchg(&ctx->ac, ptr, src, src1, sync_scope);
|
||||
result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, "");
|
||||
} else {
|
||||
LLVMAtomicRMWBinOp op;
|
||||
@@ -2949,9 +2947,7 @@ static LLVMValueRef visit_var_atomic(struct ac_nir_context *ctx,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
result = LLVMBuildAtomicRMW(ctx->ac.builder, op, ptr, ac_to_integer(&ctx->ac, src),
|
||||
LLVMAtomicOrderingSequentiallyConsistent,
|
||||
false);
|
||||
result = ac_build_atomic_rmw(&ctx->ac, op, ptr, ac_to_integer(&ctx->ac, src), sync_scope);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
Reference in New Issue
Block a user