ac/nir: implement nir_op_fsat

Use fmed3 if available; otherwise fall back to fmin/fmax.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6932>
This commit is contained in:
Samuel Pitoiset
2020-09-30 10:48:29 +02:00
committed by Marge Bot
parent 0747f21bb6
commit 31a0574b96
3 changed files with 52 additions and 0 deletions

View File

@@ -2447,6 +2447,50 @@ void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags)
ac_build_intrinsic(ctx, "llvm.amdgcn.s.waitcnt", ctx->voidt, args, 1, 0); ac_build_intrinsic(ctx, "llvm.amdgcn.s.waitcnt", ctx->voidt, args, 1, 0);
} }
/* Build a floating-point saturate of src, i.e. clamp it to [0.0, 1.0].
 *
 * ctx:  LLVM build context; ctx->chip_class selects the lowering.
 * src:  value to saturate (presumably already of the given float type;
 *       the visible caller converts it with ac_to_float first).
 * type: floating-point type used to build the 0.0 and 1.0 bounds; its
 *       element bit size (16, 32 or 64) selects the lowering.
 *
 * Returns the clamped value. 16-bit (GFX9+) and 32-bit inputs use the
 * llvm.amdgcn.fmed3 intrinsic; 64-bit inputs and 16-bit inputs on GFX8 and
 * older use an fmax/fmin pair instead.
 */
LLVMValueRef ac_build_fsat(struct ac_llvm_context *ctx, LLVMValueRef src,
                           LLVMTypeRef type)
{
   unsigned bitsize = ac_get_elem_bits(ctx, type);
   LLVMValueRef zero = LLVMConstReal(type, 0.0);
   LLVMValueRef one = LLVMConstReal(type, 1.0);
   LLVMValueRef result;

   if (bitsize == 64 || (bitsize == 16 && ctx->chip_class <= GFX8)) {
      /* Use fmin/fmax for 64-bit fsat or 16-bit on GFX6-GFX8 because LLVM
       * doesn't expose an intrinsic.
       */
      result = ac_build_fmin(ctx, ac_build_fmax(ctx, src, zero), one);
   } else {
      /* Return type of the intrinsic. Named distinctly so that it does not
       * shadow the `type` parameter.
       */
      LLVMTypeRef intr_type;
      /* Points at string literals, hence const. */
      const char *intr;

      if (bitsize == 16) {
         intr = "llvm.amdgcn.fmed3.f16";
         intr_type = ctx->f16;
      } else {
         assert(bitsize == 32);
         intr = "llvm.amdgcn.fmed3.f32";
         intr_type = ctx->f32;
      }

      /* The median of (0.0, 1.0, src) is src clamped to [0.0, 1.0]. */
      LLVMValueRef params[] = {
         zero,
         one,
         src,
      };

      result = ac_build_intrinsic(ctx, intr, intr_type, params, 3,
                                  AC_FUNC_ATTR_READNONE);
   }

   if (ctx->chip_class < GFX9 && bitsize == 32) {
      /* Only pre-GFX9 chips do not flush denorms. */
      result = ac_build_canonicalize(ctx, result, bitsize);
   }

   return result;
}
LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize) LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize)
{ {
LLVMTypeRef type; LLVMTypeRef type;

View File

@@ -455,6 +455,9 @@ LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0);
LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src); LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src);
LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0); LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0);
/* Build a floating-point saturate: clamp src to the range [0.0, 1.0].
 * type is the floating-point type used for the 0.0 and 1.0 bounds; its bit
 * size selects how the clamp is lowered.
 */
LLVMValueRef ac_build_fsat(struct ac_llvm_context *ctx, LLVMValueRef src,
LLVMTypeRef type);
LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx, LLVMValueRef src0); LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx, LLVMValueRef src0);
void ac_optimize_vs_outputs(struct ac_llvm_context *ac, LLVMValueRef main_fn, void ac_optimize_vs_outputs(struct ac_llvm_context *ac, LLVMValueRef main_fn,

View File

@@ -743,6 +743,11 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
result = ac_build_canonicalize(&ctx->ac, result, instr->dest.dest.ssa.bit_size); result = ac_build_canonicalize(&ctx->ac, result, instr->dest.dest.ssa.bit_size);
} }
break; break;
case nir_op_fsat:
src[0] = ac_to_float(&ctx->ac, src[0]);
result = ac_build_fsat(&ctx->ac, src[0],
ac_to_float_type(&ctx->ac, def_type));
break;
case nir_op_iabs: case nir_op_iabs:
result = emit_iabs(&ctx->ac, src[0]); result = emit_iabs(&ctx->ac, src[0]);
break; break;