gallivm: Prefer backend agnostic intrinsic for rounding.

We could unconditionally use these intrinsics, but performance with SSE2
would suck, as LLVM falls back to calling libm.

lp_test_arit.

Reviewed-by: Roland Scheidegger <sroland@vmware.com>
This commit is contained in:
Jose Fonseca
2016-04-01 11:50:28 +01:00
parent 324451e73f
commit e4f01da15d

View File

@@ -1675,13 +1675,13 @@ enum lp_build_round_mode
* result is the even value. That is, rounding 2.5 will be 2.0, and not 3.0. * result is the even value. That is, rounding 2.5 will be 2.0, and not 3.0.
*/ */
static inline LLVMValueRef static inline LLVMValueRef
lp_build_round_sse41(struct lp_build_context *bld, lp_build_nearest_sse41(struct lp_build_context *bld,
LLVMValueRef a, LLVMValueRef a)
enum lp_build_round_mode mode)
{ {
LLVMBuilderRef builder = bld->gallivm->builder; LLVMBuilderRef builder = bld->gallivm->builder;
const struct lp_type type = bld->type; const struct lp_type type = bld->type;
LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context); LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
LLVMValueRef mode = LLVMConstNull(i32t);
const char *intrinsic; const char *intrinsic;
LLVMValueRef res; LLVMValueRef res;
@@ -1714,7 +1714,7 @@ lp_build_round_sse41(struct lp_build_context *bld,
args[0] = undef; args[0] = undef;
args[1] = LLVMBuildInsertElement(builder, undef, a, index0, ""); args[1] = LLVMBuildInsertElement(builder, undef, a, index0, "");
args[2] = LLVMConstInt(i32t, mode, 0); args[2] = mode;
res = lp_build_intrinsic(builder, intrinsic, res = lp_build_intrinsic(builder, intrinsic,
vec_type, args, Elements(args), 0); vec_type, args, Elements(args), 0);
@@ -1754,7 +1754,7 @@ lp_build_round_sse41(struct lp_build_context *bld,
res = lp_build_intrinsic_binary(builder, intrinsic, res = lp_build_intrinsic_binary(builder, intrinsic,
bld->vec_type, a, bld->vec_type, a,
LLVMConstInt(i32t, mode, 0)); mode);
} }
return res; return res;
@@ -1856,8 +1856,40 @@ lp_build_round_arch(struct lp_build_context *bld,
LLVMValueRef a, LLVMValueRef a,
enum lp_build_round_mode mode) enum lp_build_round_mode mode)
{ {
if (util_cpu_caps.has_sse4_1) if (util_cpu_caps.has_sse4_1) {
return lp_build_round_sse41(bld, a, mode); LLVMBuilderRef builder = bld->gallivm->builder;
const struct lp_type type = bld->type;
const char *intrinsic_root;
char intrinsic[32];
assert(type.floating);
assert(lp_check_value(type, a));
(void)type;
switch (mode) {
case LP_BUILD_ROUND_NEAREST:
if (HAVE_LLVM >= 0x0304) {
intrinsic_root = "llvm.round";
} else {
return lp_build_nearest_sse41(bld, a);
}
break;
case LP_BUILD_ROUND_FLOOR:
intrinsic_root = "llvm.floor";
break;
case LP_BUILD_ROUND_CEIL:
intrinsic_root = "llvm.ceil";
break;
case LP_BUILD_ROUND_TRUNCATE:
intrinsic_root = "llvm.trunc";
break;
}
util_snprintf(intrinsic, sizeof intrinsic, "%s.v%uf%u",
intrinsic_root, type.length, type.width);
return lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, a);
}
else /* (util_cpu_caps.has_altivec) */ else /* (util_cpu_caps.has_altivec) */
return lp_build_round_altivec(bld, a, mode); return lp_build_round_altivec(bld, a, mode);
} }