gallivm: Prefer backend agnostic intrinsic for rounding.
We could unconditionally use these instrinsics, but performance with SSE2 would suck, as LLVM falls back to calling libm. lp_test_arit. Reviewed-by: Roland Scheidegger <sroland@vmware.com>
This commit is contained in:
@@ -1675,13 +1675,13 @@ enum lp_build_round_mode
|
|||||||
* result is the even value. That is, rounding 2.5 will be 2.0, and not 3.0.
|
* result is the even value. That is, rounding 2.5 will be 2.0, and not 3.0.
|
||||||
*/
|
*/
|
||||||
static inline LLVMValueRef
|
static inline LLVMValueRef
|
||||||
lp_build_round_sse41(struct lp_build_context *bld,
|
lp_build_nearest_sse41(struct lp_build_context *bld,
|
||||||
LLVMValueRef a,
|
LLVMValueRef a)
|
||||||
enum lp_build_round_mode mode)
|
|
||||||
{
|
{
|
||||||
LLVMBuilderRef builder = bld->gallivm->builder;
|
LLVMBuilderRef builder = bld->gallivm->builder;
|
||||||
const struct lp_type type = bld->type;
|
const struct lp_type type = bld->type;
|
||||||
LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
|
LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
|
||||||
|
LLVMValueRef mode = LLVMConstNull(i32t);
|
||||||
const char *intrinsic;
|
const char *intrinsic;
|
||||||
LLVMValueRef res;
|
LLVMValueRef res;
|
||||||
|
|
||||||
@@ -1714,7 +1714,7 @@ lp_build_round_sse41(struct lp_build_context *bld,
|
|||||||
|
|
||||||
args[0] = undef;
|
args[0] = undef;
|
||||||
args[1] = LLVMBuildInsertElement(builder, undef, a, index0, "");
|
args[1] = LLVMBuildInsertElement(builder, undef, a, index0, "");
|
||||||
args[2] = LLVMConstInt(i32t, mode, 0);
|
args[2] = mode;
|
||||||
|
|
||||||
res = lp_build_intrinsic(builder, intrinsic,
|
res = lp_build_intrinsic(builder, intrinsic,
|
||||||
vec_type, args, Elements(args), 0);
|
vec_type, args, Elements(args), 0);
|
||||||
@@ -1754,7 +1754,7 @@ lp_build_round_sse41(struct lp_build_context *bld,
|
|||||||
|
|
||||||
res = lp_build_intrinsic_binary(builder, intrinsic,
|
res = lp_build_intrinsic_binary(builder, intrinsic,
|
||||||
bld->vec_type, a,
|
bld->vec_type, a,
|
||||||
LLVMConstInt(i32t, mode, 0));
|
mode);
|
||||||
}
|
}
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
@@ -1856,8 +1856,40 @@ lp_build_round_arch(struct lp_build_context *bld,
|
|||||||
LLVMValueRef a,
|
LLVMValueRef a,
|
||||||
enum lp_build_round_mode mode)
|
enum lp_build_round_mode mode)
|
||||||
{
|
{
|
||||||
if (util_cpu_caps.has_sse4_1)
|
if (util_cpu_caps.has_sse4_1) {
|
||||||
return lp_build_round_sse41(bld, a, mode);
|
LLVMBuilderRef builder = bld->gallivm->builder;
|
||||||
|
const struct lp_type type = bld->type;
|
||||||
|
const char *intrinsic_root;
|
||||||
|
char intrinsic[32];
|
||||||
|
|
||||||
|
assert(type.floating);
|
||||||
|
assert(lp_check_value(type, a));
|
||||||
|
(void)type;
|
||||||
|
|
||||||
|
switch (mode) {
|
||||||
|
case LP_BUILD_ROUND_NEAREST:
|
||||||
|
if (HAVE_LLVM >= 0x0304) {
|
||||||
|
intrinsic_root = "llvm.round";
|
||||||
|
} else {
|
||||||
|
return lp_build_nearest_sse41(bld, a);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case LP_BUILD_ROUND_FLOOR:
|
||||||
|
intrinsic_root = "llvm.floor";
|
||||||
|
break;
|
||||||
|
case LP_BUILD_ROUND_CEIL:
|
||||||
|
intrinsic_root = "llvm.ceil";
|
||||||
|
break;
|
||||||
|
case LP_BUILD_ROUND_TRUNCATE:
|
||||||
|
intrinsic_root = "llvm.trunc";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
util_snprintf(intrinsic, sizeof intrinsic, "%s.v%uf%u",
|
||||||
|
intrinsic_root, type.length, type.width);
|
||||||
|
|
||||||
|
return lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, a);
|
||||||
|
}
|
||||||
else /* (util_cpu_caps.has_altivec) */
|
else /* (util_cpu_caps.has_altivec) */
|
||||||
return lp_build_round_altivec(bld, a, mode);
|
return lp_build_round_altivec(bld, a, mode);
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user