amd/common: lower bitfield_insert to bfm & bitfield_select

Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
This commit is contained in:
Daniel Schürmann
2019-01-25 16:08:38 +01:00
committed by Daniel Schürmann
parent a8b0b6e52b
commit 48a75e7af0
3 changed files with 27 additions and 26 deletions

View File

@@ -455,34 +455,30 @@ static LLVMValueRef emit_bitfield_extract(struct ac_llvm_context *ctx,
return result; return result;
} }
static LLVMValueRef emit_bitfield_insert(struct ac_llvm_context *ctx, static LLVMValueRef emit_bfm(struct ac_llvm_context *ctx,
LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef bits, LLVMValueRef offset)
LLVMValueRef src2, LLVMValueRef src3)
{ {
LLVMValueRef bfi_args[3], result; /* mask = ((1 << bits) - 1) << offset */
return LLVMBuildShl(ctx->builder,
bfi_args[0] = LLVMBuildShl(ctx->builder,
LLVMBuildSub(ctx->builder, LLVMBuildSub(ctx->builder,
LLVMBuildShl(ctx->builder, LLVMBuildShl(ctx->builder,
ctx->i32_1, ctx->i32_1,
src3, ""), bits, ""),
ctx->i32_1, ""), ctx->i32_1, ""),
src2, ""); offset, "");
bfi_args[1] = LLVMBuildShl(ctx->builder, src1, src2, ""); }
bfi_args[2] = src0;
LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, src3, LLVMConstInt(ctx->i32, 32, false), "");
static LLVMValueRef emit_bitfield_select(struct ac_llvm_context *ctx,
LLVMValueRef mask, LLVMValueRef insert,
LLVMValueRef base)
{
/* Calculate: /* Calculate:
* (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2)) * (mask & insert) | (~mask & base) = base ^ (mask & (insert ^ base))
* Use the right-hand side, which the LLVM backend can convert to V_BFI. * Use the right-hand side, which the LLVM backend can convert to V_BFI.
*/ */
result = LLVMBuildXor(ctx->builder, bfi_args[2], return LLVMBuildXor(ctx->builder, base,
LLVMBuildAnd(ctx->builder, bfi_args[0], LLVMBuildAnd(ctx->builder, mask,
LLVMBuildXor(ctx->builder, bfi_args[1], bfi_args[2], ""), ""), ""); LLVMBuildXor(ctx->builder, insert, base, ""), ""), "");
result = LLVMBuildSelect(ctx->builder, icond, src1, result, "");
return result;
} }
static LLVMValueRef emit_pack_half_2x16(struct ac_llvm_context *ctx, static LLVMValueRef emit_pack_half_2x16(struct ac_llvm_context *ctx,
@@ -835,15 +831,18 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
else else
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f64", ctx->ac.f64, src, 2, AC_FUNC_ATTR_READNONE); result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f64", ctx->ac.f64, src, 2, AC_FUNC_ATTR_READNONE);
break; break;
case nir_op_bfm:
result = emit_bfm(&ctx->ac, src[0], src[1]);
break;
case nir_op_bitfield_select:
result = emit_bitfield_select(&ctx->ac, src[0], src[1], src[2]);
break;
case nir_op_ibitfield_extract: case nir_op_ibitfield_extract:
result = emit_bitfield_extract(&ctx->ac, true, src); result = emit_bitfield_extract(&ctx->ac, true, src);
break; break;
case nir_op_ubitfield_extract: case nir_op_ubitfield_extract:
result = emit_bitfield_extract(&ctx->ac, false, src); result = emit_bitfield_extract(&ctx->ac, false, src);
break; break;
case nir_op_bitfield_insert:
result = emit_bitfield_insert(&ctx->ac, src[0], src[1], src[2], src[3]);
break;
case nir_op_bitfield_reverse: case nir_op_bitfield_reverse:
result = ac_build_bitfield_reverse(&ctx->ac, src[0]); result = ac_build_bitfield_reverse(&ctx->ac, src[0]);
break; break;

View File

@@ -58,6 +58,7 @@ static const struct nir_shader_compiler_options nir_options = {
.lower_device_index_to_zero = true, .lower_device_index_to_zero = true,
.lower_fsat = true, .lower_fsat = true,
.lower_fdiv = true, .lower_fdiv = true,
.lower_bitfield_insert_to_bitfield_select = true,
.lower_sub = true, .lower_sub = true,
.lower_pack_snorm_2x16 = true, .lower_pack_snorm_2x16 = true,
.lower_pack_snorm_4x8 = true, .lower_pack_snorm_4x8 = true,

View File

@@ -487,6 +487,7 @@ static const struct nir_shader_compiler_options nir_options = {
.lower_flrp64 = true, .lower_flrp64 = true,
.lower_fsat = true, .lower_fsat = true,
.lower_fdiv = true, .lower_fdiv = true,
.lower_bitfield_insert_to_bitfield_select = true,
.lower_sub = true, .lower_sub = true,
.lower_ffma = true, .lower_ffma = true,
.lower_fmod = true, .lower_fmod = true,