radeonsi/ac: move most of emit_ddxy to shared code.

We can reuse this in radv.

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
Dave Airlie
2017-02-02 09:52:52 +10:00
parent 278d5ef70a
commit c9a2fc3679
3 changed files with 93 additions and 71 deletions

View File

@@ -851,3 +851,77 @@ ac_get_thread_id(struct ac_llvm_context *ctx)
set_range_metadata(ctx, tid, 0, 64);
return tid;
}
/*
* SI implements derivatives using the local data store (LDS)
* All writes to the LDS happen in all executing threads at
* the same time. TID is the Thread ID for the current
* thread and is a value between 0 and 63, representing
* the thread's position in the wavefront.
*
* For the pixel shader threads are grouped into quads of four pixels.
* The TIDs of the pixels of a quad are:
*
* +------+------+
* |4n + 0|4n + 1|
* +------+------+
* |4n + 2|4n + 3|
* +------+------+
*
* So, masking the TID with 0xfffffffc yields the TID of the top left pixel
* of the quad, masking with 0xfffffffd yields the TID of the top pixel of
* the current pixel's column, and masking with 0xfffffffe yields the TID
* of the left pixel of the current pixel's row.
*
* Adding 1 yields the TID of the pixel to the right of the left pixel, and
* adding 2 yields the TID of the pixel below the top pixel.
*/
LLVMValueRef
ac_emit_ddxy(struct ac_llvm_context *ctx,
bool has_ds_bpermute,
uint32_t mask,
int idx,
LLVMValueRef lds,
LLVMValueRef val)
{
LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, args[2];
LLVMValueRef result;
thread_id = ac_get_thread_id(ctx);
tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
LLVMConstInt(ctx->i32, mask, false), "");
trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
LLVMConstInt(ctx->i32, idx, false), "");
if (has_ds_bpermute) {
args[0] = LLVMBuildMul(ctx->builder, tl_tid,
LLVMConstInt(ctx->i32, 4, false), "");
args[1] = val;
tl = ac_emit_llvm_intrinsic(ctx,
"llvm.amdgcn.ds.bpermute", ctx->i32,
args, 2, AC_FUNC_ATTR_READNONE);
args[0] = LLVMBuildMul(ctx->builder, trbl_tid,
LLVMConstInt(ctx->i32, 4, false), "");
trbl = ac_emit_llvm_intrinsic(ctx,
"llvm.amdgcn.ds.bpermute", ctx->i32,
args, 2, AC_FUNC_ATTR_READNONE);
} else {
LLVMValueRef store_ptr, load_ptr0, load_ptr1;
store_ptr = ac_build_gep0(ctx, lds, thread_id);
load_ptr0 = ac_build_gep0(ctx, lds, tl_tid);
load_ptr1 = ac_build_gep0(ctx, lds, trbl_tid);
LLVMBuildStore(ctx->builder, val, store_ptr);
tl = LLVMBuildLoad(ctx->builder, load_ptr0, "");
trbl = LLVMBuildLoad(ctx->builder, load_ptr1, "");
}
tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, "");
result = LLVMBuildFSub(ctx->builder, trbl, tl, "");
return result;
}