amd/common: scan/reduce across waves of a workgroup
Order-aware scan/reduce can trade-off LDS traffic for external atomics memory traffic in producer/consumer compute shaders. Reviewed-by: Marek Olšák <marek.olsak@amd.com>
This commit is contained in:
@@ -524,6 +524,42 @@ ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op
|
||||
LLVMValueRef
|
||||
ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, unsigned cluster_size);
|
||||
|
||||
/**
|
||||
* Common arguments for a scan/reduce operation that accumulates per-wave
|
||||
* values across an entire workgroup, while respecting the order of waves.
|
||||
*/
|
||||
struct ac_wg_scan {
|
||||
bool enable_reduce;
|
||||
bool enable_exclusive;
|
||||
bool enable_inclusive;
|
||||
nir_op op;
|
||||
LLVMValueRef src; /* clobbered! */
|
||||
LLVMValueRef result_reduce;
|
||||
LLVMValueRef result_exclusive;
|
||||
LLVMValueRef result_inclusive;
|
||||
LLVMValueRef extra;
|
||||
LLVMValueRef waveidx;
|
||||
LLVMValueRef numwaves; /* only needed for "reduce" operations */
|
||||
|
||||
/* T addrspace(LDS) pointer to the same type as value, at least maxwaves entries */
|
||||
LLVMValueRef scratch;
|
||||
unsigned maxwaves;
|
||||
};
|
||||
|
||||
void
|
||||
ac_build_wg_wavescan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
|
||||
void
|
||||
ac_build_wg_wavescan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
|
||||
void
|
||||
ac_build_wg_wavescan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
|
||||
|
||||
void
|
||||
ac_build_wg_scan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
|
||||
void
|
||||
ac_build_wg_scan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
|
||||
void
|
||||
ac_build_wg_scan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
|
||||
|
||||
LLVMValueRef
|
||||
ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src,
|
||||
unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3);
|
||||
|
Reference in New Issue
Block a user