ac: add support for 16bit buffer loads
v2: Fixed dvec3 loads (bas) Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
This commit is contained in:

committed by
Bas Nieuwenhuizen

parent
a6a21e651d
commit
7e7ee82698
@@ -1600,63 +1600,78 @@ static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
|
|||||||
const nir_intrinsic_instr *instr)
|
const nir_intrinsic_instr *instr)
|
||||||
{
|
{
|
||||||
LLVMValueRef results[2];
|
LLVMValueRef results[2];
|
||||||
int load_components;
|
int load_bytes;
|
||||||
|
int elem_size_bytes = instr->dest.ssa.bit_size / 8;
|
||||||
int num_components = instr->num_components;
|
int num_components = instr->num_components;
|
||||||
if (instr->dest.ssa.bit_size == 64)
|
int num_bytes = num_components * elem_size_bytes;
|
||||||
num_components *= 2;
|
|
||||||
|
|
||||||
for (int i = 0; i < num_components; i += load_components) {
|
for (int i = 0; i < num_bytes; i += load_bytes) {
|
||||||
load_components = MIN2(num_components - i, 4);
|
load_bytes = MIN2(num_bytes - i, 16);
|
||||||
const char *load_name;
|
const char *load_name;
|
||||||
LLVMTypeRef data_type = ctx->ac.f32;
|
LLVMTypeRef data_type;
|
||||||
LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, i * 4, false);
|
LLVMValueRef offset = get_src(ctx, instr->src[1]);
|
||||||
offset = LLVMBuildAdd(ctx->ac.builder, get_src(ctx, instr->src[1]), offset, "");
|
LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i, false);
|
||||||
|
LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
|
||||||
|
get_src(ctx, instr->src[0]), false);
|
||||||
|
LLVMValueRef vindex = ctx->ac.i32_0;
|
||||||
|
|
||||||
if (load_components == 3)
|
int idx = i ? 1 : 0;
|
||||||
data_type = LLVMVectorType(ctx->ac.f32, 4);
|
if (load_bytes == 2) {
|
||||||
else if (load_components > 1)
|
results[idx] = ac_build_tbuffer_load_short(&ctx->ac,
|
||||||
data_type = LLVMVectorType(ctx->ac.f32, load_components);
|
rsrc,
|
||||||
|
vindex,
|
||||||
if (load_components >= 3)
|
offset,
|
||||||
load_name = "llvm.amdgcn.buffer.load.v4f32";
|
ctx->ac.i32_0,
|
||||||
else if (load_components == 2)
|
immoffset);
|
||||||
load_name = "llvm.amdgcn.buffer.load.v2f32";
|
} else {
|
||||||
else if (load_components == 1)
|
switch (load_bytes) {
|
||||||
load_name = "llvm.amdgcn.buffer.load.f32";
|
case 16:
|
||||||
else
|
case 12:
|
||||||
unreachable("unhandled number of components");
|
load_name = "llvm.amdgcn.buffer.load.v4f32";
|
||||||
|
data_type = ctx->ac.v4f32;
|
||||||
LLVMValueRef params[] = {
|
break;
|
||||||
ctx->abi->load_ssbo(ctx->abi,
|
case 8:
|
||||||
get_src(ctx, instr->src[0]),
|
case 6:
|
||||||
false),
|
load_name = "llvm.amdgcn.buffer.load.v2f32";
|
||||||
ctx->ac.i32_0,
|
data_type = ctx->ac.v2f32;
|
||||||
offset,
|
break;
|
||||||
ctx->ac.i1false,
|
case 4:
|
||||||
ctx->ac.i1false,
|
load_name = "llvm.amdgcn.buffer.load.f32";
|
||||||
};
|
data_type = ctx->ac.f32;
|
||||||
|
break;
|
||||||
results[i > 0 ? 1 : 0] = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0);
|
default:
|
||||||
|
unreachable("Malformed load buffer.");
|
||||||
|
}
|
||||||
|
LLVMValueRef params[] = {
|
||||||
|
rsrc,
|
||||||
|
vindex,
|
||||||
|
LLVMBuildAdd(ctx->ac.builder, offset, immoffset, ""),
|
||||||
|
ctx->ac.i1false,
|
||||||
|
ctx->ac.i1false,
|
||||||
|
};
|
||||||
|
results[idx] = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0);
|
||||||
|
unsigned num_elems = ac_get_type_size(data_type) / elem_size_bytes;
|
||||||
|
LLVMTypeRef resTy = LLVMVectorType(LLVMIntType(instr->dest.ssa.bit_size), num_elems);
|
||||||
|
results[idx] = LLVMBuildBitCast(ctx->ac.builder, results[idx], resTy, "");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
assume(results[0]);
|
assume(results[0]);
|
||||||
LLVMValueRef ret = results[0];
|
LLVMValueRef ret = results[0];
|
||||||
if (num_components > 4 || num_components == 3) {
|
if (num_bytes > 16 || num_components == 3) {
|
||||||
LLVMValueRef masks[] = {
|
LLVMValueRef masks[] = {
|
||||||
LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
|
LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
|
||||||
LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
|
LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
|
||||||
LLVMConstInt(ctx->ac.i32, 4, false), LLVMConstInt(ctx->ac.i32, 5, false),
|
|
||||||
LLVMConstInt(ctx->ac.i32, 6, false), LLVMConstInt(ctx->ac.i32, 7, false)
|
|
||||||
};
|
};
|
||||||
|
|
||||||
if (num_components == 6) {
|
if (num_bytes > 16 && num_components == 3) {
|
||||||
/* we end up with a v4f32 and v2f32 but shuffle fails on that */
|
/* we end up with a v4f32 and v2f32 but shuffle fails on that */
|
||||||
results[1] = ac_build_expand_to_vec4(&ctx->ac, results[1], 4);
|
results[1] = ac_build_expand_to_vec4(&ctx->ac, results[1], 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
|
LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
|
||||||
ret = LLVMBuildShuffleVector(ctx->ac.builder, results[0],
|
ret = LLVMBuildShuffleVector(ctx->ac.builder, results[0],
|
||||||
results[num_components > 4 ? 1 : 0], swizzle, "");
|
results[num_bytes > 16 ? 1 : 0], swizzle, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
return LLVMBuildBitCast(ctx->ac.builder, ret,
|
return LLVMBuildBitCast(ctx->ac.builder, ret,
|
||||||
|
Reference in New Issue
Block a user