ac/llvm: add support for 16-bit source operands for samplers

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9395>
This commit is contained in:
Marek Olšák
2021-02-11 03:22:00 -05:00
committed by Marge Bot
parent c393ae9d84
commit 3475c79328
3 changed files with 57 additions and 13 deletions

View File

@@ -2164,6 +2164,29 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, struct ac_image_
assert(!a->d16 || (ctx->chip_class >= GFX8 && a->opcode != ac_image_atomic &&
a->opcode != ac_image_atomic_cmpswap && a->opcode != ac_image_get_lod &&
a->opcode != ac_image_get_resinfo));
assert(!a->a16 || ctx->chip_class >= GFX9);
assert(a->g16 == a->a16 || ctx->chip_class >= GFX10);
assert(!a->offset ||
ac_get_elem_bits(ctx, LLVMTypeOf(a->offset)) == 32);
assert(!a->bias ||
ac_get_elem_bits(ctx, LLVMTypeOf(a->bias)) == 32);
assert(!a->compare ||
ac_get_elem_bits(ctx, LLVMTypeOf(a->compare)) == 32);
assert(!a->derivs[0] ||
((!a->g16 || ac_get_elem_bits(ctx, LLVMTypeOf(a->derivs[0])) == 16) &&
(a->g16 || ac_get_elem_bits(ctx, LLVMTypeOf(a->derivs[0])) == 32)));
assert(!a->coords[0] ||
((!a->a16 || ac_get_elem_bits(ctx, LLVMTypeOf(a->coords[0])) == 16) &&
(a->a16 || ac_get_elem_bits(ctx, LLVMTypeOf(a->coords[0])) == 32)));
assert(!a->lod ||
((a->opcode != ac_image_get_resinfo || ac_get_elem_bits(ctx, LLVMTypeOf(a->lod))) &&
(a->opcode == ac_image_get_resinfo ||
ac_get_elem_bits(ctx, LLVMTypeOf(a->lod)) ==
ac_get_elem_bits(ctx, LLVMTypeOf(a->coords[0])))));
assert(!a->min_lod ||
ac_get_elem_bits(ctx, LLVMTypeOf(a->min_lod)) ==
ac_get_elem_bits(ctx, LLVMTypeOf(a->coords[0])));
if (a->opcode == ac_image_get_lod) {
switch (dim) {
@@ -2184,7 +2207,7 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, struct ac_image_
bool atomic = a->opcode == ac_image_atomic || a->opcode == ac_image_atomic_cmpswap;
bool load = a->opcode == ac_image_sample || a->opcode == ac_image_gather4 ||
a->opcode == ac_image_load || a->opcode == ac_image_load_mip;
LLVMTypeRef coord_type = sample ? ctx->f32 : ctx->i32;
LLVMTypeRef coord_type = sample ? (a->a16 ? ctx->f16 : ctx->f32) : (a->a16 ? ctx->i16 : ctx->i32);
uint8_t dmask = a->dmask;
LLVMTypeRef data_type;
char data_type_str[32];
@@ -2225,7 +2248,7 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, struct ac_image_
unsigned count = ac_num_derivs(dim);
for (unsigned i = 0; i < count; ++i)
args[num_args++] = ac_to_float(ctx, a->derivs[i]);
overload[num_overloads++] = ".f32";
overload[num_overloads++] = a->g16 ? ".f16" : ".f32";
}
unsigned num_coords = a->opcode != ac_image_get_resinfo ? ac_num_coords(dim) : 0;
for (unsigned i = 0; i < num_coords; ++i)
@@ -2235,7 +2258,7 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, struct ac_image_
if (a->min_lod)
args[num_args++] = LLVMBuildBitCast(ctx->builder, a->min_lod, coord_type, "");
overload[num_overloads++] = sample ? ".f32" : ".i32";
overload[num_overloads++] = sample ? (a->a16 ? ".f16" : ".f32") : (a->a16 ? ".i16" : ".i32");
args[num_args++] = a->resource;
if (sample) {
@@ -3373,6 +3396,7 @@ void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask, LL
fmask_load.coords[1] = addr[1];
if (is_array_tex)
fmask_load.coords[2] = addr[2];
fmask_load.a16 = ac_get_elem_bits(ac, LLVMTypeOf(addr[0])) == 16;
LLVMValueRef fmask_value = ac_build_image_opcode(ac, &fmask_load);
fmask_value = LLVMBuildExtractElement(ac->builder, fmask_value, ac->i32_0, "");
@@ -3380,11 +3404,15 @@ void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask, LL
/* Apply the formula. */
unsigned sample_chan = is_array_tex ? 3 : 2;
LLVMValueRef final_sample;
final_sample = LLVMBuildMul(ac->builder, addr[sample_chan], LLVMConstInt(ac->i32, 4, 0), "");
final_sample = LLVMBuildLShr(ac->builder, fmask_value, final_sample, "");
final_sample = LLVMBuildMul(ac->builder, addr[sample_chan],
LLVMConstInt(LLVMTypeOf(addr[0]), 4, 0), "");
final_sample = LLVMBuildLShr(ac->builder, fmask_value,
LLVMBuildZExt(ac->builder, final_sample, ac->i32, ""), "");
/* Mask the sample index by 0x7, because 0x8 means an unknown value
* with EQAA, so those will map to 0. */
final_sample = LLVMBuildAnd(ac->builder, final_sample, LLVMConstInt(ac->i32, 0x7, 0), "");
if (fmask_load.a16)
final_sample = LLVMBuildTrunc(ac->builder, final_sample, ac->i16, "");
/* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
* resource descriptor is 0 (invalid).

View File

@@ -403,7 +403,9 @@ struct ac_image_args {
unsigned cache_policy : 3;
bool unorm : 1;
bool level_zero : 1;
bool d16 : 1; /* data and return values are 16-bit, requires GFX8+ */
bool d16 : 1; /* GFX8+: data and return values are 16-bit */
bool a16 : 1; /* GFX9+: address components except compare, offset and bias are 16-bit */
bool g16 : 1; /* GFX10+: derivatives are 16-bit; GFX<=9: must be equal to a16 */
bool tfe : 1;
unsigned attributes; /* additional call-site specific AC_FUNC_ATTRs */

View File

@@ -1430,6 +1430,11 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, const nir_te
unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
assert(instr->dest.is_ssa);
/* Buffers don't support A16. */
if (args->a16)
args->coords[0] = LLVMBuildZExt(ctx->ac.builder, args->coords[0], ctx->ac.i32, "");
return ac_build_buffer_load_format(&ctx->ac, args->resource, args->coords[0], ctx->ac.i32_0,
util_last_bit(mask), 0, true,
instr->dest.ssa.bit_size == 16,
@@ -4179,6 +4184,7 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
switch (instr->src[i].src_type) {
case nir_tex_src_coord: {
LLVMValueRef coord = get_src(ctx, instr->src[i].src);
args.a16 = instr->src[i].src.ssa->bit_size == 16;
for (unsigned chan = 0; chan < instr->coord_components; ++chan)
args.coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan);
break;
@@ -4189,22 +4195,25 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
if (instr->is_shadow) {
args.compare = get_src(ctx, instr->src[i].src);
args.compare = ac_to_float(&ctx->ac, args.compare);
assert(instr->src[i].src.ssa->bit_size == 32);
}
break;
case nir_tex_src_offset:
args.offset = get_src(ctx, instr->src[i].src);
offset_src = i;
/* We pack it with bit shifts, so we need it to be 32-bit. */
assert(ac_get_elem_bits(&ctx->ac, LLVMTypeOf(args.offset)) == 32);
break;
case nir_tex_src_bias:
args.bias = get_src(ctx, instr->src[i].src);
assert(ac_get_elem_bits(&ctx->ac, LLVMTypeOf(args.bias)) == 32);
break;
case nir_tex_src_lod: {
case nir_tex_src_lod:
if (nir_src_is_const(instr->src[i].src) && nir_src_as_uint(instr->src[i].src) == 0)
args.level_zero = true;
else
args.lod = get_src(ctx, instr->src[i].src);
break;
}
case nir_tex_src_ms_index:
sample_index = get_src(ctx, instr->src[i].src);
break;
@@ -4212,9 +4221,11 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
break;
case nir_tex_src_ddx:
ddx = get_src(ctx, instr->src[i].src);
args.g16 = instr->src[i].src.ssa->bit_size == 16;
break;
case nir_tex_src_ddy:
ddy = get_src(ctx, instr->src[i].src);
assert(LLVMTypeOf(ddy) == LLVMTypeOf(ddx));
break;
case nir_tex_src_min_lod:
args.min_lod = get_src(ctx, instr->src[i].src);
@@ -4342,8 +4353,9 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddy, i));
}
for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) {
args.derivs[i] = ctx->ac.f32_0;
args.derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0;
LLVMValueRef zero = args.g16 ? ctx->ac.f16_0 : ctx->ac.f32_0;
args.derivs[i] = zero;
args.derivs[num_dest_deriv_channels + i] = zero;
}
}
@@ -4351,7 +4363,7 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
for (unsigned chan = 0; chan < instr->coord_components; chan++)
args.coords[chan] = ac_to_float(&ctx->ac, args.coords[chan]);
if (instr->coord_components == 3)
args.coords[3] = LLVMGetUndef(ctx->ac.f32);
args.coords[3] = LLVMGetUndef(args.a16 ? ctx->ac.f16 : ctx->ac.f32);
ac_prepare_cube_coords(&ctx->ac, instr->op == nir_texop_txd, instr->is_array,
instr->op == nir_texop_lod, args.coords, args.derivs);
}
@@ -4375,9 +4387,9 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
instr->op != nir_texop_lod) {
LLVMValueRef filler;
if (instr->op == nir_texop_txf)
filler = ctx->ac.i32_0;
filler = args.a16 ? ctx->ac.i16_0 : ctx->ac.i32_0;
else
filler = LLVMConstReal(ctx->ac.f32, 0.5);
filler = LLVMConstReal(args.a16 ? ctx->ac.f16 : ctx->ac.f32, 0.5);
if (instr->is_array)
args.coords[2] = args.coords[1];
@@ -4417,6 +4429,8 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
num_offsets = MIN2(num_offsets, instr->coord_components);
for (unsigned i = 0; i < num_offsets; ++i) {
LLVMValueRef off = ac_llvm_extract_elem(&ctx->ac, args.offset, i);
if (args.a16)
off = LLVMBuildTrunc(ctx->ac.builder, off, ctx->ac.i16, "");
args.coords[i] = LLVMBuildAdd(ctx->ac.builder, args.coords[i], off, "");
}
args.offset = NULL;