ac/llvm: add support for 16-bit source operands for samplers
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9395>
This commit is contained in:
@@ -2164,6 +2164,29 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, struct ac_image_
|
|||||||
assert(!a->d16 || (ctx->chip_class >= GFX8 && a->opcode != ac_image_atomic &&
|
assert(!a->d16 || (ctx->chip_class >= GFX8 && a->opcode != ac_image_atomic &&
|
||||||
a->opcode != ac_image_atomic_cmpswap && a->opcode != ac_image_get_lod &&
|
a->opcode != ac_image_atomic_cmpswap && a->opcode != ac_image_get_lod &&
|
||||||
a->opcode != ac_image_get_resinfo));
|
a->opcode != ac_image_get_resinfo));
|
||||||
|
assert(!a->a16 || ctx->chip_class >= GFX9);
|
||||||
|
assert(a->g16 == a->a16 || ctx->chip_class >= GFX10);
|
||||||
|
|
||||||
|
assert(!a->offset ||
|
||||||
|
ac_get_elem_bits(ctx, LLVMTypeOf(a->offset)) == 32);
|
||||||
|
assert(!a->bias ||
|
||||||
|
ac_get_elem_bits(ctx, LLVMTypeOf(a->bias)) == 32);
|
||||||
|
assert(!a->compare ||
|
||||||
|
ac_get_elem_bits(ctx, LLVMTypeOf(a->compare)) == 32);
|
||||||
|
assert(!a->derivs[0] ||
|
||||||
|
((!a->g16 || ac_get_elem_bits(ctx, LLVMTypeOf(a->derivs[0])) == 16) &&
|
||||||
|
(a->g16 || ac_get_elem_bits(ctx, LLVMTypeOf(a->derivs[0])) == 32)));
|
||||||
|
assert(!a->coords[0] ||
|
||||||
|
((!a->a16 || ac_get_elem_bits(ctx, LLVMTypeOf(a->coords[0])) == 16) &&
|
||||||
|
(a->a16 || ac_get_elem_bits(ctx, LLVMTypeOf(a->coords[0])) == 32)));
|
||||||
|
assert(!a->lod ||
|
||||||
|
((a->opcode != ac_image_get_resinfo || ac_get_elem_bits(ctx, LLVMTypeOf(a->lod))) &&
|
||||||
|
(a->opcode == ac_image_get_resinfo ||
|
||||||
|
ac_get_elem_bits(ctx, LLVMTypeOf(a->lod)) ==
|
||||||
|
ac_get_elem_bits(ctx, LLVMTypeOf(a->coords[0])))));
|
||||||
|
assert(!a->min_lod ||
|
||||||
|
ac_get_elem_bits(ctx, LLVMTypeOf(a->min_lod)) ==
|
||||||
|
ac_get_elem_bits(ctx, LLVMTypeOf(a->coords[0])));
|
||||||
|
|
||||||
if (a->opcode == ac_image_get_lod) {
|
if (a->opcode == ac_image_get_lod) {
|
||||||
switch (dim) {
|
switch (dim) {
|
||||||
@@ -2184,7 +2207,7 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, struct ac_image_
|
|||||||
bool atomic = a->opcode == ac_image_atomic || a->opcode == ac_image_atomic_cmpswap;
|
bool atomic = a->opcode == ac_image_atomic || a->opcode == ac_image_atomic_cmpswap;
|
||||||
bool load = a->opcode == ac_image_sample || a->opcode == ac_image_gather4 ||
|
bool load = a->opcode == ac_image_sample || a->opcode == ac_image_gather4 ||
|
||||||
a->opcode == ac_image_load || a->opcode == ac_image_load_mip;
|
a->opcode == ac_image_load || a->opcode == ac_image_load_mip;
|
||||||
LLVMTypeRef coord_type = sample ? ctx->f32 : ctx->i32;
|
LLVMTypeRef coord_type = sample ? (a->a16 ? ctx->f16 : ctx->f32) : (a->a16 ? ctx->i16 : ctx->i32);
|
||||||
uint8_t dmask = a->dmask;
|
uint8_t dmask = a->dmask;
|
||||||
LLVMTypeRef data_type;
|
LLVMTypeRef data_type;
|
||||||
char data_type_str[32];
|
char data_type_str[32];
|
||||||
@@ -2225,7 +2248,7 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, struct ac_image_
|
|||||||
unsigned count = ac_num_derivs(dim);
|
unsigned count = ac_num_derivs(dim);
|
||||||
for (unsigned i = 0; i < count; ++i)
|
for (unsigned i = 0; i < count; ++i)
|
||||||
args[num_args++] = ac_to_float(ctx, a->derivs[i]);
|
args[num_args++] = ac_to_float(ctx, a->derivs[i]);
|
||||||
overload[num_overloads++] = ".f32";
|
overload[num_overloads++] = a->g16 ? ".f16" : ".f32";
|
||||||
}
|
}
|
||||||
unsigned num_coords = a->opcode != ac_image_get_resinfo ? ac_num_coords(dim) : 0;
|
unsigned num_coords = a->opcode != ac_image_get_resinfo ? ac_num_coords(dim) : 0;
|
||||||
for (unsigned i = 0; i < num_coords; ++i)
|
for (unsigned i = 0; i < num_coords; ++i)
|
||||||
@@ -2235,7 +2258,7 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, struct ac_image_
|
|||||||
if (a->min_lod)
|
if (a->min_lod)
|
||||||
args[num_args++] = LLVMBuildBitCast(ctx->builder, a->min_lod, coord_type, "");
|
args[num_args++] = LLVMBuildBitCast(ctx->builder, a->min_lod, coord_type, "");
|
||||||
|
|
||||||
overload[num_overloads++] = sample ? ".f32" : ".i32";
|
overload[num_overloads++] = sample ? (a->a16 ? ".f16" : ".f32") : (a->a16 ? ".i16" : ".i32");
|
||||||
|
|
||||||
args[num_args++] = a->resource;
|
args[num_args++] = a->resource;
|
||||||
if (sample) {
|
if (sample) {
|
||||||
@@ -3373,6 +3396,7 @@ void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask, LL
|
|||||||
fmask_load.coords[1] = addr[1];
|
fmask_load.coords[1] = addr[1];
|
||||||
if (is_array_tex)
|
if (is_array_tex)
|
||||||
fmask_load.coords[2] = addr[2];
|
fmask_load.coords[2] = addr[2];
|
||||||
|
fmask_load.a16 = ac_get_elem_bits(ac, LLVMTypeOf(addr[0])) == 16;
|
||||||
|
|
||||||
LLVMValueRef fmask_value = ac_build_image_opcode(ac, &fmask_load);
|
LLVMValueRef fmask_value = ac_build_image_opcode(ac, &fmask_load);
|
||||||
fmask_value = LLVMBuildExtractElement(ac->builder, fmask_value, ac->i32_0, "");
|
fmask_value = LLVMBuildExtractElement(ac->builder, fmask_value, ac->i32_0, "");
|
||||||
@@ -3380,11 +3404,15 @@ void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask, LL
|
|||||||
/* Apply the formula. */
|
/* Apply the formula. */
|
||||||
unsigned sample_chan = is_array_tex ? 3 : 2;
|
unsigned sample_chan = is_array_tex ? 3 : 2;
|
||||||
LLVMValueRef final_sample;
|
LLVMValueRef final_sample;
|
||||||
final_sample = LLVMBuildMul(ac->builder, addr[sample_chan], LLVMConstInt(ac->i32, 4, 0), "");
|
final_sample = LLVMBuildMul(ac->builder, addr[sample_chan],
|
||||||
final_sample = LLVMBuildLShr(ac->builder, fmask_value, final_sample, "");
|
LLVMConstInt(LLVMTypeOf(addr[0]), 4, 0), "");
|
||||||
|
final_sample = LLVMBuildLShr(ac->builder, fmask_value,
|
||||||
|
LLVMBuildZExt(ac->builder, final_sample, ac->i32, ""), "");
|
||||||
/* Mask the sample index by 0x7, because 0x8 means an unknown value
|
/* Mask the sample index by 0x7, because 0x8 means an unknown value
|
||||||
* with EQAA, so those will map to 0. */
|
* with EQAA, so those will map to 0. */
|
||||||
final_sample = LLVMBuildAnd(ac->builder, final_sample, LLVMConstInt(ac->i32, 0x7, 0), "");
|
final_sample = LLVMBuildAnd(ac->builder, final_sample, LLVMConstInt(ac->i32, 0x7, 0), "");
|
||||||
|
if (fmask_load.a16)
|
||||||
|
final_sample = LLVMBuildTrunc(ac->builder, final_sample, ac->i16, "");
|
||||||
|
|
||||||
/* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
|
/* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
|
||||||
* resource descriptor is 0 (invalid).
|
* resource descriptor is 0 (invalid).
|
||||||
|
@@ -403,7 +403,9 @@ struct ac_image_args {
|
|||||||
unsigned cache_policy : 3;
|
unsigned cache_policy : 3;
|
||||||
bool unorm : 1;
|
bool unorm : 1;
|
||||||
bool level_zero : 1;
|
bool level_zero : 1;
|
||||||
bool d16 : 1; /* data and return values are 16-bit, requires GFX8+ */
|
bool d16 : 1; /* GFX8+: data and return values are 16-bit */
|
||||||
|
bool a16 : 1; /* GFX9+: address components except compare, offset and bias are 16-bit */
|
||||||
|
bool g16 : 1; /* GFX10+: derivatives are 16-bit; GFX<=9: must be equal to a16 */
|
||||||
bool tfe : 1;
|
bool tfe : 1;
|
||||||
unsigned attributes; /* additional call-site specific AC_FUNC_ATTRs */
|
unsigned attributes; /* additional call-site specific AC_FUNC_ATTRs */
|
||||||
|
|
||||||
|
@@ -1430,6 +1430,11 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, const nir_te
|
|||||||
unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
|
unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
|
||||||
|
|
||||||
assert(instr->dest.is_ssa);
|
assert(instr->dest.is_ssa);
|
||||||
|
|
||||||
|
/* Buffers don't support A16. */
|
||||||
|
if (args->a16)
|
||||||
|
args->coords[0] = LLVMBuildZExt(ctx->ac.builder, args->coords[0], ctx->ac.i32, "");
|
||||||
|
|
||||||
return ac_build_buffer_load_format(&ctx->ac, args->resource, args->coords[0], ctx->ac.i32_0,
|
return ac_build_buffer_load_format(&ctx->ac, args->resource, args->coords[0], ctx->ac.i32_0,
|
||||||
util_last_bit(mask), 0, true,
|
util_last_bit(mask), 0, true,
|
||||||
instr->dest.ssa.bit_size == 16,
|
instr->dest.ssa.bit_size == 16,
|
||||||
@@ -4179,6 +4184,7 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
|
|||||||
switch (instr->src[i].src_type) {
|
switch (instr->src[i].src_type) {
|
||||||
case nir_tex_src_coord: {
|
case nir_tex_src_coord: {
|
||||||
LLVMValueRef coord = get_src(ctx, instr->src[i].src);
|
LLVMValueRef coord = get_src(ctx, instr->src[i].src);
|
||||||
|
args.a16 = instr->src[i].src.ssa->bit_size == 16;
|
||||||
for (unsigned chan = 0; chan < instr->coord_components; ++chan)
|
for (unsigned chan = 0; chan < instr->coord_components; ++chan)
|
||||||
args.coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan);
|
args.coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan);
|
||||||
break;
|
break;
|
||||||
@@ -4189,22 +4195,25 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
|
|||||||
if (instr->is_shadow) {
|
if (instr->is_shadow) {
|
||||||
args.compare = get_src(ctx, instr->src[i].src);
|
args.compare = get_src(ctx, instr->src[i].src);
|
||||||
args.compare = ac_to_float(&ctx->ac, args.compare);
|
args.compare = ac_to_float(&ctx->ac, args.compare);
|
||||||
|
assert(instr->src[i].src.ssa->bit_size == 32);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case nir_tex_src_offset:
|
case nir_tex_src_offset:
|
||||||
args.offset = get_src(ctx, instr->src[i].src);
|
args.offset = get_src(ctx, instr->src[i].src);
|
||||||
offset_src = i;
|
offset_src = i;
|
||||||
|
/* We pack it with bit shifts, so we need it to be 32-bit. */
|
||||||
|
assert(ac_get_elem_bits(&ctx->ac, LLVMTypeOf(args.offset)) == 32);
|
||||||
break;
|
break;
|
||||||
case nir_tex_src_bias:
|
case nir_tex_src_bias:
|
||||||
args.bias = get_src(ctx, instr->src[i].src);
|
args.bias = get_src(ctx, instr->src[i].src);
|
||||||
|
assert(ac_get_elem_bits(&ctx->ac, LLVMTypeOf(args.bias)) == 32);
|
||||||
break;
|
break;
|
||||||
case nir_tex_src_lod: {
|
case nir_tex_src_lod:
|
||||||
if (nir_src_is_const(instr->src[i].src) && nir_src_as_uint(instr->src[i].src) == 0)
|
if (nir_src_is_const(instr->src[i].src) && nir_src_as_uint(instr->src[i].src) == 0)
|
||||||
args.level_zero = true;
|
args.level_zero = true;
|
||||||
else
|
else
|
||||||
args.lod = get_src(ctx, instr->src[i].src);
|
args.lod = get_src(ctx, instr->src[i].src);
|
||||||
break;
|
break;
|
||||||
}
|
|
||||||
case nir_tex_src_ms_index:
|
case nir_tex_src_ms_index:
|
||||||
sample_index = get_src(ctx, instr->src[i].src);
|
sample_index = get_src(ctx, instr->src[i].src);
|
||||||
break;
|
break;
|
||||||
@@ -4212,9 +4221,11 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
|
|||||||
break;
|
break;
|
||||||
case nir_tex_src_ddx:
|
case nir_tex_src_ddx:
|
||||||
ddx = get_src(ctx, instr->src[i].src);
|
ddx = get_src(ctx, instr->src[i].src);
|
||||||
|
args.g16 = instr->src[i].src.ssa->bit_size == 16;
|
||||||
break;
|
break;
|
||||||
case nir_tex_src_ddy:
|
case nir_tex_src_ddy:
|
||||||
ddy = get_src(ctx, instr->src[i].src);
|
ddy = get_src(ctx, instr->src[i].src);
|
||||||
|
assert(LLVMTypeOf(ddy) == LLVMTypeOf(ddx));
|
||||||
break;
|
break;
|
||||||
case nir_tex_src_min_lod:
|
case nir_tex_src_min_lod:
|
||||||
args.min_lod = get_src(ctx, instr->src[i].src);
|
args.min_lod = get_src(ctx, instr->src[i].src);
|
||||||
@@ -4342,8 +4353,9 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
|
|||||||
ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddy, i));
|
ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddy, i));
|
||||||
}
|
}
|
||||||
for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) {
|
for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) {
|
||||||
args.derivs[i] = ctx->ac.f32_0;
|
LLVMValueRef zero = args.g16 ? ctx->ac.f16_0 : ctx->ac.f32_0;
|
||||||
args.derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0;
|
args.derivs[i] = zero;
|
||||||
|
args.derivs[num_dest_deriv_channels + i] = zero;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -4351,7 +4363,7 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
|
|||||||
for (unsigned chan = 0; chan < instr->coord_components; chan++)
|
for (unsigned chan = 0; chan < instr->coord_components; chan++)
|
||||||
args.coords[chan] = ac_to_float(&ctx->ac, args.coords[chan]);
|
args.coords[chan] = ac_to_float(&ctx->ac, args.coords[chan]);
|
||||||
if (instr->coord_components == 3)
|
if (instr->coord_components == 3)
|
||||||
args.coords[3] = LLVMGetUndef(ctx->ac.f32);
|
args.coords[3] = LLVMGetUndef(args.a16 ? ctx->ac.f16 : ctx->ac.f32);
|
||||||
ac_prepare_cube_coords(&ctx->ac, instr->op == nir_texop_txd, instr->is_array,
|
ac_prepare_cube_coords(&ctx->ac, instr->op == nir_texop_txd, instr->is_array,
|
||||||
instr->op == nir_texop_lod, args.coords, args.derivs);
|
instr->op == nir_texop_lod, args.coords, args.derivs);
|
||||||
}
|
}
|
||||||
@@ -4375,9 +4387,9 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
|
|||||||
instr->op != nir_texop_lod) {
|
instr->op != nir_texop_lod) {
|
||||||
LLVMValueRef filler;
|
LLVMValueRef filler;
|
||||||
if (instr->op == nir_texop_txf)
|
if (instr->op == nir_texop_txf)
|
||||||
filler = ctx->ac.i32_0;
|
filler = args.a16 ? ctx->ac.i16_0 : ctx->ac.i32_0;
|
||||||
else
|
else
|
||||||
filler = LLVMConstReal(ctx->ac.f32, 0.5);
|
filler = LLVMConstReal(args.a16 ? ctx->ac.f16 : ctx->ac.f32, 0.5);
|
||||||
|
|
||||||
if (instr->is_array)
|
if (instr->is_array)
|
||||||
args.coords[2] = args.coords[1];
|
args.coords[2] = args.coords[1];
|
||||||
@@ -4417,6 +4429,8 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
|
|||||||
num_offsets = MIN2(num_offsets, instr->coord_components);
|
num_offsets = MIN2(num_offsets, instr->coord_components);
|
||||||
for (unsigned i = 0; i < num_offsets; ++i) {
|
for (unsigned i = 0; i < num_offsets; ++i) {
|
||||||
LLVMValueRef off = ac_llvm_extract_elem(&ctx->ac, args.offset, i);
|
LLVMValueRef off = ac_llvm_extract_elem(&ctx->ac, args.offset, i);
|
||||||
|
if (args.a16)
|
||||||
|
off = LLVMBuildTrunc(ctx->ac.builder, off, ctx->ac.i16, "");
|
||||||
args.coords[i] = LLVMBuildAdd(ctx->ac.builder, args.coords[i], off, "");
|
args.coords[i] = LLVMBuildAdd(ctx->ac.builder, args.coords[i], off, "");
|
||||||
}
|
}
|
||||||
args.offset = NULL;
|
args.offset = NULL;
|
||||||
|
Reference in New Issue
Block a user