radv: Fix float16 interpolation setup.
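float16 types can have non-flat interpolation, so set up the HW
correctly for that: use the f16 interp intrinsics in the shader and
set FP16_INTERP_MODE/ATTR0_VALID for those PS inputs.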
Fixes: 62024fa775
"radv: enable VK_KHR_16bit_storage extension / 16bit storage features"
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
This commit is contained in:
@@ -919,6 +919,37 @@ ac_build_fs_interp(struct ac_llvm_context *ctx,
|
|||||||
ctx->f32, args, 5, AC_FUNC_ATTR_READNONE);
|
ctx->f32, args, 5, AC_FUNC_ATTR_READNONE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
LLVMValueRef
|
||||||
|
ac_build_fs_interp_f16(struct ac_llvm_context *ctx,
|
||||||
|
LLVMValueRef llvm_chan,
|
||||||
|
LLVMValueRef attr_number,
|
||||||
|
LLVMValueRef params,
|
||||||
|
LLVMValueRef i,
|
||||||
|
LLVMValueRef j)
|
||||||
|
{
|
||||||
|
LLVMValueRef args[6];
|
||||||
|
LLVMValueRef p1;
|
||||||
|
|
||||||
|
args[0] = i;
|
||||||
|
args[1] = llvm_chan;
|
||||||
|
args[2] = attr_number;
|
||||||
|
args[3] = ctx->i1false;
|
||||||
|
args[4] = params;
|
||||||
|
|
||||||
|
p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1.f16",
|
||||||
|
ctx->f32, args, 5, AC_FUNC_ATTR_READNONE);
|
||||||
|
|
||||||
|
args[0] = p1;
|
||||||
|
args[1] = j;
|
||||||
|
args[2] = llvm_chan;
|
||||||
|
args[3] = attr_number;
|
||||||
|
args[4] = ctx->i1false;
|
||||||
|
args[5] = params;
|
||||||
|
|
||||||
|
return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2.f16",
|
||||||
|
ctx->f16, args, 6, AC_FUNC_ATTR_READNONE);
|
||||||
|
}
|
||||||
|
|
||||||
LLVMValueRef
|
LLVMValueRef
|
||||||
ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
|
ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
|
||||||
LLVMValueRef parameter,
|
LLVMValueRef parameter,
|
||||||
|
@@ -213,6 +213,14 @@ ac_build_fs_interp(struct ac_llvm_context *ctx,
|
|||||||
LLVMValueRef i,
|
LLVMValueRef i,
|
||||||
LLVMValueRef j);
|
LLVMValueRef j);
|
||||||
|
|
||||||
|
LLVMValueRef
|
||||||
|
ac_build_fs_interp_f16(struct ac_llvm_context *ctx,
|
||||||
|
LLVMValueRef llvm_chan,
|
||||||
|
LLVMValueRef attr_number,
|
||||||
|
LLVMValueRef params,
|
||||||
|
LLVMValueRef i,
|
||||||
|
LLVMValueRef j);
|
||||||
|
|
||||||
LLVMValueRef
|
LLVMValueRef
|
||||||
ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
|
ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
|
||||||
LLVMValueRef parameter,
|
LLVMValueRef parameter,
|
||||||
|
@@ -806,7 +806,7 @@ void radv_GetPhysicalDeviceFeatures2(
|
|||||||
features->storageBuffer16BitAccess = enabled;
|
features->storageBuffer16BitAccess = enabled;
|
||||||
features->uniformAndStorageBuffer16BitAccess = enabled;
|
features->uniformAndStorageBuffer16BitAccess = enabled;
|
||||||
features->storagePushConstant16 = enabled;
|
features->storagePushConstant16 = enabled;
|
||||||
features->storageInputOutput16 = enabled;
|
features->storageInputOutput16 = enabled && HAVE_LLVM >= 0x900;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: {
|
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: {
|
||||||
|
@@ -92,6 +92,7 @@ struct radv_shader_context {
|
|||||||
gl_shader_stage stage;
|
gl_shader_stage stage;
|
||||||
|
|
||||||
LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
|
LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
|
||||||
|
uint64_t float16_shaded_mask;
|
||||||
|
|
||||||
uint64_t input_mask;
|
uint64_t input_mask;
|
||||||
uint64_t output_mask;
|
uint64_t output_mask;
|
||||||
@@ -2197,6 +2198,7 @@ static void interp_fs_input(struct radv_shader_context *ctx,
|
|||||||
unsigned attr,
|
unsigned attr,
|
||||||
LLVMValueRef interp_param,
|
LLVMValueRef interp_param,
|
||||||
LLVMValueRef prim_mask,
|
LLVMValueRef prim_mask,
|
||||||
|
bool float16,
|
||||||
LLVMValueRef result[4])
|
LLVMValueRef result[4])
|
||||||
{
|
{
|
||||||
LLVMValueRef attr_number;
|
LLVMValueRef attr_number;
|
||||||
@@ -2229,7 +2231,12 @@ static void interp_fs_input(struct radv_shader_context *ctx,
|
|||||||
for (chan = 0; chan < 4; chan++) {
|
for (chan = 0; chan < 4; chan++) {
|
||||||
LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
|
LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
|
||||||
|
|
||||||
if (interp) {
|
if (interp && float16) {
|
||||||
|
result[chan] = ac_build_fs_interp_f16(&ctx->ac,
|
||||||
|
llvm_chan,
|
||||||
|
attr_number,
|
||||||
|
prim_mask, i, j);
|
||||||
|
} else if (interp) {
|
||||||
result[chan] = ac_build_fs_interp(&ctx->ac,
|
result[chan] = ac_build_fs_interp(&ctx->ac,
|
||||||
llvm_chan,
|
llvm_chan,
|
||||||
attr_number,
|
attr_number,
|
||||||
@@ -2241,7 +2248,30 @@ static void interp_fs_input(struct radv_shader_context *ctx,
|
|||||||
attr_number,
|
attr_number,
|
||||||
prim_mask);
|
prim_mask);
|
||||||
result[chan] = LLVMBuildBitCast(ctx->ac.builder, result[chan], ctx->ac.i32, "");
|
result[chan] = LLVMBuildBitCast(ctx->ac.builder, result[chan], ctx->ac.i32, "");
|
||||||
result[chan] = LLVMBuildTruncOrBitCast(ctx->ac.builder, result[chan], LLVMTypeOf(interp_param), "");
|
result[chan] = LLVMBuildTruncOrBitCast(ctx->ac.builder, result[chan], float16 ? ctx->ac.i16 : ctx->ac.i32, "");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void mark_16bit_fs_input(struct radv_shader_context *ctx,
|
||||||
|
const struct glsl_type *type,
|
||||||
|
int location)
|
||||||
|
{
|
||||||
|
if (glsl_type_is_scalar(type) || glsl_type_is_vector(type) || glsl_type_is_matrix(type)) {
|
||||||
|
unsigned attrib_count = glsl_count_attribute_slots(type, false);
|
||||||
|
if (glsl_type_is_16bit(type)) {
|
||||||
|
ctx->float16_shaded_mask |= ((1ull << attrib_count) - 1) << location;
|
||||||
|
}
|
||||||
|
} else if (glsl_type_is_array(type)) {
|
||||||
|
unsigned stride = glsl_count_attribute_slots(glsl_get_array_element(type), false);
|
||||||
|
for (unsigned i = 0; i < glsl_get_length(type); ++i) {
|
||||||
|
mark_16bit_fs_input(ctx, glsl_get_array_element(type), location + i * stride);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
assert(glsl_type_is_struct(type));
|
||||||
|
for (unsigned i = 0; i < glsl_get_length(type); i++) {
|
||||||
|
mark_16bit_fs_input(ctx, glsl_get_struct_field(type, i), location);
|
||||||
|
location += glsl_count_attribute_slots(glsl_get_struct_field(type, i), false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2262,7 +2292,8 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
|
|||||||
unsigned component_count = variable->data.location_frac +
|
unsigned component_count = variable->data.location_frac +
|
||||||
glsl_get_length(variable->type);
|
glsl_get_length(variable->type);
|
||||||
attrib_count = (component_count + 3) / 4;
|
attrib_count = (component_count + 3) / 4;
|
||||||
}
|
} else
|
||||||
|
mark_16bit_fs_input(ctx, variable->type, idx);
|
||||||
|
|
||||||
mask = ((1ull << attrib_count) - 1) << variable->data.location;
|
mask = ((1ull << attrib_count) - 1) << variable->data.location;
|
||||||
|
|
||||||
@@ -2277,10 +2308,8 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
|
|||||||
|
|
||||||
interp = lookup_interp_param(&ctx->abi, variable->data.interpolation, interp_type);
|
interp = lookup_interp_param(&ctx->abi, variable->data.interpolation, interp_type);
|
||||||
}
|
}
|
||||||
bool is_16bit = glsl_type_is_16bit(glsl_without_array(variable->type));
|
|
||||||
LLVMTypeRef type = is_16bit ? ctx->ac.i16 : ctx->ac.i32;
|
|
||||||
if (interp == NULL)
|
if (interp == NULL)
|
||||||
interp = LLVMGetUndef(type);
|
interp = LLVMGetUndef(ctx->ac.i32);
|
||||||
|
|
||||||
for (unsigned i = 0; i < attrib_count; ++i)
|
for (unsigned i = 0; i < attrib_count; ++i)
|
||||||
ctx->inputs[ac_llvm_reg_index_soa(idx + i, 0)] = interp;
|
ctx->inputs[ac_llvm_reg_index_soa(idx + i, 0)] = interp;
|
||||||
@@ -2346,11 +2375,14 @@ handle_fs_inputs(struct radv_shader_context *ctx,
|
|||||||
if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC ||
|
if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC ||
|
||||||
i == VARYING_SLOT_PRIMITIVE_ID || i == VARYING_SLOT_LAYER) {
|
i == VARYING_SLOT_PRIMITIVE_ID || i == VARYING_SLOT_LAYER) {
|
||||||
interp_param = *inputs;
|
interp_param = *inputs;
|
||||||
interp_fs_input(ctx, index, interp_param, ctx->abi.prim_mask,
|
bool float16 = (ctx->float16_shaded_mask >> i) & 1;
|
||||||
|
interp_fs_input(ctx, index, interp_param, ctx->abi.prim_mask, float16,
|
||||||
inputs);
|
inputs);
|
||||||
|
|
||||||
if (LLVMIsUndef(interp_param))
|
if (LLVMIsUndef(interp_param))
|
||||||
ctx->shader_info->fs.flat_shaded_mask |= 1u << index;
|
ctx->shader_info->fs.flat_shaded_mask |= 1u << index;
|
||||||
|
if (float16)
|
||||||
|
ctx->shader_info->fs.float16_shaded_mask |= 1u << index;
|
||||||
if (i >= VARYING_SLOT_VAR0)
|
if (i >= VARYING_SLOT_VAR0)
|
||||||
ctx->abi.fs_input_attr_indices[i - VARYING_SLOT_VAR0] = index;
|
ctx->abi.fs_input_attr_indices[i - VARYING_SLOT_VAR0] = index;
|
||||||
++index;
|
++index;
|
||||||
@@ -2362,7 +2394,7 @@ handle_fs_inputs(struct radv_shader_context *ctx,
|
|||||||
|
|
||||||
interp_param = *inputs;
|
interp_param = *inputs;
|
||||||
interp_fs_input(ctx, index, interp_param,
|
interp_fs_input(ctx, index, interp_param,
|
||||||
ctx->abi.prim_mask, inputs);
|
ctx->abi.prim_mask, false, inputs);
|
||||||
++index;
|
++index;
|
||||||
}
|
}
|
||||||
} else if (i == VARYING_SLOT_POS) {
|
} else if (i == VARYING_SLOT_POS) {
|
||||||
|
@@ -3101,13 +3101,17 @@ radv_pipeline_generate_geometry_shader(struct radeon_cmdbuf *ctx_cs,
|
|||||||
radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, pipeline->gs_copy_shader);
|
radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, pipeline->gs_copy_shader);
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade)
|
static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade, bool float16)
|
||||||
{
|
{
|
||||||
uint32_t ps_input_cntl;
|
uint32_t ps_input_cntl;
|
||||||
if (offset <= AC_EXP_PARAM_OFFSET_31) {
|
if (offset <= AC_EXP_PARAM_OFFSET_31) {
|
||||||
ps_input_cntl = S_028644_OFFSET(offset);
|
ps_input_cntl = S_028644_OFFSET(offset);
|
||||||
if (flat_shade)
|
if (flat_shade)
|
||||||
ps_input_cntl |= S_028644_FLAT_SHADE(1);
|
ps_input_cntl |= S_028644_FLAT_SHADE(1);
|
||||||
|
if (float16) {
|
||||||
|
ps_input_cntl |= S_028644_FP16_INTERP_MODE(1) |
|
||||||
|
S_028644_ATTR0_VALID(1);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
/* The input is a DEFAULT_VAL constant. */
|
/* The input is a DEFAULT_VAL constant. */
|
||||||
assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 &&
|
assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 &&
|
||||||
@@ -3132,7 +3136,7 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
|
|||||||
if (ps->info.info.ps.prim_id_input) {
|
if (ps->info.info.ps.prim_id_input) {
|
||||||
unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID];
|
unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID];
|
||||||
if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
|
if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
|
||||||
ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
|
ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false);
|
||||||
++ps_offset;
|
++ps_offset;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -3142,9 +3146,9 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
|
|||||||
ps->info.info.needs_multiview_view_index) {
|
ps->info.info.needs_multiview_view_index) {
|
||||||
unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_LAYER];
|
unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_LAYER];
|
||||||
if (vs_offset != AC_EXP_PARAM_UNDEFINED)
|
if (vs_offset != AC_EXP_PARAM_UNDEFINED)
|
||||||
ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
|
ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false);
|
||||||
else
|
else
|
||||||
ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true);
|
ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true, false);
|
||||||
++ps_offset;
|
++ps_offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3160,14 +3164,14 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
|
|||||||
|
|
||||||
vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0];
|
vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0];
|
||||||
if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
|
if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
|
||||||
ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false);
|
ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false);
|
||||||
++ps_offset;
|
++ps_offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1];
|
vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1];
|
||||||
if (vs_offset != AC_EXP_PARAM_UNDEFINED &&
|
if (vs_offset != AC_EXP_PARAM_UNDEFINED &&
|
||||||
ps->info.info.ps.num_input_clips_culls > 4) {
|
ps->info.info.ps.num_input_clips_culls > 4) {
|
||||||
ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false);
|
ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false);
|
||||||
++ps_offset;
|
++ps_offset;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -3175,6 +3179,7 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
|
|||||||
for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) {
|
for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) {
|
||||||
unsigned vs_offset;
|
unsigned vs_offset;
|
||||||
bool flat_shade;
|
bool flat_shade;
|
||||||
|
bool float16;
|
||||||
if (!(ps->info.fs.input_mask & (1u << i)))
|
if (!(ps->info.fs.input_mask & (1u << i)))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
@@ -3186,8 +3191,9 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
|
|||||||
}
|
}
|
||||||
|
|
||||||
flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << ps_offset));
|
flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << ps_offset));
|
||||||
|
float16 = !!(ps->info.fs.float16_shaded_mask & (1u << ps_offset));
|
||||||
|
|
||||||
ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade);
|
ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade, float16);
|
||||||
++ps_offset;
|
++ps_offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -266,6 +266,7 @@ struct radv_shader_variant_info {
|
|||||||
unsigned num_interp;
|
unsigned num_interp;
|
||||||
uint32_t input_mask;
|
uint32_t input_mask;
|
||||||
uint32_t flat_shaded_mask;
|
uint32_t flat_shaded_mask;
|
||||||
|
uint32_t float16_shaded_mask;
|
||||||
bool can_discard;
|
bool can_discard;
|
||||||
bool early_fragment_test;
|
bool early_fragment_test;
|
||||||
} fs;
|
} fs;
|
||||||
|
Reference in New Issue
Block a user