amd: implement conformant TRUNC_COORD behavior for gfx11
For testing, the conformant behavior can be enabled by setting conformant_trunc_coord to true manually and running this to enable the conformant behavior in hw: umr -w *.*.regTA_CNTL2 0x40000 The layer index rounding and TRUNC_COORD resetting workarounds can be disabled in the shader compiler. Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21525>
This commit is contained in:
@@ -715,6 +715,7 @@ struct drm_amdgpu_cs_chunk_data {
|
||||
#define AMDGPU_IDS_FLAGS_FUSION 0x1
|
||||
#define AMDGPU_IDS_FLAGS_PREEMPTION 0x2
|
||||
#define AMDGPU_IDS_FLAGS_TMZ 0x4
|
||||
#define AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD 0x8
|
||||
|
||||
/* indicate if acceleration can be working */
|
||||
#define AMDGPU_INFO_ACCEL_WORKING 0x00
|
||||
|
@@ -64,6 +64,7 @@
|
||||
#define AMDGPU_IDS_FLAGS_FUSION 0x1
|
||||
#define AMDGPU_IDS_FLAGS_PREEMPTION 0x2
|
||||
#define AMDGPU_IDS_FLAGS_TMZ 0x4
|
||||
#define AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD 0x8
|
||||
#define AMDGPU_INFO_FW_VCE 0x1
|
||||
#define AMDGPU_INFO_FW_UVD 0x2
|
||||
#define AMDGPU_INFO_FW_GFX_ME 0x04
|
||||
@@ -1424,6 +1425,10 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info)
|
||||
|
||||
info->has_set_reg_pairs = info->pfp_fw_version >= SET_REG_PAIRS_PFP_VERSION;
|
||||
info->has_set_sh_reg_pairs_n = info->pfp_fw_version >= SET_REG_PAIRS_PACKED_N_COUNT14_PFP_VERSION;
|
||||
|
||||
info->conformant_trunc_coord =
|
||||
info->drm_minor >= 52 &&
|
||||
device_info.ids_flags & AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD;
|
||||
}
|
||||
|
||||
set_custom_cu_en_mask(info);
|
||||
@@ -1576,6 +1581,7 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f)
|
||||
fprintf(f, " never_send_perfcounter_stop = %i\n", info->never_send_perfcounter_stop);
|
||||
fprintf(f, " discardable_allows_big_page = %i\n", info->discardable_allows_big_page);
|
||||
fprintf(f, " has_taskmesh_indirect0_bug = %i\n", info->has_taskmesh_indirect0_bug);
|
||||
fprintf(f, " conformant_trunc_coord = %i\n", info->conformant_trunc_coord);
|
||||
|
||||
fprintf(f, "Display features:\n");
|
||||
fprintf(f, " use_display_dcc_unaligned = %u\n", info->use_display_dcc_unaligned);
|
||||
|
@@ -126,6 +126,22 @@ struct radeon_info {
|
||||
bool has_vrs_ds_export_bug;
|
||||
bool has_taskmesh_indirect0_bug;
|
||||
|
||||
/* conformant_trunc_coord is equal to TA_CNTL2.TRUNCATE_COORD_MODE, which has existed since gfx11.
|
||||
*
|
||||
* If TA_CNTL2.TRUNCATE_COORD_MODE == 0, coordinate truncation is the same as gfx10 and older.
|
||||
* If TA_CNTL2.TRUNCATE_COORD_MODE == 1, coordinate truncation is adjusted to be conformant
|
||||
* if you also set TRUNC_COORD.
|
||||
*
|
||||
* Behavior:
|
||||
* truncate_coord_xy = TRUNC_COORD &&
|
||||
* ((xy_filter == Point && !gather) || !TA_CNTL2.TRUNCATE_COORD_MODE);
|
||||
* truncate_coord_z = TRUNC_COORD && (z_filter == Point || !TA_CNTL2.TRUNCATE_COORD_MODE);
|
||||
* truncate_coord_layer = TRUNC_COORD && !TA_CNTL2.TRUNCATE_COORD_MODE;
|
||||
*
|
||||
* AnisoPoint is treated as Point.
|
||||
*/
|
||||
bool conformant_trunc_coord;
|
||||
|
||||
/* Display features. */
|
||||
/* There are 2 display DCC codepaths, because display expects unaligned DCC. */
|
||||
/* Disable RB and pipe alignment to skip the retile blit. (1 RB chips only) */
|
||||
|
@@ -4611,6 +4611,7 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
|
||||
/* Texture coordinates fixups */
|
||||
if (instr->coord_components > 1 && instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
|
||||
instr->is_array && instr->op != nir_texop_txf) {
|
||||
if (!ctx->abi->conformant_trunc_coord)
|
||||
args.coords[1] = apply_round_slice(&ctx->ac, args.coords[1]);
|
||||
}
|
||||
|
||||
@@ -4620,6 +4621,7 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
|
||||
instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS) &&
|
||||
instr->is_array && instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms &&
|
||||
instr->op != nir_texop_fragment_fetch_amd && instr->op != nir_texop_fragment_mask_fetch_amd) {
|
||||
if (!ctx->abi->conformant_trunc_coord)
|
||||
args.coords[2] = apply_round_slice(&ctx->ac, args.coords[2]);
|
||||
}
|
||||
|
||||
@@ -4686,7 +4688,7 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
|
||||
}
|
||||
|
||||
/* Set TRUNC_COORD=0 for textureGather(). */
|
||||
if (instr->op == nir_texop_tg4) {
|
||||
if (instr->op == nir_texop_tg4 && !ctx->abi->conformant_trunc_coord) {
|
||||
LLVMValueRef dword0 = LLVMBuildExtractElement(ctx->ac.builder, args.sampler, ctx->ac.i32_0, "");
|
||||
dword0 = LLVMBuildAnd(ctx->ac.builder, dword0, LLVMConstInt(ctx->ac.i32, C_008F30_TRUNC_COORD, 0), "");
|
||||
args.sampler = LLVMBuildInsertElement(ctx->ac.builder, args.sampler, dword0, ctx->ac.i32_0, "");
|
||||
|
@@ -129,6 +129,9 @@ struct ac_shader_abi {
|
||||
/* Whether to disable anisotropic filtering. */
|
||||
bool disable_aniso_single_level;
|
||||
|
||||
/* Equal to radeon_info::conformant_trunc_coord. */
|
||||
bool conformant_trunc_coord;
|
||||
|
||||
/* Number of all interpolated inputs */
|
||||
unsigned num_interp;
|
||||
};
|
||||
|
@@ -7719,7 +7719,8 @@ radv_init_sampler(struct radv_device *device, struct radv_sampler *sampler,
|
||||
unsigned filter_mode = V_008F30_SQ_IMG_FILTER_MODE_BLEND;
|
||||
unsigned depth_compare_func = V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
|
||||
bool trunc_coord =
|
||||
pCreateInfo->minFilter == VK_FILTER_NEAREST && pCreateInfo->magFilter == VK_FILTER_NEAREST;
|
||||
(pCreateInfo->minFilter == VK_FILTER_NEAREST && pCreateInfo->magFilter == VK_FILTER_NEAREST) ||
|
||||
device->physical_device->rad_info.conformant_trunc_coord;
|
||||
bool uses_border_color = pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
|
||||
pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
|
||||
pCreateInfo->addressModeW == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
|
||||
|
@@ -907,6 +907,7 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
|
||||
ctx.abi.clamp_shadow_reference = false;
|
||||
ctx.abi.robust_buffer_access = options->robust_buffer_access;
|
||||
ctx.abi.load_grid_size_from_user_sgpr = args->load_grid_size_from_user_sgpr;
|
||||
ctx.abi.conformant_trunc_coord = options->conformant_trunc_coord;
|
||||
|
||||
bool is_ngg = is_pre_gs_stage(shaders[0]->info.stage) && info->is_ngg;
|
||||
if (shader_count >= 2 || is_ngg)
|
||||
|
@@ -926,7 +926,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_pipeline_
|
||||
.lower_to_fragment_fetch_amd = device->physical_device->use_fmask,
|
||||
.lower_lod_zero_width = true,
|
||||
.lower_invalid_implicit_lod = true,
|
||||
.lower_array_layer_round_even = true,
|
||||
.lower_array_layer_round_even = !device->physical_device->rad_info.conformant_trunc_coord,
|
||||
};
|
||||
|
||||
NIR_PASS(_, nir, nir_lower_tex, &tex_options);
|
||||
@@ -2322,6 +2322,7 @@ radv_fill_nir_compiler_options(struct radv_nir_compiler_options *options,
|
||||
options->family = device->physical_device->rad_info.family;
|
||||
options->gfx_level = device->physical_device->rad_info.gfx_level;
|
||||
options->has_3d_cube_border_color_mipmap = device->physical_device->rad_info.has_3d_cube_border_color_mipmap;
|
||||
options->conformant_trunc_coord = device->physical_device->rad_info.conformant_trunc_coord;
|
||||
options->dump_shader = can_dump_shader;
|
||||
options->dump_preoptir =
|
||||
options->dump_shader && device->instance->debug_flags & RADV_DEBUG_PREOPTIR;
|
||||
|
@@ -138,6 +138,7 @@ struct radv_nir_compiler_options {
|
||||
enum amd_gfx_level gfx_level;
|
||||
uint32_t address32_hi;
|
||||
bool has_3d_cube_border_color_mipmap;
|
||||
bool conformant_trunc_coord;
|
||||
|
||||
struct {
|
||||
void (*func)(void *private_data, enum aco_compiler_debug_level level, const char *message);
|
||||
|
@@ -1002,6 +1002,7 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade
|
||||
info->options & SI_PROFILE_CLAMP_DIV_BY_ZERO;
|
||||
ctx->abi.use_waterfall_for_divergent_tex_samplers = true;
|
||||
ctx->abi.disable_aniso_single_level = true;
|
||||
ctx->abi.conformant_trunc_coord = ctx->screen->info.conformant_trunc_coord;
|
||||
|
||||
unsigned num_outputs = info->num_outputs;
|
||||
/* need extra output to hold primitive id added by nir lower */
|
||||
|
@@ -4765,9 +4765,10 @@ static void *si_create_sampler_state(struct pipe_context *ctx,
|
||||
struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state);
|
||||
unsigned max_aniso = sscreen->force_aniso >= 0 ? sscreen->force_aniso : state->max_anisotropy;
|
||||
unsigned max_aniso_ratio = si_tex_aniso_filter(max_aniso);
|
||||
bool trunc_coord = state->min_img_filter == PIPE_TEX_FILTER_NEAREST &&
|
||||
bool trunc_coord = (state->min_img_filter == PIPE_TEX_FILTER_NEAREST &&
|
||||
state->mag_img_filter == PIPE_TEX_FILTER_NEAREST &&
|
||||
state->compare_mode == PIPE_TEX_COMPARE_NONE;
|
||||
state->compare_mode == PIPE_TEX_COMPARE_NONE) ||
|
||||
sscreen->info.conformant_trunc_coord;
|
||||
union pipe_color_union clamped_border_color;
|
||||
|
||||
if (!rstate) {
|
||||
|
Reference in New Issue
Block a user