diff --git a/include/drm-uapi/amdgpu_drm.h b/include/drm-uapi/amdgpu_drm.h index 973af6d0662..45dc7e00c89 100644 --- a/include/drm-uapi/amdgpu_drm.h +++ b/include/drm-uapi/amdgpu_drm.h @@ -715,6 +715,7 @@ struct drm_amdgpu_cs_chunk_data { #define AMDGPU_IDS_FLAGS_FUSION 0x1 #define AMDGPU_IDS_FLAGS_PREEMPTION 0x2 #define AMDGPU_IDS_FLAGS_TMZ 0x4 +#define AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD 0x8 /* indicate if acceleration can be working */ #define AMDGPU_INFO_ACCEL_WORKING 0x00 diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index 1c8a850d822..56b11cbdb2c 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -64,6 +64,7 @@ #define AMDGPU_IDS_FLAGS_FUSION 0x1 #define AMDGPU_IDS_FLAGS_PREEMPTION 0x2 #define AMDGPU_IDS_FLAGS_TMZ 0x4 +#define AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD 0x8 #define AMDGPU_INFO_FW_VCE 0x1 #define AMDGPU_INFO_FW_UVD 0x2 #define AMDGPU_INFO_FW_GFX_ME 0x04 @@ -1424,6 +1425,10 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info) info->has_set_reg_pairs = info->pfp_fw_version >= SET_REG_PAIRS_PFP_VERSION; info->has_set_sh_reg_pairs_n = info->pfp_fw_version >= SET_REG_PAIRS_PACKED_N_COUNT14_PFP_VERSION; + + info->conformant_trunc_coord = + info->drm_minor >= 52 && + device_info.ids_flags & AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD; } set_custom_cu_en_mask(info); @@ -1576,6 +1581,7 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f) fprintf(f, " never_send_perfcounter_stop = %i\n", info->never_send_perfcounter_stop); fprintf(f, " discardable_allows_big_page = %i\n", info->discardable_allows_big_page); fprintf(f, " has_taskmesh_indirect0_bug = %i\n", info->has_taskmesh_indirect0_bug); + fprintf(f, " conformant_trunc_coord = %i\n", info->conformant_trunc_coord); fprintf(f, "Display features:\n"); fprintf(f, " use_display_dcc_unaligned = %u\n", info->use_display_dcc_unaligned); diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index babbf698a88..28cf6cf6028 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -126,6 +126,22 @@ struct radeon_info { bool has_vrs_ds_export_bug; bool has_taskmesh_indirect0_bug; + /* conformant_trunc_coord is equal to TA_CNTL2.TRUNCATE_COORD_MODE, which exists since gfx11. + * + * If TA_CNTL2.TRUNCATE_COORD_MODE == 0, coordinate truncation is the same as gfx10 and older. + * If TA_CNTL2.TRUNCATE_COORD_MODE == 1, coordinate truncation is adjusted to be conformant + * if you also set TRUNC_COORD. + * + * Behavior: + * truncate_coord_xy = TRUNC_COORD && + * ((xy_filter == Point && !gather) || !TA_CNTL2.TRUNCATE_COORD_MODE); + * truncate_coord_z = TRUNC_COORD && (z_filter == Point || !TA_CNTL2.TRUNCATE_COORD_MODE); + * truncate_coord_layer = TRUNC_COORD && !TA_CNTL2.TRUNCATE_COORD_MODE; + * + * AnisoPoint is treated as Point. + */ + bool conformant_trunc_coord; + /* Display features. */ /* There are 2 display DCC codepaths, because display expects unaligned DCC. */ /* Disable RB and pipe alignment to skip the retile blit. (1 RB chips only) */ diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index 4b0b0d44977..8d69648dcc4 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -4611,7 +4611,8 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) /* Texture coordinates fixups */ if (instr->coord_components > 1 && instr->sampler_dim == GLSL_SAMPLER_DIM_1D && instr->is_array && instr->op != nir_texop_txf) { - args.coords[1] = apply_round_slice(&ctx->ac, args.coords[1]); + if (!ctx->abi->conformant_trunc_coord) + args.coords[1] = apply_round_slice(&ctx->ac, args.coords[1]); } if (instr->coord_components > 2 && @@ -4620,7 +4621,8 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS) && instr->is_array && instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms && instr->op != nir_texop_fragment_fetch_amd && instr->op != nir_texop_fragment_mask_fetch_amd) { - args.coords[2] = apply_round_slice(&ctx->ac, args.coords[2]); + if (!ctx->abi->conformant_trunc_coord) + args.coords[2] = apply_round_slice(&ctx->ac, args.coords[2]); } if (ctx->ac.gfx_level == GFX9 && instr->sampler_dim == GLSL_SAMPLER_DIM_1D && @@ -4686,7 +4688,7 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) } /* Set TRUNC_COORD=0 for textureGather(). */ - if (instr->op == nir_texop_tg4) { + if (instr->op == nir_texop_tg4 && !ctx->abi->conformant_trunc_coord) { LLVMValueRef dword0 = LLVMBuildExtractElement(ctx->ac.builder, args.sampler, ctx->ac.i32_0, ""); dword0 = LLVMBuildAnd(ctx->ac.builder, dword0, LLVMConstInt(ctx->ac.i32, C_008F30_TRUNC_COORD, 0), ""); args.sampler = LLVMBuildInsertElement(ctx->ac.builder, args.sampler, dword0, ctx->ac.i32_0, ""); diff --git a/src/amd/llvm/ac_shader_abi.h b/src/amd/llvm/ac_shader_abi.h index 4943346a0a0..470d85b3653 100644 --- a/src/amd/llvm/ac_shader_abi.h +++ b/src/amd/llvm/ac_shader_abi.h @@ -129,6 +129,9 @@ struct ac_shader_abi { /* Whether to disable anisotropic filtering. */ bool disable_aniso_single_level; + /* Equal to radeon_info::conformant_trunc_coord. */ + bool conformant_trunc_coord; + /* Number of all interpolated inputs */ unsigned num_interp; }; diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index d90ea7efa73..a47609b3efc 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -7719,7 +7719,8 @@ radv_init_sampler(struct radv_device *device, struct radv_sampler *sampler, unsigned filter_mode = V_008F30_SQ_IMG_FILTER_MODE_BLEND; unsigned depth_compare_func = V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER; bool trunc_coord = - pCreateInfo->minFilter == VK_FILTER_NEAREST && pCreateInfo->magFilter == VK_FILTER_NEAREST; + (pCreateInfo->minFilter == VK_FILTER_NEAREST && pCreateInfo->magFilter == VK_FILTER_NEAREST) || + device->physical_device->rad_info.conformant_trunc_coord; bool uses_border_color = pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER || pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER || pCreateInfo->addressModeW == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index c017bc87098..7f162bc85d8 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -907,6 +907,7 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, ctx.abi.clamp_shadow_reference = false; ctx.abi.robust_buffer_access = options->robust_buffer_access; ctx.abi.load_grid_size_from_user_sgpr = args->load_grid_size_from_user_sgpr; + ctx.abi.conformant_trunc_coord = options->conformant_trunc_coord; bool is_ngg = is_pre_gs_stage(shaders[0]->info.stage) && info->is_ngg; if (shader_count >= 2 || is_ngg) diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index b116e642a4b..927843523e7 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -926,7 +926,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_pipeline_ .lower_to_fragment_fetch_amd = device->physical_device->use_fmask, .lower_lod_zero_width = true, .lower_invalid_implicit_lod = true, - .lower_array_layer_round_even = true, + .lower_array_layer_round_even = !device->physical_device->rad_info.conformant_trunc_coord, }; NIR_PASS(_, nir, nir_lower_tex, &tex_options); @@ -2322,6 +2322,7 @@ radv_fill_nir_compiler_options(struct radv_nir_compiler_options *options, options->family = device->physical_device->rad_info.family; options->gfx_level = device->physical_device->rad_info.gfx_level; options->has_3d_cube_border_color_mipmap = device->physical_device->rad_info.has_3d_cube_border_color_mipmap; + options->conformant_trunc_coord = device->physical_device->rad_info.conformant_trunc_coord; options->dump_shader = can_dump_shader; options->dump_preoptir = options->dump_shader && device->instance->debug_flags & RADV_DEBUG_PREOPTIR; diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index d9d69708ff9..c36c18f9e8e 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -138,6 +138,7 @@ struct radv_nir_compiler_options { enum amd_gfx_level gfx_level; uint32_t address32_hi; bool has_3d_cube_border_color_mipmap; + bool conformant_trunc_coord; struct { void (*func)(void *private_data, enum aco_compiler_debug_level level, const char *message); diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index 8d31ec60f2a..21ca1f40828 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -1002,6 +1002,7 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade info->options & SI_PROFILE_CLAMP_DIV_BY_ZERO; ctx->abi.use_waterfall_for_divergent_tex_samplers = true; ctx->abi.disable_aniso_single_level = true; + ctx->abi.conformant_trunc_coord = ctx->screen->info.conformant_trunc_coord; unsigned num_outputs = info->num_outputs; /* need extra output to hold primitive id added by nir lower */ diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index d51d09f246d..119c9ace8f5 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -4765,9 +4765,10 @@ static void *si_create_sampler_state(struct pipe_context *ctx, struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state); unsigned max_aniso = sscreen->force_aniso >= 0 ? sscreen->force_aniso : state->max_anisotropy; unsigned max_aniso_ratio = si_tex_aniso_filter(max_aniso); - bool trunc_coord = state->min_img_filter == PIPE_TEX_FILTER_NEAREST && - state->mag_img_filter == PIPE_TEX_FILTER_NEAREST && - state->compare_mode == PIPE_TEX_COMPARE_NONE; + bool trunc_coord = (state->min_img_filter == PIPE_TEX_FILTER_NEAREST && + state->mag_img_filter == PIPE_TEX_FILTER_NEAREST && + state->compare_mode == PIPE_TEX_COMPARE_NONE) || + sscreen->info.conformant_trunc_coord; union pipe_color_union clamped_border_color; if (!rstate) {