diff --git a/include/drm-uapi/amdgpu_drm.h b/include/drm-uapi/amdgpu_drm.h index bb3806bdf2b..ecec6a0eb57 100644 --- a/include/drm-uapi/amdgpu_drm.h +++ b/include/drm-uapi/amdgpu_drm.h @@ -171,6 +171,8 @@ extern "C" { * may override the MTYPE selected in AMDGPU_VA_OP_MAP. */ #define AMDGPU_GEM_CREATE_EXT_COHERENT (1 << 15) +/* Set PTE.D and recompress during GTT->VRAM moves according to TILING flags. */ +#define AMDGPU_GEM_CREATE_GFX12_DCC (1 << 16) struct drm_amdgpu_gem_create_in { /** the requested memory size */ @@ -409,6 +411,13 @@ struct drm_amdgpu_gem_userptr { /* GFX12 and later: */ #define AMDGPU_TILING_GFX12_SWIZZLE_MODE_SHIFT 0 #define AMDGPU_TILING_GFX12_SWIZZLE_MODE_MASK 0x7 +/* These are DCC recompression settings for memory management: */ +#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_SHIFT 3 +#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_MASK 0x3 /* 0:64B, 1:128B, 2:256B */ +#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_SHIFT 5 +#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_MASK 0x7 /* CB_COLOR0_INFO.NUMBER_TYPE */ +#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_SHIFT 8 +#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_MASK 0x3f /* [0:4]:CB_COLOR0_INFO.FORMAT, [5]:MM */ /* bit gap */ #define AMDGPU_TILING_GFX12_SCANOUT_SHIFT 63 #define AMDGPU_TILING_GFX12_SCANOUT_MASK 0x1 diff --git a/include/drm-uapi/drm_fourcc.h b/include/drm-uapi/drm_fourcc.h index d3c1c7d4c01..d76dc6157a5 100644 --- a/include/drm-uapi/drm_fourcc.h +++ b/include/drm-uapi/drm_fourcc.h @@ -1506,7 +1506,10 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) * 6 - 64KB_3D * 7 - 256KB_3D */ +#define AMD_FMT_MOD_TILE_GFX12_256B_2D 1 +#define AMD_FMT_MOD_TILE_GFX12_4K_2D 2 #define AMD_FMT_MOD_TILE_GFX12_64K_2D 3 +#define AMD_FMT_MOD_TILE_GFX12_256K_2D 4 #define AMD_FMT_MOD_DCC_BLOCK_64B 0 #define AMD_FMT_MOD_DCC_BLOCK_128B 1 diff --git a/src/amd/common/ac_descriptors.c b/src/amd/common/ac_descriptors.c index cebe989149d..7166df90fdf 100644 --- a/src/amd/common/ac_descriptors.c 
+++ b/src/amd/common/ac_descriptors.c @@ -414,7 +414,29 @@ ac_set_mutable_tex_desc_fields(const struct radeon_info *info, const struct ac_m } } - if (meta_va) { + if (info->gfx_level >= GFX12) { + /* Color and Z/S always support compressed image stores on Gfx12. Enablement is + * mostly controlled by PTE.D (page table bit). The rule is: + * + * Shader Engines (shaders, CB, DB, SC): + * COMPRESSION_ENABLED = PTE.D && COMPRESSION_EN; + * + * Central Hub (CP, SDMA, indices, tess factor loads): + * PTE.D is ignored. Packets and states fully determine enablement. + * + * If !PTE.D, the states enabling compression in shaders, CB, DB, and SC have no effect. + * PTE.D is set per buffer allocation in Linux, not per VM page, so that it's + * automatically propagated between processes. We could optionally allow setting it + * per VM page too. + * + * The DCC/HTILE buffer isn't allocated separately on Gfx12 anymore. The DCC/HTILE + * metadata storage is mostly hidden from userspace, and any buffer can be compressed. + */ + if (state->dcc_enabled) { + desc[6] |= S_00A018_COMPRESSION_EN(1) | + S_00A018_WRITE_COMPRESS_ENABLE(state->gfx10.write_compress_enable); + } + } else if (meta_va) { /* Gfx10-11. 
*/ struct gfx9_surf_meta_flags meta = { .rb_aligned = 1, diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c index bbb72bc6043..c24a42748a3 100644 --- a/src/amd/common/ac_surface.c +++ b/src/amd/common/ac_surface.c @@ -59,6 +59,12 @@ #define AMDGPU_TILING_GFX12_SWIZZLE_MODE_MASK 0x7 #define AMDGPU_TILING_GFX12_SCANOUT_SHIFT 63 #define AMDGPU_TILING_GFX12_SCANOUT_MASK 0x1 +#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_SHIFT 3 +#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_MASK 0x3 +#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_SHIFT 5 +#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_MASK 0x7 +#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_SHIFT 8 +#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_MASK 0x3f #define AMDGPU_TILING_SET(field, value) \ (((__u64)(value) & AMDGPU_TILING_##field##_MASK) << AMDGPU_TILING_##field##_SHIFT) #define AMDGPU_TILING_GET(value, field) \ @@ -524,7 +530,7 @@ bool ac_get_supported_modifiers(const struct radeon_info *info, } case GFX12: { /* Chip properties no longer affect tiling, and there is no distinction between displayable - * and non-displayable anymore. + * and non-displayable anymore. (DCC settings may affect displayability though) * * Only declare 64K modifiers for now. */ @@ -537,8 +543,16 @@ bool ac_get_supported_modifiers(const struct radeon_info *info, AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX11) | AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D); + /* Expose both 128B and 64B compressed blocks. */ + uint64_t dcc_128B = AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B); + uint64_t dcc_64B = AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B); + /* Modifiers must be sorted from best to worst. 
*/ - ADD_MOD(mod_64K_2D) + ADD_MOD(mod_64K_2D | dcc_128B) /* 64K with DCC and 128B compressed blocks */ + ADD_MOD(mod_64K_2D | dcc_64B) /* 64K with DCC and 64B compressed blocks */ + ADD_MOD(mod_64K_2D) /* 64K without DCC */ ADD_MOD(mod_64K_2D_as_gfx11) /* the same as above, but for gfx11 interop */ ADD_MOD(DRM_FORMAT_MOD_LINEAR) break; @@ -3188,6 +3202,7 @@ static bool gfx12_compute_surface(struct ac_addrlib *addrlib, const struct radeo /* Select the swizzle mode. */ if (surf->modifier != DRM_FORMAT_MOD_INVALID) { assert(!compressed); + assert(!ac_modifier_has_dcc(surf->modifier) || !(surf->flags & RADEON_SURF_DISABLE_DCC)); AddrSurfInfoIn.swizzleMode = ac_get_modifier_swizzle_mode(info->gfx_level, surf->modifier); } else if (surf->flags & RADEON_SURF_IMPORTED) { AddrSurfInfoIn.swizzleMode = surf->u.gfx9.swizzle_mode; @@ -3218,10 +3233,24 @@ static bool gfx12_compute_surface(struct ac_addrlib *addrlib, const struct radeo surf->u.gfx9.swizzle_mode = AddrSurfInfoIn.swizzleMode; surf->u.gfx9.resource_type = (enum gfx9_resource_type)AddrSurfInfoIn.resourceType; + surf->u.gfx9.gfx12_enable_dcc = ac_modifier_has_dcc(surf->modifier) || + (surf->modifier == DRM_FORMAT_MOD_INVALID && + !(surf->flags & RADEON_SURF_DISABLE_DCC) && + /* Always enable compression for Z/S and MSAA color by default. */ + (surf->flags & RADEON_SURF_Z_OR_SBUFFER || + config->info.samples > 1 || + /* TODO: enable display DCC after DAL is ready */ + (!(surf->flags & RADEON_SURF_SCANOUT) && + /* These two are not strictly necessary. */ + surf->u.gfx9.swizzle_mode != ADDR3_LINEAR && + surf->surf_size >= 4096))); surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER); surf->is_linear = surf->u.gfx9.swizzle_mode == ADDR3_LINEAR; - surf->is_displayable = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER); // TODO: how to set is_displayable? 
+ surf->is_displayable = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && + surf->u.gfx9.resource_type != RADEON_RESOURCE_3D && + /* TODO: enable display DCC after DAL is ready */ + !surf->u.gfx9.gfx12_enable_dcc; surf->thick_tiling = surf->u.gfx9.swizzle_mode >= ADDR3_4KB_3D; if (surf->flags & RADEON_SURF_Z_OR_SBUFFER) { @@ -3231,6 +3260,18 @@ static bool gfx12_compute_surface(struct ac_addrlib *addrlib, const struct radeo surf->u.gfx9.zs.his.size = 0; } + if (surf->u.gfx9.gfx12_enable_dcc) { + if (surf->modifier != DRM_FORMAT_MOD_INVALID) { + surf->u.gfx9.color.dcc.max_compressed_block_size = + AMD_FMT_MOD_GET(DCC_MAX_COMPRESSED_BLOCK, surf->modifier); + } else if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && + /* Don't change the DCC settings for imported buffers - they might differ. */ + !(surf->flags & RADEON_SURF_IMPORTED)) { + /* TODO: decide what to set for scanout buffers after DAL is ready */ + surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B; + } + } + /* Calculate texture layout information. 
*/ if (!stencil_only && !gfx12_compute_miptree(addrlib, info, config, surf, compressed, &AddrSurfInfoIn)) @@ -3430,6 +3471,12 @@ void ac_surface_apply_bo_metadata(const struct radeon_info *info, struct radeon_ if (info->gfx_level >= GFX12) { surf->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, GFX12_SWIZZLE_MODE); + surf->u.gfx9.color.dcc.max_compressed_block_size = + AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_MAX_COMPRESSED_BLOCK); + surf->u.gfx9.color.dcc_data_format = + AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_DATA_FORMAT); + surf->u.gfx9.color.dcc_number_type = + AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_NUMBER_TYPE); scanout = AMDGPU_TILING_GET(tiling_flags, GFX12_SCANOUT); } else if (info->gfx_level >= GFX9) { surf->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE); @@ -3473,6 +3520,10 @@ void ac_surface_compute_bo_metadata(const struct radeon_info *info, struct radeo if (info->gfx_level >= GFX12) { *tiling_flags |= AMDGPU_TILING_SET(GFX12_SWIZZLE_MODE, surf->u.gfx9.swizzle_mode); + *tiling_flags |= AMDGPU_TILING_SET(GFX12_DCC_MAX_COMPRESSED_BLOCK, + surf->u.gfx9.color.dcc.max_compressed_block_size); + *tiling_flags |= AMDGPU_TILING_SET(GFX12_DCC_NUMBER_TYPE, surf->u.gfx9.color.dcc_number_type); + *tiling_flags |= AMDGPU_TILING_SET(GFX12_DCC_DATA_FORMAT, surf->u.gfx9.color.dcc_data_format); *tiling_flags |= AMDGPU_TILING_SET(GFX12_SCANOUT, (surf->flags & RADEON_SURF_SCANOUT) != 0); } else if (info->gfx_level >= GFX9) { uint64_t dcc_offset = 0; diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h index 5f812d47b52..4e529af9a82 100644 --- a/src/amd/common/ac_surface.h +++ b/src/amd/common/ac_surface.h @@ -236,6 +236,7 @@ struct gfx9_surf_layout { uint16_t epitch; /* gfx9 only, not on gfx10 */ uint8_t swizzle_mode; /* color or depth */ bool uses_custom_pitch; /* only used by gfx10.3+ */ + bool gfx12_enable_dcc; /* set AMDGPU_GEM_CREATE_GFX12_DCC if the placement is VRAM */ enum gfx9_resource_type resource_type:8; /* 1D, 
2D or 3D */ uint32_t surf_pitch; /* up to 64K (in blocks) */ @@ -274,6 +275,12 @@ struct gfx9_surf_layout { uint8_t dcc_block_height; uint8_t dcc_block_depth; + /* Gfx12 DCC recompression settings used by kernel memory management. + * The driver sets these, not ac_compute_surface. + */ + uint8_t dcc_number_type; /* CB_COLOR0_INFO.NUMBER_TYPE */ + uint8_t dcc_data_format; /* [0:4]:CB_COLOR0_INFO.FORMAT, [5]:MM */ + /* Displayable DCC. This is always rb_aligned=0 and pipe_aligned=0. * The 3D engine doesn't support that layout except for chips with 1 RB. * All other chips must set rb_aligned=1.