freedreno/common: unhardcode CCU color cache offset

Replace it with a calculation which works for all current GPUs.

Duplicated the calculation in both drivers because freedreno_dev_info isn't
meant for derived parameters (and drivers might want to just calculate on
the fly instead).

Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11790>
This commit is contained in:
Jonathan Marek
2021-05-19 22:46:16 -04:00
committed by Marge Bot
parent a37460e7ec
commit 1a6dd7f9b1
15 changed files with 58 additions and 41 deletions

View File

@@ -1555,7 +1555,7 @@ registers:
00000000 0xa630: 00000000
00100000 RB_UNKNOWN_8E04: 0x100000
00000001 RB_ADDR_MODE_CNTL: ADDR_64B
00000000 RB_CCU_CNTL: { OFFSET = 0 }
00000000 RB_CCU_CNTL: { COLOR_OFFSET = 0 }
00000004 RB_NC_MODE_CNTL: { LOWER_BIT = 2 | UPPER_BIT = 0 }
00000000 RB_PERFCTR_RB_SEL[0]+0: 00000000
00000000 RB_PERFCTR_RB_SEL[0x1]+0: 00000000

View File

@@ -13,7 +13,7 @@ t4 write HLSQ_INVALIDATE_CMD (bb08)
t7 opcode: CP_WAIT_FOR_IDLE (26) (1 dwords)
0000000001058010: 0000: 70268000
t4 write RB_CCU_CNTL (8e07)
RB_CCU_CNTL: { OFFSET = 0x20000 }
RB_CCU_CNTL: { COLOR_OFFSET = 0x20000 }
0000000001058014: 0000: 408e0701 10000000
t4 write RB_UNKNOWN_8E04 (8e04)
RB_UNKNOWN_8E04: 0x100000
@@ -323,7 +323,7 @@ t7 opcode: CP_BLIT (2c) (2 dwords)
!+ 000000ff RB_2D_SRC_SOLID_C3: 0xff
+ 00000000 RB_UNKNOWN_8E01: 0
!+ 00100000 RB_UNKNOWN_8E04: 0x100000
!+ 10000000 RB_CCU_CNTL: { OFFSET = 0x20000 }
!+ 10000000 RB_CCU_CNTL: { COLOR_OFFSET = 0x20000 }
+ 00000000 VPC_UNKNOWN_9107: { 0 }
+ 00000000 VPC_UNKNOWN_9210: 0
+ 00000000 VPC_UNKNOWN_9211: 0
@@ -399,7 +399,7 @@ t7 opcode: CP_EVENT_WRITE (46) (2 dwords)
t7 opcode: CP_WAIT_FOR_IDLE (26) (1 dwords)
000000000105832c: 0000: 70268000
t4 write RB_CCU_CNTL (8e07)
RB_CCU_CNTL: { OFFSET = 0xf8000 | GMEM }
RB_CCU_CNTL: { COLOR_OFFSET = 0xf8000 | GMEM }
0000000001058330: 0000: 408e0701 7c400000
t4 write VPC_SO_DISABLE (9306)
VPC_SO_DISABLE: { 0 }
@@ -504,7 +504,7 @@ t7 opcode: CP_EVENT_WRITE (46) (2 dwords)
+ 00000000 RB_BLIT_FLAG_DST_HI: 0
!+ 00004001 RB_BLIT_FLAG_DST_PITCH: { PITCH = 64 | ARRAY_PITCH = 1024 }
!+ 00000003 RB_BLIT_INFO: { UNK0 | GMEM | CLEAR_MASK = 0 }
!+ 7c400000 RB_CCU_CNTL: { OFFSET = 0xf8000 | GMEM }
!+ 7c400000 RB_CCU_CNTL: { COLOR_OFFSET = 0xf8000 | GMEM }
!+ 00000000 VPC_SO_DISABLE: { 0 }
+ 00000000 SP_TP_WINDOW_OFFSET: { X = 0 | Y = 0 }
+ 00000000 SP_WINDOW_OFFSET: { X = 0 | Y = 0 }

View File

@@ -246,7 +246,7 @@ t7 opcode: CP_SKIP_IB2_ENABLE_GLOBAL (1d) (2 dwords)
t7 opcode: CP_WAIT_FOR_IDLE (26) (1 dwords)
0000000001d91278: 0000: 70268000
t4 write RB_CCU_CNTL (8e07)
RB_CCU_CNTL: { OFFSET = 0xf8000 | GMEM | UNK2 }
RB_CCU_CNTL: { COLOR_OFFSET = 0xf8000 | GMEM | UNK2 }
0000000001d9127c: 0000: 408e0701 7c400004
t4 write RB_DEPTH_BUFFER_INFO (8872)
RB_DEPTH_BUFFER_INFO: { DEPTH_FORMAT = DEPTH6_NONE }
@@ -1024,7 +1024,7 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
+ 00000000 RB_MRT_FLAG_BUFFER[0].PITCH: { PITCH = 0 | ARRAY_PITCH = 0 }
!+ 00000001 RB_UNKNOWN_8E01: 0x1
+ 00000000 RB_UNKNOWN_8E04: 0
!+ 7c400004 RB_CCU_CNTL: { OFFSET = 0xf8000 | GMEM | UNK2 }
!+ 7c400004 RB_CCU_CNTL: { COLOR_OFFSET = 0xf8000 | GMEM | UNK2 }
!+ 00ffff00 VPC_VS_CLIP_CNTL: { CLIP_MASK = 0 | CLIP_DIST_03_LOC = 255 | CLIP_DIST_47_LOC = 255 }
!+ 0000ffff VPC_VS_LAYER_CNTL: { LAYERLOC = 255 | VIEWLOC = 255 }
+ 00000000 VPC_UNKNOWN_9107: { 0 }
@@ -1517,7 +1517,7 @@ t7 opcode: CP_SET_MODE (63) (2 dwords)
t7 opcode: CP_WAIT_FOR_IDLE (26) (1 dwords)
0000000001d91938: 0000: 70268000
t4 write RB_CCU_CNTL (8e07)
RB_CCU_CNTL: { OFFSET = 0xf8000 | GMEM | UNK2 }
RB_CCU_CNTL: { COLOR_OFFSET = 0xf8000 | GMEM | UNK2 }
0000000001d9193c: 0000: 408e0701 7c400004
t4 write VPC_SO_DISABLE (9306)
VPC_SO_DISABLE: { DISABLE }
@@ -1695,7 +1695,7 @@ t7 opcode: CP_EVENT_WRITE (46) (2 dwords)
+ 00000000 RB_BLIT_CLEAR_COLOR_DW2: 0
+ 00000000 RB_BLIT_CLEAR_COLOR_DW3: 0
!+ 000000f2 RB_BLIT_INFO: { GMEM | CLEAR_MASK = 0xf }
+ 7c400004 RB_CCU_CNTL: { OFFSET = 0xf8000 | GMEM | UNK2 }
+ 7c400004 RB_CCU_CNTL: { COLOR_OFFSET = 0xf8000 | GMEM | UNK2 }
!+ 00000001 VPC_SO_DISABLE: { DISABLE }
+ 00000001 PC_UNKNOWN_9805: 0x1
!+ 00000000 VFD_MODE_CNTL: { 0 }

View File

@@ -49,8 +49,6 @@ freedreno_dev_info_init(struct freedreno_dev_info *info, uint32_t gpu_id)
case 618:
info->num_sp_cores = 1;
info->fibers_per_sp = 128 * 16;
info->a6xx.ccu_offset_gmem = 0x7c000;
info->a6xx.ccu_offset_bypass = 0x10000;
info->a6xx.ccu_cntl_gmem_unk2 = true;
info->a6xx.supports_multiview_mask = false;
info->a6xx.magic.RB_UNKNOWN_8E04_blit = 0x00100000;
@@ -60,8 +58,6 @@ freedreno_dev_info_init(struct freedreno_dev_info *info, uint32_t gpu_id)
case 630:
info->num_sp_cores = 2;
info->fibers_per_sp = 128 * 16;
info->a6xx.ccu_offset_gmem = 0xf8000;
info->a6xx.ccu_offset_bypass = 0x20000;
info->a6xx.ccu_cntl_gmem_unk2 = true;
info->a6xx.supports_multiview_mask = false;
info->a6xx.magic.RB_UNKNOWN_8E04_blit = 0x01000000;
@@ -82,8 +78,6 @@ freedreno_dev_info_init(struct freedreno_dev_info *info, uint32_t gpu_id)
* the per-wave layout though.
*/
info->fibers_per_sp = 128 * 4 * 16;
info->a6xx.ccu_offset_gmem = 0xf8000;
info->a6xx.ccu_offset_bypass = 0x20000;
info->a6xx.supports_multiview_mask = true;
info->a6xx.magic.RB_UNKNOWN_8E04_blit = 0x00100000;
info->a6xx.magic.PC_UNKNOWN_9805 = 1;
@@ -93,8 +87,6 @@ freedreno_dev_info_init(struct freedreno_dev_info *info, uint32_t gpu_id)
case 650:
info->num_sp_cores = 3;
info->fibers_per_sp = 128 * 2 * 16;
info->a6xx.ccu_offset_gmem = 0x114000;
info->a6xx.ccu_offset_bypass = 0x30000;
info->a6xx.supports_multiview_mask = true;
info->a6xx.magic.RB_UNKNOWN_8E04_blit = 0x04100000;
info->a6xx.magic.PC_UNKNOWN_9805 = 2;

View File

@@ -46,8 +46,13 @@ struct freedreno_dev_info {
uint32_t num_vsc_pipes;
/* the number of CCUs is always equal to the number of SPs */
union {
uint32_t num_sp_cores;
uint32_t num_ccu;
};
/* Information for private memory calculations */
uint32_t num_sp_cores, fibers_per_sp;
uint32_t fibers_per_sp;
union {
struct {
@@ -55,8 +60,6 @@ struct freedreno_dev_info {
bool supports_multiview_mask;
/* info for setting RB_CCU_CNTL */
uint32_t ccu_offset_gmem;
uint32_t ccu_offset_bypass;
bool ccu_cntl_gmem_unk2;
bool has_z24uint_s8uint;
@@ -69,6 +72,17 @@ struct freedreno_dev_info {
};
};
/* per CCU GMEM amount reserved for depth cache for direct rendering */
#define A6XX_CCU_DEPTH_SIZE (64 * 1024)
/* Per-CCU GMEM amount reserved for the color cache used by GMEM resolves
* which require color cache (non-BLIT event case).
* This is smaller than what is normally used by direct rendering
* (the RB_CCU_CNTL.GMEM bit enables this smaller size).
* If a GMEM resolve requires color cache, the driver needs to make sure
* it will not overwrite pixel data in GMEM that is still needed.
*/
#define A6XX_CCU_GMEM_COLOR_SIZE (16 * 1024)
void freedreno_dev_info_init(struct freedreno_dev_info *info, uint32_t gpu_id);
#ifdef __cplusplus

View File

@@ -2229,18 +2229,18 @@ to upconvert to 32b float internally?
<reg32 offset="0x8e05" name="RB_ADDR_MODE_CNTL" pos="0" type="a5xx_address_mode"/>
<!-- 0x8e06 invalid -->
<reg32 offset="0x8e07" name="RB_CCU_CNTL">
<!-- offset into GMEM for something.
important for sysmem path
BLIT_OP_SCALE also writes to GMEM at this offset for GMEM store
blob values for GMEM path (note: close to GMEM size):
a618: 0x7c000 a630/a640: 0xf8000 a650: 0x114000
SYSMEM path values:
a618: 0x10000 a630/a640: 0x20000 a650: 0x30000
TODO: valid mask 0xfffffc1f
<!-- GMEM offset of the CCU color cache.
The CCU depth cache starts at zero, so for direct rendering this
should be the size of the depth cache.
For GMEM rendering, we set it to the GMEM size minus the minimum
CCU color cache size. The CCU color cache will be needed in some
resolve cases, and in those cases we need to reserve the end
of GMEM for the color cache.
-->
<bitfield name="OFFSET" low="23" high="31" shr="12" type="hex"/>
<bitfield name="COLOR_OFFSET" low="23" high="31" shr="12" type="hex"/>
<bitfield name="GMEM" pos="22" type="boolean"/> <!-- set for GMEM path -->
<bitfield name="UNK2" pos="2" type="boolean"/> <!-- sometimes set with GMEM? -->
<!--TODO: valid mask 0xfffffc1f -->
</reg32>
<reg32 offset="0x8e08" name="RB_NC_MODE_CNTL">
<bitfield name="MODE" pos="0" type="boolean"/>

View File

@@ -160,10 +160,10 @@ tu_emit_cache_flush_ccu(struct tu_cmd_buffer *cmd_buffer,
if (ccu_state != cmd_buffer->state.ccu_state) {
struct tu_physical_device *phys_dev = cmd_buffer->device->physical_device;
tu_cs_emit_regs(cs,
A6XX_RB_CCU_CNTL(.offset =
A6XX_RB_CCU_CNTL(.color_offset =
ccu_state == TU_CMD_CCU_GMEM ?
phys_dev->info.a6xx.ccu_offset_gmem :
phys_dev->info.a6xx.ccu_offset_bypass,
phys_dev->ccu_offset_gmem :
phys_dev->ccu_offset_bypass,
.gmem = ccu_state == TU_CMD_CCU_GMEM));
cmd_buffer->state.ccu_state = ccu_state;
}
@@ -712,7 +712,7 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
~(TU_CMD_FLAG_WAIT_FOR_IDLE | TU_CMD_FLAG_CACHE_INVALIDATE);
tu_cs_emit_regs(cs,
A6XX_RB_CCU_CNTL(.offset = phys_dev->info.a6xx.ccu_offset_bypass));
A6XX_RB_CCU_CNTL(.color_offset = phys_dev->ccu_offset_bypass));
cmd->state.ccu_state = TU_CMD_CCU_SYSMEM;
tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8E04, 0x00100000);
tu_cs_emit_write_reg(cs, REG_A6XX_SP_FLOAT_CNTL, 0);

View File

@@ -202,6 +202,9 @@ tu_physical_device_init(struct tu_physical_device *device,
case 640:
case 650:
freedreno_dev_info_init(&device->info, device->gpu_id);
device->ccu_offset_bypass = device->info.num_ccu * A6XX_CCU_DEPTH_SIZE;
device->ccu_offset_gmem = (device->gmem_size -
device->info.num_ccu * A6XX_CCU_GMEM_COLOR_SIZE);
break;
default:
result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,

View File

@@ -386,7 +386,7 @@ tu_render_pass_gmem_config(struct tu_render_pass *pass,
* result: nblocks = {12, 52}, pixels = 196608
* optimal: nblocks = {13, 51}, pixels = 208896
*/
uint32_t gmem_blocks = phys_dev->info.a6xx.ccu_offset_gmem / gmem_align;
uint32_t gmem_blocks = phys_dev->ccu_offset_gmem / gmem_align;
uint32_t offset = 0, pixels = ~0u, i;
for (i = 0; i < pass->attachment_count; i++) {
struct tu_render_pass_attachment *att = &pass->attachments[i];

View File

@@ -207,6 +207,8 @@ struct tu_physical_device
unsigned gpu_id;
uint32_t gmem_size;
uint64_t gmem_base;
uint32_t ccu_offset_gmem;
uint32_t ccu_offset_bypass;
struct freedreno_dev_info info;

View File

@@ -245,7 +245,7 @@ emit_setup(struct fd_batch *batch)
/* normal BLIT_OP_SCALE operation needs bypass RB_CCU_CNTL */
OUT_WFI5(ring);
OUT_PKT4(ring, REG_A6XX_RB_CCU_CNTL, 1);
OUT_RING(ring, A6XX_RB_CCU_CNTL_OFFSET(screen->info.a6xx.ccu_offset_bypass));
OUT_RING(ring, A6XX_RB_CCU_CNTL_COLOR_OFFSET(screen->ccu_offset_bypass));
}
static void

View File

@@ -380,8 +380,7 @@ fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth)
OUT_WFI5(ring);
OUT_REG(ring,
A6XX_RB_CCU_CNTL(.offset = screen->info.a6xx.ccu_offset_bypass));
OUT_REG(ring, A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_bypass));
OUT_REG(ring,
A6XX_HLSQ_INVALIDATE_CMD(.vs_state = true, .hs_state = true,

View File

@@ -740,7 +740,7 @@ emit_binning_pass(struct fd_batch *batch) assert_dt
OUT_WFI5(ring);
OUT_REG(ring,
A6XX_RB_CCU_CNTL(.offset = screen->info.a6xx.ccu_offset_gmem,
A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_gmem,
.gmem = true,
.unk2 = screen->info.a6xx.ccu_cntl_gmem_unk2));
}
@@ -808,7 +808,7 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt
fd_wfi(batch, ring);
OUT_REG(ring,
A6XX_RB_CCU_CNTL(.offset = screen->info.a6xx.ccu_offset_gmem,
A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_gmem,
.gmem = true,
.unk2 = screen->info.a6xx.ccu_cntl_gmem_unk2));
@@ -1585,8 +1585,7 @@ fd6_emit_sysmem_prep(struct fd_batch *batch) assert_dt
fd6_cache_inv(batch, ring);
fd_wfi(batch, ring);
OUT_REG(ring,
A6XX_RB_CCU_CNTL(.offset = screen->info.a6xx.ccu_offset_bypass));
OUT_REG(ring, A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_bypass));
/* enable stream-out, with sysmem there is only one pass: */
OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));

View File

@@ -1067,6 +1067,12 @@ fd_screen_create(struct fd_device *dev, struct renderonly *ro)
freedreno_dev_info_init(&screen->info, screen->gpu_id);
if (is_a6xx(screen)) {
screen->ccu_offset_bypass = screen->info.num_ccu * A6XX_CCU_DEPTH_SIZE;
screen->ccu_offset_gmem = (screen->gmemsize_bytes -
screen->info.num_ccu * A6XX_CCU_GMEM_COLOR_SIZE);
}
if (FD_DBG(PERFC)) {
screen->perfcntr_groups =
fd_perfcntrs(screen->gpu_id, &screen->num_perfcntr_groups);

View File

@@ -92,6 +92,8 @@ struct fd_screen {
bool has_syncobj;
struct freedreno_dev_info info;
uint32_t ccu_offset_gmem;
uint32_t ccu_offset_bypass;
/* Bitmask of gmem_reasons that do not force GMEM path over bypass
* for current generation.