From 1a6dd7f9b1c1a43d9e1b66637e13af1de3f2c3cc Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Wed, 19 May 2021 22:46:16 -0400 Subject: [PATCH] freedreno/common: unhardcode CCU color cache offset Replace it with a calculation which works for all current GPUs. Duplicated the calculation in both drivers because freedreno_dev_info isn't meant for derived parameters (and drivers might want to just calculate on the fly instead). Signed-off-by: Jonathan Marek Signed-off-by: Rob Clark Part-of: --- src/freedreno/.gitlab-ci/reference/crash.log | 2 +- ...exed.indirect_draw_count.triangle_list.log | 8 ++++---- .../.gitlab-ci/reference/fd-clouds.log | 8 ++++---- src/freedreno/common/freedreno_dev_info.c | 8 -------- src/freedreno/common/freedreno_dev_info.h | 20 ++++++++++++++++--- src/freedreno/registers/adreno/a6xx.xml | 18 ++++++++--------- src/freedreno/vulkan/tu_cmd_buffer.c | 8 ++++---- src/freedreno/vulkan/tu_device.c | 3 +++ src/freedreno/vulkan/tu_pass.c | 2 +- src/freedreno/vulkan/tu_private.h | 2 ++ .../drivers/freedreno/a6xx/fd6_blitter.c | 2 +- src/gallium/drivers/freedreno/a6xx/fd6_draw.c | 3 +-- src/gallium/drivers/freedreno/a6xx/fd6_gmem.c | 7 +++---- .../drivers/freedreno/freedreno_screen.c | 6 ++++++ .../drivers/freedreno/freedreno_screen.h | 2 ++ 15 files changed, 58 insertions(+), 41 deletions(-) diff --git a/src/freedreno/.gitlab-ci/reference/crash.log b/src/freedreno/.gitlab-ci/reference/crash.log index bd197b41b43..e81d16bb352 100644 --- a/src/freedreno/.gitlab-ci/reference/crash.log +++ b/src/freedreno/.gitlab-ci/reference/crash.log @@ -1555,7 +1555,7 @@ registers: 00000000 0xa630: 00000000 00100000 RB_UNKNOWN_8E04: 0x100000 00000001 RB_ADDR_MODE_CNTL: ADDR_64B - 00000000 RB_CCU_CNTL: { OFFSET = 0 } + 00000000 RB_CCU_CNTL: { COLOR_OFFSET = 0 } 00000004 RB_NC_MODE_CNTL: { LOWER_BIT = 2 | UPPER_BIT = 0 } 00000000 RB_PERFCTR_RB_SEL[0]+0: 00000000 00000000 RB_PERFCTR_RB_SEL[0x1]+0: 00000000 diff --git a/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log b/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log index 6f09d3a8baf..f3f81a4ff3c 100644 --- a/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log +++ b/src/freedreno/.gitlab-ci/reference/dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list.log @@ -13,7 +13,7 @@ t4 write HLSQ_INVALIDATE_CMD (bb08) t7 opcode: CP_WAIT_FOR_IDLE (26) (1 dwords) 0000000001058010: 0000: 70268000 t4 write RB_CCU_CNTL (8e07) - RB_CCU_CNTL: { OFFSET = 0x20000 } + RB_CCU_CNTL: { COLOR_OFFSET = 0x20000 } 0000000001058014: 0000: 408e0701 10000000 t4 write RB_UNKNOWN_8E04 (8e04) RB_UNKNOWN_8E04: 0x100000 @@ -323,7 +323,7 @@ t7 opcode: CP_BLIT (2c) (2 dwords) !+ 000000ff RB_2D_SRC_SOLID_C3: 0xff + 00000000 RB_UNKNOWN_8E01: 0 !+ 00100000 RB_UNKNOWN_8E04: 0x100000 -!+ 10000000 RB_CCU_CNTL: { OFFSET = 0x20000 } +!+ 10000000 RB_CCU_CNTL: { COLOR_OFFSET = 0x20000 } + 00000000 VPC_UNKNOWN_9107: { 0 } + 00000000 VPC_UNKNOWN_9210: 0 + 00000000 VPC_UNKNOWN_9211: 0 @@ -399,7 +399,7 @@ t7 opcode: CP_EVENT_WRITE (46) (2 dwords) t7 opcode: CP_WAIT_FOR_IDLE (26) (1 dwords) 000000000105832c: 0000: 70268000 t4 write RB_CCU_CNTL (8e07) - RB_CCU_CNTL: { OFFSET = 0xf8000 | GMEM } + RB_CCU_CNTL: { COLOR_OFFSET = 0xf8000 | GMEM } 0000000001058330: 0000: 408e0701 7c400000 t4 write VPC_SO_DISABLE (9306) VPC_SO_DISABLE: { 0 } @@ -504,7 +504,7 @@ t7 opcode: CP_EVENT_WRITE (46) (2 dwords) + 00000000 RB_BLIT_FLAG_DST_HI: 0 !+ 00004001 RB_BLIT_FLAG_DST_PITCH: { PITCH = 64 | ARRAY_PITCH = 1024 } !+ 00000003 RB_BLIT_INFO: { UNK0 | GMEM | CLEAR_MASK = 0 } -!+ 7c400000 RB_CCU_CNTL: { OFFSET = 0xf8000 | GMEM } +!+ 7c400000 RB_CCU_CNTL: { COLOR_OFFSET = 0xf8000 | GMEM } !+ 00000000 VPC_SO_DISABLE: { 0 } + 00000000 SP_TP_WINDOW_OFFSET: { X = 0 | Y = 0 } + 00000000 SP_WINDOW_OFFSET: { X = 0 | Y = 0 } diff --git a/src/freedreno/.gitlab-ci/reference/fd-clouds.log b/src/freedreno/.gitlab-ci/reference/fd-clouds.log index 4a3c5dc1c53..1763d3bbc55 100644 --- a/src/freedreno/.gitlab-ci/reference/fd-clouds.log +++ b/src/freedreno/.gitlab-ci/reference/fd-clouds.log @@ -246,7 +246,7 @@ t7 opcode: CP_SKIP_IB2_ENABLE_GLOBAL (1d) (2 dwords) t7 opcode: CP_WAIT_FOR_IDLE (26) (1 dwords) 0000000001d91278: 0000: 70268000 t4 write RB_CCU_CNTL (8e07) - RB_CCU_CNTL: { OFFSET = 0xf8000 | GMEM | UNK2 } + RB_CCU_CNTL: { COLOR_OFFSET = 0xf8000 | GMEM | UNK2 } 0000000001d9127c: 0000: 408e0701 7c400004 t4 write RB_DEPTH_BUFFER_INFO (8872) RB_DEPTH_BUFFER_INFO: { DEPTH_FORMAT = DEPTH6_NONE } @@ -1024,7 +1024,7 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords) + 00000000 RB_MRT_FLAG_BUFFER[0].PITCH: { PITCH = 0 | ARRAY_PITCH = 0 } !+ 00000001 RB_UNKNOWN_8E01: 0x1 + 00000000 RB_UNKNOWN_8E04: 0 -!+ 7c400004 RB_CCU_CNTL: { OFFSET = 0xf8000 | GMEM | UNK2 } +!+ 7c400004 RB_CCU_CNTL: { COLOR_OFFSET = 0xf8000 | GMEM | UNK2 } !+ 00ffff00 VPC_VS_CLIP_CNTL: { CLIP_MASK = 0 | CLIP_DIST_03_LOC = 255 | CLIP_DIST_47_LOC = 255 } !+ 0000ffff VPC_VS_LAYER_CNTL: { LAYERLOC = 255 | VIEWLOC = 255 } + 00000000 VPC_UNKNOWN_9107: { 0 } @@ -1517,7 +1517,7 @@ t7 opcode: CP_SET_MODE (63) (2 dwords) t7 opcode: CP_WAIT_FOR_IDLE (26) (1 dwords) 0000000001d91938: 0000: 70268000 t4 write RB_CCU_CNTL (8e07) - RB_CCU_CNTL: { OFFSET = 0xf8000 | GMEM | UNK2 } + RB_CCU_CNTL: { COLOR_OFFSET = 0xf8000 | GMEM | UNK2 } 0000000001d9193c: 0000: 408e0701 7c400004 t4 write VPC_SO_DISABLE (9306) VPC_SO_DISABLE: { DISABLE } @@ -1695,7 +1695,7 @@ t7 opcode: CP_EVENT_WRITE (46) (2 dwords) + 00000000 RB_BLIT_CLEAR_COLOR_DW2: 0 + 00000000 RB_BLIT_CLEAR_COLOR_DW3: 0 !+ 000000f2 RB_BLIT_INFO: { GMEM | CLEAR_MASK = 0xf } - + 7c400004 RB_CCU_CNTL: { OFFSET = 0xf8000 | GMEM | UNK2 } + + 7c400004 RB_CCU_CNTL: { COLOR_OFFSET = 0xf8000 | GMEM | UNK2 } !+ 00000001 VPC_SO_DISABLE: { DISABLE } + 00000001 PC_UNKNOWN_9805: 0x1 !+ 00000000 VFD_MODE_CNTL: { 0 } diff --git a/src/freedreno/common/freedreno_dev_info.c b/src/freedreno/common/freedreno_dev_info.c index 6ed16e05ddf..13e5b63881e 100644 --- a/src/freedreno/common/freedreno_dev_info.c +++ b/src/freedreno/common/freedreno_dev_info.c @@ -49,8 +49,6 @@ freedreno_dev_info_init(struct freedreno_dev_info *info, uint32_t gpu_id) case 618: info->num_sp_cores = 1; info->fibers_per_sp = 128 * 16; - info->a6xx.ccu_offset_gmem = 0x7c000; - info->a6xx.ccu_offset_bypass = 0x10000; info->a6xx.ccu_cntl_gmem_unk2 = true; info->a6xx.supports_multiview_mask = false; info->a6xx.magic.RB_UNKNOWN_8E04_blit = 0x00100000; @@ -60,8 +58,6 @@ freedreno_dev_info_init(struct freedreno_dev_info *info, uint32_t gpu_id) case 630: info->num_sp_cores = 2; info->fibers_per_sp = 128 * 16; - info->a6xx.ccu_offset_gmem = 0xf8000; - info->a6xx.ccu_offset_bypass = 0x20000; info->a6xx.ccu_cntl_gmem_unk2 = true; info->a6xx.supports_multiview_mask = false; info->a6xx.magic.RB_UNKNOWN_8E04_blit = 0x01000000; @@ -82,8 +78,6 @@ freedreno_dev_info_init(struct freedreno_dev_info *info, uint32_t gpu_id) * the per-wave layout though. */ info->fibers_per_sp = 128 * 4 * 16; - info->a6xx.ccu_offset_gmem = 0xf8000; - info->a6xx.ccu_offset_bypass = 0x20000; info->a6xx.supports_multiview_mask = true; info->a6xx.magic.RB_UNKNOWN_8E04_blit = 0x00100000; info->a6xx.magic.PC_UNKNOWN_9805 = 1; @@ -93,8 +87,6 @@ freedreno_dev_info_init(struct freedreno_dev_info *info, uint32_t gpu_id) case 650: info->num_sp_cores = 3; info->fibers_per_sp = 128 * 2 * 16; - info->a6xx.ccu_offset_gmem = 0x114000; - info->a6xx.ccu_offset_bypass = 0x30000; info->a6xx.supports_multiview_mask = true; info->a6xx.magic.RB_UNKNOWN_8E04_blit = 0x04100000; info->a6xx.magic.PC_UNKNOWN_9805 = 2; diff --git a/src/freedreno/common/freedreno_dev_info.h b/src/freedreno/common/freedreno_dev_info.h index 09013dcf0d3..2d06c269df7 100644 --- a/src/freedreno/common/freedreno_dev_info.h +++ b/src/freedreno/common/freedreno_dev_info.h @@ -46,8 +46,13 @@ struct freedreno_dev_info { uint32_t num_vsc_pipes; + /* number of CCU is always equal to the number of SP */ + union { + uint32_t num_sp_cores; + uint32_t num_ccu; + }; /* Information for private memory calculations */ - uint32_t num_sp_cores, fibers_per_sp; + uint32_t fibers_per_sp; union { struct { @@ -55,8 +60,6 @@ struct freedreno_dev_info { bool supports_multiview_mask; /* info for setting RB_CCU_CNTL */ - uint32_t ccu_offset_gmem; - uint32_t ccu_offset_bypass; bool ccu_cntl_gmem_unk2; bool has_z24uint_s8uint; @@ -69,6 +72,17 @@ struct freedreno_dev_info { }; }; +/* per CCU GMEM amount reserved for depth cache for direct rendering */ +#define A6XX_CCU_DEPTH_SIZE (64 * 1024) +/* per CCU GMEM amount reserved for color cache used by GMEM resolves + * which require color cache (non-BLIT event case). + * this is smaller than what is normally used by direct rendering + * (RB_CCU_CNTL.GMEM bit enables this smaller size) + * if a GMEM resolve requires color cache, the driver needs to make sure + * it will not overwrite pixel data in GMEM that is still needed + */ +#define A6XX_CCU_GMEM_COLOR_SIZE (16 * 1024) + void freedreno_dev_info_init(struct freedreno_dev_info *info, uint32_t gpu_id); #ifdef __cplusplus diff --git a/src/freedreno/registers/adreno/a6xx.xml b/src/freedreno/registers/adreno/a6xx.xml index 5feac542d98..2e4e14b0402 100644 --- a/src/freedreno/registers/adreno/a6xx.xml +++ b/src/freedreno/registers/adreno/a6xx.xml @@ -2229,18 +2229,18 @@ to upconvert to 32b float internally? - - + + diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index 60b3e26b124..cf801c339eb 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -160,10 +160,10 @@ tu_emit_cache_flush_ccu(struct tu_cmd_buffer *cmd_buffer, if (ccu_state != cmd_buffer->state.ccu_state) { struct tu_physical_device *phys_dev = cmd_buffer->device->physical_device; tu_cs_emit_regs(cs, - A6XX_RB_CCU_CNTL(.offset = + A6XX_RB_CCU_CNTL(.color_offset = ccu_state == TU_CMD_CCU_GMEM ? - phys_dev->info.a6xx.ccu_offset_gmem : - phys_dev->info.a6xx.ccu_offset_bypass, + phys_dev->ccu_offset_gmem : + phys_dev->ccu_offset_bypass, .gmem = ccu_state == TU_CMD_CCU_GMEM)); cmd_buffer->state.ccu_state = ccu_state; } @@ -712,7 +712,7 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs) ~(TU_CMD_FLAG_WAIT_FOR_IDLE | TU_CMD_FLAG_CACHE_INVALIDATE); tu_cs_emit_regs(cs, - A6XX_RB_CCU_CNTL(.offset = phys_dev->info.a6xx.ccu_offset_bypass)); + A6XX_RB_CCU_CNTL(.color_offset = phys_dev->ccu_offset_bypass)); cmd->state.ccu_state = TU_CMD_CCU_SYSMEM; tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8E04, 0x00100000); tu_cs_emit_write_reg(cs, REG_A6XX_SP_FLOAT_CNTL, 0); diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c index 6c183487de1..c8e24496c86 100644 --- a/src/freedreno/vulkan/tu_device.c +++ b/src/freedreno/vulkan/tu_device.c @@ -202,6 +202,9 @@ tu_physical_device_init(struct tu_physical_device *device, case 640: case 650: freedreno_dev_info_init(&device->info, device->gpu_id); + device->ccu_offset_bypass = device->info.num_ccu * A6XX_CCU_DEPTH_SIZE; + device->ccu_offset_gmem = (device->gmem_size - + device->info.num_ccu * A6XX_CCU_GMEM_COLOR_SIZE); break; default: result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, diff --git a/src/freedreno/vulkan/tu_pass.c b/src/freedreno/vulkan/tu_pass.c index 22f16622710..403376bbada 100644 --- a/src/freedreno/vulkan/tu_pass.c +++ b/src/freedreno/vulkan/tu_pass.c @@ -386,7 +386,7 @@ tu_render_pass_gmem_config(struct tu_render_pass *pass, * result: nblocks = {12, 52}, pixels = 196608 * optimal: nblocks = {13, 51}, pixels = 208896 */ - uint32_t gmem_blocks = phys_dev->info.a6xx.ccu_offset_gmem / gmem_align; + uint32_t gmem_blocks = phys_dev->ccu_offset_gmem / gmem_align; uint32_t offset = 0, pixels = ~0u, i; for (i = 0; i < pass->attachment_count; i++) { struct tu_render_pass_attachment *att = &pass->attachments[i]; diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h index aa04a651c67..fd164a6f7d2 100644 --- a/src/freedreno/vulkan/tu_private.h +++ b/src/freedreno/vulkan/tu_private.h @@ -207,6 +207,8 @@ struct tu_physical_device unsigned gpu_id; uint32_t gmem_size; uint64_t gmem_base; + uint32_t ccu_offset_gmem; + uint32_t ccu_offset_bypass; struct freedreno_dev_info info; diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_blitter.c b/src/gallium/drivers/freedreno/a6xx/fd6_blitter.c index 7295d65c4cd..2cfe7b518c4 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_blitter.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_blitter.c @@ -245,7 +245,7 @@ emit_setup(struct fd_batch *batch) /* normal BLIT_OP_SCALE operation needs bypass RB_CCU_CNTL */ OUT_WFI5(ring); OUT_PKT4(ring, REG_A6XX_RB_CCU_CNTL, 1); - OUT_RING(ring, A6XX_RB_CCU_CNTL_OFFSET(screen->info.a6xx.ccu_offset_bypass)); + OUT_RING(ring, A6XX_RB_CCU_CNTL_COLOR_OFFSET(screen->ccu_offset_bypass)); } static void diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c index dfb6f9a904e..428531f47b0 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c @@ -380,8 +380,7 @@ fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth) OUT_WFI5(ring); - OUT_REG(ring, - A6XX_RB_CCU_CNTL(.offset = screen->info.a6xx.ccu_offset_bypass)); + OUT_REG(ring, A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_bypass)); OUT_REG(ring, A6XX_HLSQ_INVALIDATE_CMD(.vs_state = true, .hs_state = true, diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c index e5ee771b7c8..28d6a227924 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c @@ -740,7 +740,7 @@ emit_binning_pass(struct fd_batch *batch) assert_dt OUT_WFI5(ring); OUT_REG(ring, - A6XX_RB_CCU_CNTL(.offset = screen->info.a6xx.ccu_offset_gmem, + A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_gmem, .gmem = true, .unk2 = screen->info.a6xx.ccu_cntl_gmem_unk2)); } @@ -808,7 +808,7 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt fd_wfi(batch, ring); OUT_REG(ring, - A6XX_RB_CCU_CNTL(.offset = screen->info.a6xx.ccu_offset_gmem, + A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_gmem, .gmem = true, .unk2 = screen->info.a6xx.ccu_cntl_gmem_unk2)); @@ -1585,8 +1585,7 @@ fd6_emit_sysmem_prep(struct fd_batch *batch) assert_dt fd6_cache_inv(batch, ring); fd_wfi(batch, ring); - OUT_REG(ring, - A6XX_RB_CCU_CNTL(.offset = screen->info.a6xx.ccu_offset_bypass)); + OUT_REG(ring, A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_bypass)); /* enable stream-out, with sysmem there is only one pass: */ OUT_REG(ring, A6XX_VPC_SO_DISABLE(false)); diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 72fe7112d3a..a0af7241de7 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -1067,6 +1067,12 @@ fd_screen_create(struct fd_device *dev, struct renderonly *ro) freedreno_dev_info_init(&screen->info, screen->gpu_id); + if (is_a6xx(screen)) { + screen->ccu_offset_bypass = screen->info.num_ccu * A6XX_CCU_DEPTH_SIZE; + screen->ccu_offset_gmem = (screen->gmemsize_bytes - + screen->info.num_ccu * A6XX_CCU_GMEM_COLOR_SIZE); + } + if (FD_DBG(PERFC)) { screen->perfcntr_groups = fd_perfcntrs(screen->gpu_id, &screen->num_perfcntr_groups); diff --git a/src/gallium/drivers/freedreno/freedreno_screen.h b/src/gallium/drivers/freedreno/freedreno_screen.h index fb17ddf1a67..b0e70e3b1a5 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.h +++ b/src/gallium/drivers/freedreno/freedreno_screen.h @@ -92,6 +92,8 @@ struct fd_screen { bool has_syncobj; struct freedreno_dev_info info; + uint32_t ccu_offset_gmem; + uint32_t ccu_offset_bypass; /* Bitmask of gmem_reasons that do not force GMEM path over bypass * for current generation.