freedreno/common: unhardcode CCU color cache offset
Replace it with a calculation that works for all current GPUs. The calculation is duplicated in both drivers because freedreno_dev_info isn't meant for derived parameters (and drivers might want to just calculate it on the fly instead). Signed-off-by: Jonathan Marek <jonathan@marek.ca> Signed-off-by: Rob Clark <robdclark@chromium.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11790>
This commit is contained in:

committed by
Marge Bot

parent
a37460e7ec
commit
1a6dd7f9b1
@@ -1555,7 +1555,7 @@ registers:
|
||||
00000000 0xa630: 00000000
|
||||
00100000 RB_UNKNOWN_8E04: 0x100000
|
||||
00000001 RB_ADDR_MODE_CNTL: ADDR_64B
|
||||
00000000 RB_CCU_CNTL: { OFFSET = 0 }
|
||||
00000000 RB_CCU_CNTL: { COLOR_OFFSET = 0 }
|
||||
00000004 RB_NC_MODE_CNTL: { LOWER_BIT = 2 | UPPER_BIT = 0 }
|
||||
00000000 RB_PERFCTR_RB_SEL[0]+0: 00000000
|
||||
00000000 RB_PERFCTR_RB_SEL[0x1]+0: 00000000
|
||||
|
@@ -13,7 +13,7 @@ t4 write HLSQ_INVALIDATE_CMD (bb08)
|
||||
t7 opcode: CP_WAIT_FOR_IDLE (26) (1 dwords)
|
||||
0000000001058010: 0000: 70268000
|
||||
t4 write RB_CCU_CNTL (8e07)
|
||||
RB_CCU_CNTL: { OFFSET = 0x20000 }
|
||||
RB_CCU_CNTL: { COLOR_OFFSET = 0x20000 }
|
||||
0000000001058014: 0000: 408e0701 10000000
|
||||
t4 write RB_UNKNOWN_8E04 (8e04)
|
||||
RB_UNKNOWN_8E04: 0x100000
|
||||
@@ -323,7 +323,7 @@ t7 opcode: CP_BLIT (2c) (2 dwords)
|
||||
!+ 000000ff RB_2D_SRC_SOLID_C3: 0xff
|
||||
+ 00000000 RB_UNKNOWN_8E01: 0
|
||||
!+ 00100000 RB_UNKNOWN_8E04: 0x100000
|
||||
!+ 10000000 RB_CCU_CNTL: { OFFSET = 0x20000 }
|
||||
!+ 10000000 RB_CCU_CNTL: { COLOR_OFFSET = 0x20000 }
|
||||
+ 00000000 VPC_UNKNOWN_9107: { 0 }
|
||||
+ 00000000 VPC_UNKNOWN_9210: 0
|
||||
+ 00000000 VPC_UNKNOWN_9211: 0
|
||||
@@ -399,7 +399,7 @@ t7 opcode: CP_EVENT_WRITE (46) (2 dwords)
|
||||
t7 opcode: CP_WAIT_FOR_IDLE (26) (1 dwords)
|
||||
000000000105832c: 0000: 70268000
|
||||
t4 write RB_CCU_CNTL (8e07)
|
||||
RB_CCU_CNTL: { OFFSET = 0xf8000 | GMEM }
|
||||
RB_CCU_CNTL: { COLOR_OFFSET = 0xf8000 | GMEM }
|
||||
0000000001058330: 0000: 408e0701 7c400000
|
||||
t4 write VPC_SO_DISABLE (9306)
|
||||
VPC_SO_DISABLE: { 0 }
|
||||
@@ -504,7 +504,7 @@ t7 opcode: CP_EVENT_WRITE (46) (2 dwords)
|
||||
+ 00000000 RB_BLIT_FLAG_DST_HI: 0
|
||||
!+ 00004001 RB_BLIT_FLAG_DST_PITCH: { PITCH = 64 | ARRAY_PITCH = 1024 }
|
||||
!+ 00000003 RB_BLIT_INFO: { UNK0 | GMEM | CLEAR_MASK = 0 }
|
||||
!+ 7c400000 RB_CCU_CNTL: { OFFSET = 0xf8000 | GMEM }
|
||||
!+ 7c400000 RB_CCU_CNTL: { COLOR_OFFSET = 0xf8000 | GMEM }
|
||||
!+ 00000000 VPC_SO_DISABLE: { 0 }
|
||||
+ 00000000 SP_TP_WINDOW_OFFSET: { X = 0 | Y = 0 }
|
||||
+ 00000000 SP_WINDOW_OFFSET: { X = 0 | Y = 0 }
|
||||
|
@@ -246,7 +246,7 @@ t7 opcode: CP_SKIP_IB2_ENABLE_GLOBAL (1d) (2 dwords)
|
||||
t7 opcode: CP_WAIT_FOR_IDLE (26) (1 dwords)
|
||||
0000000001d91278: 0000: 70268000
|
||||
t4 write RB_CCU_CNTL (8e07)
|
||||
RB_CCU_CNTL: { OFFSET = 0xf8000 | GMEM | UNK2 }
|
||||
RB_CCU_CNTL: { COLOR_OFFSET = 0xf8000 | GMEM | UNK2 }
|
||||
0000000001d9127c: 0000: 408e0701 7c400004
|
||||
t4 write RB_DEPTH_BUFFER_INFO (8872)
|
||||
RB_DEPTH_BUFFER_INFO: { DEPTH_FORMAT = DEPTH6_NONE }
|
||||
@@ -1024,7 +1024,7 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
|
||||
+ 00000000 RB_MRT_FLAG_BUFFER[0].PITCH: { PITCH = 0 | ARRAY_PITCH = 0 }
|
||||
!+ 00000001 RB_UNKNOWN_8E01: 0x1
|
||||
+ 00000000 RB_UNKNOWN_8E04: 0
|
||||
!+ 7c400004 RB_CCU_CNTL: { OFFSET = 0xf8000 | GMEM | UNK2 }
|
||||
!+ 7c400004 RB_CCU_CNTL: { COLOR_OFFSET = 0xf8000 | GMEM | UNK2 }
|
||||
!+ 00ffff00 VPC_VS_CLIP_CNTL: { CLIP_MASK = 0 | CLIP_DIST_03_LOC = 255 | CLIP_DIST_47_LOC = 255 }
|
||||
!+ 0000ffff VPC_VS_LAYER_CNTL: { LAYERLOC = 255 | VIEWLOC = 255 }
|
||||
+ 00000000 VPC_UNKNOWN_9107: { 0 }
|
||||
@@ -1517,7 +1517,7 @@ t7 opcode: CP_SET_MODE (63) (2 dwords)
|
||||
t7 opcode: CP_WAIT_FOR_IDLE (26) (1 dwords)
|
||||
0000000001d91938: 0000: 70268000
|
||||
t4 write RB_CCU_CNTL (8e07)
|
||||
RB_CCU_CNTL: { OFFSET = 0xf8000 | GMEM | UNK2 }
|
||||
RB_CCU_CNTL: { COLOR_OFFSET = 0xf8000 | GMEM | UNK2 }
|
||||
0000000001d9193c: 0000: 408e0701 7c400004
|
||||
t4 write VPC_SO_DISABLE (9306)
|
||||
VPC_SO_DISABLE: { DISABLE }
|
||||
@@ -1695,7 +1695,7 @@ t7 opcode: CP_EVENT_WRITE (46) (2 dwords)
|
||||
+ 00000000 RB_BLIT_CLEAR_COLOR_DW2: 0
|
||||
+ 00000000 RB_BLIT_CLEAR_COLOR_DW3: 0
|
||||
!+ 000000f2 RB_BLIT_INFO: { GMEM | CLEAR_MASK = 0xf }
|
||||
+ 7c400004 RB_CCU_CNTL: { OFFSET = 0xf8000 | GMEM | UNK2 }
|
||||
+ 7c400004 RB_CCU_CNTL: { COLOR_OFFSET = 0xf8000 | GMEM | UNK2 }
|
||||
!+ 00000001 VPC_SO_DISABLE: { DISABLE }
|
||||
+ 00000001 PC_UNKNOWN_9805: 0x1
|
||||
!+ 00000000 VFD_MODE_CNTL: { 0 }
|
||||
|
@@ -49,8 +49,6 @@ freedreno_dev_info_init(struct freedreno_dev_info *info, uint32_t gpu_id)
|
||||
case 618:
|
||||
info->num_sp_cores = 1;
|
||||
info->fibers_per_sp = 128 * 16;
|
||||
info->a6xx.ccu_offset_gmem = 0x7c000;
|
||||
info->a6xx.ccu_offset_bypass = 0x10000;
|
||||
info->a6xx.ccu_cntl_gmem_unk2 = true;
|
||||
info->a6xx.supports_multiview_mask = false;
|
||||
info->a6xx.magic.RB_UNKNOWN_8E04_blit = 0x00100000;
|
||||
@@ -60,8 +58,6 @@ freedreno_dev_info_init(struct freedreno_dev_info *info, uint32_t gpu_id)
|
||||
case 630:
|
||||
info->num_sp_cores = 2;
|
||||
info->fibers_per_sp = 128 * 16;
|
||||
info->a6xx.ccu_offset_gmem = 0xf8000;
|
||||
info->a6xx.ccu_offset_bypass = 0x20000;
|
||||
info->a6xx.ccu_cntl_gmem_unk2 = true;
|
||||
info->a6xx.supports_multiview_mask = false;
|
||||
info->a6xx.magic.RB_UNKNOWN_8E04_blit = 0x01000000;
|
||||
@@ -82,8 +78,6 @@ freedreno_dev_info_init(struct freedreno_dev_info *info, uint32_t gpu_id)
|
||||
* the per-wave layout though.
|
||||
*/
|
||||
info->fibers_per_sp = 128 * 4 * 16;
|
||||
info->a6xx.ccu_offset_gmem = 0xf8000;
|
||||
info->a6xx.ccu_offset_bypass = 0x20000;
|
||||
info->a6xx.supports_multiview_mask = true;
|
||||
info->a6xx.magic.RB_UNKNOWN_8E04_blit = 0x00100000;
|
||||
info->a6xx.magic.PC_UNKNOWN_9805 = 1;
|
||||
@@ -93,8 +87,6 @@ freedreno_dev_info_init(struct freedreno_dev_info *info, uint32_t gpu_id)
|
||||
case 650:
|
||||
info->num_sp_cores = 3;
|
||||
info->fibers_per_sp = 128 * 2 * 16;
|
||||
info->a6xx.ccu_offset_gmem = 0x114000;
|
||||
info->a6xx.ccu_offset_bypass = 0x30000;
|
||||
info->a6xx.supports_multiview_mask = true;
|
||||
info->a6xx.magic.RB_UNKNOWN_8E04_blit = 0x04100000;
|
||||
info->a6xx.magic.PC_UNKNOWN_9805 = 2;
|
||||
|
@@ -46,8 +46,13 @@ struct freedreno_dev_info {
|
||||
|
||||
uint32_t num_vsc_pipes;
|
||||
|
||||
/* number of CCU is always equal to the number of SP */
|
||||
union {
|
||||
uint32_t num_sp_cores;
|
||||
uint32_t num_ccu;
|
||||
};
|
||||
/* Information for private memory calculations */
|
||||
uint32_t num_sp_cores, fibers_per_sp;
|
||||
uint32_t fibers_per_sp;
|
||||
|
||||
union {
|
||||
struct {
|
||||
@@ -55,8 +60,6 @@ struct freedreno_dev_info {
|
||||
bool supports_multiview_mask;
|
||||
|
||||
/* info for setting RB_CCU_CNTL */
|
||||
uint32_t ccu_offset_gmem;
|
||||
uint32_t ccu_offset_bypass;
|
||||
bool ccu_cntl_gmem_unk2;
|
||||
bool has_z24uint_s8uint;
|
||||
|
||||
@@ -69,6 +72,17 @@ struct freedreno_dev_info {
|
||||
};
|
||||
};
|
||||
|
||||
/* per CCU GMEM amount reserved for depth cache for direct rendering */
|
||||
#define A6XX_CCU_DEPTH_SIZE (64 * 1024)
|
||||
/* per CCU GMEM amount reserved for color cache used by GMEM resolves
|
||||
* which require color cache (non-BLIT event case).
|
||||
* this is smaller than what is normally used by direct rendering
|
||||
* (RB_CCU_CNTL.GMEM bit enables this smaller size)
|
||||
* if a GMEM resolve requires color cache, the driver needs to make sure
|
||||
* it will not overwrite pixel data in GMEM that is still needed
|
||||
*/
|
||||
#define A6XX_CCU_GMEM_COLOR_SIZE (16 * 1024)
|
||||
|
||||
void freedreno_dev_info_init(struct freedreno_dev_info *info, uint32_t gpu_id);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@@ -2229,18 +2229,18 @@ to upconvert to 32b float internally?
|
||||
<reg32 offset="0x8e05" name="RB_ADDR_MODE_CNTL" pos="0" type="a5xx_address_mode"/>
|
||||
<!-- 0x8e06 invalid -->
|
||||
<reg32 offset="0x8e07" name="RB_CCU_CNTL">
|
||||
<!-- offset into GMEM for something.
|
||||
important for sysmem path
|
||||
BLIT_OP_SCALE also writes to GMEM at this offset for GMEM store
|
||||
blob values for GMEM path (note: close to GMEM size):
|
||||
a618: 0x7c000 a630/a640: 0xf8000 a650: 0x114000
|
||||
SYSMEM path values:
|
||||
a618: 0x10000 a630/a640: 0x20000 a650: 0x30000
|
||||
TODO: valid mask 0xfffffc1f
|
||||
<!-- GMEM offset of CCU color cache
|
||||
CCU depth cache starts at zero, so this should be the size
|
||||
of the depth cache for direct rendering
|
||||
for GMEM rendering, we set it to GMEM size minus the minimum
|
||||
CCU color cache size. CCU color cache will be needed in some
|
||||
resolve cases, and in those cases we need to reserve the end
|
||||
of GMEM for color cache.
|
||||
-->
|
||||
<bitfield name="OFFSET" low="23" high="31" shr="12" type="hex"/>
|
||||
<bitfield name="COLOR_OFFSET" low="23" high="31" shr="12" type="hex"/>
|
||||
<bitfield name="GMEM" pos="22" type="boolean"/> <!-- set for GMEM path -->
|
||||
<bitfield name="UNK2" pos="2" type="boolean"/> <!-- sometimes set with GMEM? -->
|
||||
<!--TODO: valid mask 0xfffffc1f -->
|
||||
</reg32>
|
||||
<reg32 offset="0x8e08" name="RB_NC_MODE_CNTL">
|
||||
<bitfield name="MODE" pos="0" type="boolean"/>
|
||||
|
@@ -160,10 +160,10 @@ tu_emit_cache_flush_ccu(struct tu_cmd_buffer *cmd_buffer,
|
||||
if (ccu_state != cmd_buffer->state.ccu_state) {
|
||||
struct tu_physical_device *phys_dev = cmd_buffer->device->physical_device;
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_RB_CCU_CNTL(.offset =
|
||||
A6XX_RB_CCU_CNTL(.color_offset =
|
||||
ccu_state == TU_CMD_CCU_GMEM ?
|
||||
phys_dev->info.a6xx.ccu_offset_gmem :
|
||||
phys_dev->info.a6xx.ccu_offset_bypass,
|
||||
phys_dev->ccu_offset_gmem :
|
||||
phys_dev->ccu_offset_bypass,
|
||||
.gmem = ccu_state == TU_CMD_CCU_GMEM));
|
||||
cmd_buffer->state.ccu_state = ccu_state;
|
||||
}
|
||||
@@ -712,7 +712,7 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
~(TU_CMD_FLAG_WAIT_FOR_IDLE | TU_CMD_FLAG_CACHE_INVALIDATE);
|
||||
|
||||
tu_cs_emit_regs(cs,
|
||||
A6XX_RB_CCU_CNTL(.offset = phys_dev->info.a6xx.ccu_offset_bypass));
|
||||
A6XX_RB_CCU_CNTL(.color_offset = phys_dev->ccu_offset_bypass));
|
||||
cmd->state.ccu_state = TU_CMD_CCU_SYSMEM;
|
||||
tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8E04, 0x00100000);
|
||||
tu_cs_emit_write_reg(cs, REG_A6XX_SP_FLOAT_CNTL, 0);
|
||||
|
@@ -202,6 +202,9 @@ tu_physical_device_init(struct tu_physical_device *device,
|
||||
case 640:
|
||||
case 650:
|
||||
freedreno_dev_info_init(&device->info, device->gpu_id);
|
||||
device->ccu_offset_bypass = device->info.num_ccu * A6XX_CCU_DEPTH_SIZE;
|
||||
device->ccu_offset_gmem = (device->gmem_size -
|
||||
device->info.num_ccu * A6XX_CCU_GMEM_COLOR_SIZE);
|
||||
break;
|
||||
default:
|
||||
result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
|
||||
|
@@ -386,7 +386,7 @@ tu_render_pass_gmem_config(struct tu_render_pass *pass,
|
||||
* result: nblocks = {12, 52}, pixels = 196608
|
||||
* optimal: nblocks = {13, 51}, pixels = 208896
|
||||
*/
|
||||
uint32_t gmem_blocks = phys_dev->info.a6xx.ccu_offset_gmem / gmem_align;
|
||||
uint32_t gmem_blocks = phys_dev->ccu_offset_gmem / gmem_align;
|
||||
uint32_t offset = 0, pixels = ~0u, i;
|
||||
for (i = 0; i < pass->attachment_count; i++) {
|
||||
struct tu_render_pass_attachment *att = &pass->attachments[i];
|
||||
|
@@ -207,6 +207,8 @@ struct tu_physical_device
|
||||
unsigned gpu_id;
|
||||
uint32_t gmem_size;
|
||||
uint64_t gmem_base;
|
||||
uint32_t ccu_offset_gmem;
|
||||
uint32_t ccu_offset_bypass;
|
||||
|
||||
struct freedreno_dev_info info;
|
||||
|
||||
|
@@ -245,7 +245,7 @@ emit_setup(struct fd_batch *batch)
|
||||
/* normal BLIT_OP_SCALE operation needs bypass RB_CCU_CNTL */
|
||||
OUT_WFI5(ring);
|
||||
OUT_PKT4(ring, REG_A6XX_RB_CCU_CNTL, 1);
|
||||
OUT_RING(ring, A6XX_RB_CCU_CNTL_OFFSET(screen->info.a6xx.ccu_offset_bypass));
|
||||
OUT_RING(ring, A6XX_RB_CCU_CNTL_COLOR_OFFSET(screen->ccu_offset_bypass));
|
||||
}
|
||||
|
||||
static void
|
||||
|
@@ -380,8 +380,7 @@ fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth)
|
||||
|
||||
OUT_WFI5(ring);
|
||||
|
||||
OUT_REG(ring,
|
||||
A6XX_RB_CCU_CNTL(.offset = screen->info.a6xx.ccu_offset_bypass));
|
||||
OUT_REG(ring, A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_bypass));
|
||||
|
||||
OUT_REG(ring,
|
||||
A6XX_HLSQ_INVALIDATE_CMD(.vs_state = true, .hs_state = true,
|
||||
|
@@ -740,7 +740,7 @@ emit_binning_pass(struct fd_batch *batch) assert_dt
|
||||
OUT_WFI5(ring);
|
||||
|
||||
OUT_REG(ring,
|
||||
A6XX_RB_CCU_CNTL(.offset = screen->info.a6xx.ccu_offset_gmem,
|
||||
A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_gmem,
|
||||
.gmem = true,
|
||||
.unk2 = screen->info.a6xx.ccu_cntl_gmem_unk2));
|
||||
}
|
||||
@@ -808,7 +808,7 @@ fd6_emit_tile_init(struct fd_batch *batch) assert_dt
|
||||
|
||||
fd_wfi(batch, ring);
|
||||
OUT_REG(ring,
|
||||
A6XX_RB_CCU_CNTL(.offset = screen->info.a6xx.ccu_offset_gmem,
|
||||
A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_gmem,
|
||||
.gmem = true,
|
||||
.unk2 = screen->info.a6xx.ccu_cntl_gmem_unk2));
|
||||
|
||||
@@ -1585,8 +1585,7 @@ fd6_emit_sysmem_prep(struct fd_batch *batch) assert_dt
|
||||
fd6_cache_inv(batch, ring);
|
||||
|
||||
fd_wfi(batch, ring);
|
||||
OUT_REG(ring,
|
||||
A6XX_RB_CCU_CNTL(.offset = screen->info.a6xx.ccu_offset_bypass));
|
||||
OUT_REG(ring, A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_bypass));
|
||||
|
||||
/* enable stream-out, with sysmem there is only one pass: */
|
||||
OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));
|
||||
|
@@ -1067,6 +1067,12 @@ fd_screen_create(struct fd_device *dev, struct renderonly *ro)
|
||||
|
||||
freedreno_dev_info_init(&screen->info, screen->gpu_id);
|
||||
|
||||
if (is_a6xx(screen)) {
|
||||
screen->ccu_offset_bypass = screen->info.num_ccu * A6XX_CCU_DEPTH_SIZE;
|
||||
screen->ccu_offset_gmem = (screen->gmemsize_bytes -
|
||||
screen->info.num_ccu * A6XX_CCU_GMEM_COLOR_SIZE);
|
||||
}
|
||||
|
||||
if (FD_DBG(PERFC)) {
|
||||
screen->perfcntr_groups =
|
||||
fd_perfcntrs(screen->gpu_id, &screen->num_perfcntr_groups);
|
||||
|
@@ -92,6 +92,8 @@ struct fd_screen {
|
||||
bool has_syncobj;
|
||||
|
||||
struct freedreno_dev_info info;
|
||||
uint32_t ccu_offset_gmem;
|
||||
uint32_t ccu_offset_bypass;
|
||||
|
||||
/* Bitmask of gmem_reasons that do not force GMEM path over bypass
|
||||
* for current generation.
|
||||
|
Reference in New Issue
Block a user