freedreno/a6xx: Document GRAS_SC_CNTL::SINGLE_PRIM_MODE

Add a value discovered when investigating how the blob implements
GL_KHR_blend_equation_advanced.

Note that everything added here is a bit speculative, because it
assumes the blob's implementation of GL_KHR_blend_equation_advanced is
sane. In particular, a value of 0x3 seems to solve the UBWC problem as
well, so I'm not sure whether my description of the difference between
0x1 and 0x3 is correct. I'm also surprised that the blob uses the same
value for the coherent and non-coherent cases when forcing sysmem.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12360>
This commit is contained in:
Connor Abbott
2021-08-13 15:37:23 +02:00
committed by Marge Bot
parent d18f102275
commit 351c6b8bfe
4 changed files with 29 additions and 6 deletions

View File

@@ -5395,7 +5395,7 @@ clusters:
00000000 GRAS_VS_LAYER_CNTL: { 0 }
00000000 GRAS_GS_LAYER_CNTL: { 0 }
00000000 GRAS_DS_LAYER_CNTL: { 0 }
00000002 GRAS_SC_CNTL: { CCUSINGLECACHELINESIZE = 0x2 | SINGLE_PRIM_MODE = 0 | RASTER_MODE = TYPE_TILED | RASTER_DIRECTION = LR_TB | SEQUENCED_THREAD_DISTRIBUTION = DIST_SCREEN_COORD }
00000002 GRAS_SC_CNTL: { CCUSINGLECACHELINESIZE = 0x2 | SINGLE_PRIM_MODE = NO_FLUSH | RASTER_MODE = TYPE_TILED | RASTER_DIRECTION = LR_TB | SEQUENCED_THREAD_DISTRIBUTION = DIST_SCREEN_COORD }
00000101 GRAS_BIN_CONTROL: { BINW = 32 | BINH = 16 | RENDER_MODE = RENDERING_PASS | BUFFERS_LOCATION = BUFFERS_IN_GMEM | LRZ_FEEDBACK_ZMODE_MASK = 0 }
00000000 GRAS_RAS_MSAA_CNTL: { SAMPLES = MSAA_ONE }
00000000 GRAS_DEST_MSAA_CNTL: { SAMPLES = MSAA_ONE }
@@ -5640,7 +5640,7 @@ clusters:
00000000 GRAS_VS_LAYER_CNTL: { 0 }
00000000 GRAS_GS_LAYER_CNTL: { 0 }
00000000 GRAS_DS_LAYER_CNTL: { 0 }
00000002 GRAS_SC_CNTL: { CCUSINGLECACHELINESIZE = 0x2 | SINGLE_PRIM_MODE = 0 | RASTER_MODE = TYPE_TILED | RASTER_DIRECTION = LR_TB | SEQUENCED_THREAD_DISTRIBUTION = DIST_SCREEN_COORD }
00000002 GRAS_SC_CNTL: { CCUSINGLECACHELINESIZE = 0x2 | SINGLE_PRIM_MODE = NO_FLUSH | RASTER_MODE = TYPE_TILED | RASTER_DIRECTION = LR_TB | SEQUENCED_THREAD_DISTRIBUTION = DIST_SCREEN_COORD }
00000101 GRAS_BIN_CONTROL: { BINW = 32 | BINH = 16 | RENDER_MODE = RENDERING_PASS | BUFFERS_LOCATION = BUFFERS_IN_GMEM | LRZ_FEEDBACK_ZMODE_MASK = 0 }
00000000 GRAS_RAS_MSAA_CNTL: { SAMPLES = MSAA_ONE }
00000000 GRAS_DEST_MSAA_CNTL: { SAMPLES = MSAA_ONE }

View File

@@ -157,7 +157,7 @@ t4 write GRAS_SU_CONSERVATIVE_RAS_CNTL (8099)
GRAS_SU_CONSERVATIVE_RAS_CNTL: { SHIFTAMOUNT = 0 }
000000000105818c: 0000: 40809901 00000000
t4 write GRAS_SC_CNTL (80a0)
GRAS_SC_CNTL: { CCUSINGLECACHELINESIZE = 0x2 | SINGLE_PRIM_MODE = 0 | RASTER_MODE = TYPE_TILED | RASTER_DIRECTION = LR_TB | SEQUENCED_THREAD_DISTRIBUTION = DIST_SCREEN_COORD }
GRAS_SC_CNTL: { CCUSINGLECACHELINESIZE = 0x2 | SINGLE_PRIM_MODE = NO_FLUSH | RASTER_MODE = TYPE_TILED | RASTER_DIRECTION = LR_TB | SEQUENCED_THREAD_DISTRIBUTION = DIST_SCREEN_COORD }
0000000001058194: 0000: 4080a001 00000002
t4 write GRAS_UNKNOWN_80AF (80af)
GRAS_UNKNOWN_80AF: FALSE
@@ -286,7 +286,7 @@ t7 opcode: CP_BLIT (2c) (2 dwords)
!+ 03200000 UCHE_UNKNOWN_0E12: 0x3200000
!+ 00000004 UCHE_CLIENT_PF: { PERFSEL = 0x4 }
+ 00000000 GRAS_SU_CONSERVATIVE_RAS_CNTL: { SHIFTAMOUNT = 0 }
!+ 00000002 GRAS_SC_CNTL: { CCUSINGLECACHELINESIZE = 0x2 | SINGLE_PRIM_MODE = 0 | RASTER_MODE = TYPE_TILED | RASTER_DIRECTION = LR_TB | SEQUENCED_THREAD_DISTRIBUTION = DIST_SCREEN_COORD }
!+ 00000002 GRAS_SC_CNTL: { CCUSINGLECACHELINESIZE = 0x2 | SINGLE_PRIM_MODE = NO_FLUSH | RASTER_MODE = TYPE_TILED | RASTER_DIRECTION = LR_TB | SEQUENCED_THREAD_DISTRIBUTION = DIST_SCREEN_COORD }
+ 00000000 GRAS_UNKNOWN_80AF: FALSE
+ 00000000 GRAS_LRZ_CNTL: { 0 }
+ 00000000 GRAS_UNKNOWN_8110: 0

View File

@@ -148,7 +148,7 @@ t4 write GRAS_VS_LAYER_CNTL (809b)
GRAS_VS_LAYER_CNTL: { 0 }
0000000001d91174: 0000: 48809b01 00000000
t4 write GRAS_SC_CNTL (80a0)
GRAS_SC_CNTL: { CCUSINGLECACHELINESIZE = 0x2 | SINGLE_PRIM_MODE = 0 | RASTER_MODE = TYPE_TILED | RASTER_DIRECTION = LR_TB | SEQUENCED_THREAD_DISTRIBUTION = DIST_SCREEN_COORD }
GRAS_SC_CNTL: { CCUSINGLECACHELINESIZE = 0x2 | SINGLE_PRIM_MODE = NO_FLUSH | RASTER_MODE = TYPE_TILED | RASTER_DIRECTION = LR_TB | SEQUENCED_THREAD_DISTRIBUTION = DIST_SCREEN_COORD }
0000000001d9117c: 0000: 4080a001 00000002
t4 write GRAS_UNKNOWN_80AF (80af)
GRAS_UNKNOWN_80AF: FALSE
@@ -941,7 +941,7 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
+ 00000000 GRAS_SU_DEPTH_BUFFER_INFO: { DEPTH_FORMAT = DEPTH6_NONE }
+ 00000000 GRAS_SU_CONSERVATIVE_RAS_CNTL: { SHIFTAMOUNT = 0 }
+ 00000000 GRAS_VS_LAYER_CNTL: { 0 }
!+ 00000002 GRAS_SC_CNTL: { CCUSINGLECACHELINESIZE = 0x2 | SINGLE_PRIM_MODE = 0 | RASTER_MODE = TYPE_TILED | RASTER_DIRECTION = LR_TB | SEQUENCED_THREAD_DISTRIBUTION = DIST_SCREEN_COORD }
!+ 00000002 GRAS_SC_CNTL: { CCUSINGLECACHELINESIZE = 0x2 | SINGLE_PRIM_MODE = NO_FLUSH | RASTER_MODE = TYPE_TILED | RASTER_DIRECTION = LR_TB | SEQUENCED_THREAD_DISTRIBUTION = DIST_SCREEN_COORD }
!+ 06041e11 GRAS_BIN_CONTROL: { BINW = 544 | BINH = 480 | RENDER_MODE = BINNING_PASS | BUFFERS_LOCATION = BUFFERS_IN_GMEM | LRZ_FEEDBACK_ZMODE_MASK = 0x6 }
+ 00000000 GRAS_RAS_MSAA_CNTL: { SAMPLES = MSAA_ONE }
!+ 00000004 GRAS_DEST_MSAA_CNTL: { SAMPLES = MSAA_ONE | MSAA_DISABLE }

View File

@@ -1645,6 +1645,29 @@ to upconvert to 32b float internally?
</enum>
<enum name="a6xx_single_prim_mode">
<!--
Modes shown for the SINGLE_PRIM_MODE field (decoded in GRAS_SC_CNTL).
Only 0x0, 0x1 and 0x3 are named here; 0x2 has no entry.

Reading order: each <doc> element below is placed BEFORE the <value>
it describes, not after it. The descriptions were derived from
observed driver behaviour and should be treated as tentative.
-->
<value value="0x0" name="NO_FLUSH"/>
<!--
The <doc> that follows describes FLUSH_PER_OVERLAP_AND_OVERWRITE (0x1),
not NO_FLUSH above: its text contrasts itself with NO_FLUSH and builds
on FLUSH_PER_OVERLAP.
-->
<doc>
In addition to FLUSH_PER_OVERLAP, guarantee that UCHE
and CCU don't get out of sync when fetching the previous
value for the current pixel. With NO_FLUSH, there's the
possibility that the flags for the current pixel are
flushed before the data or vice-versa, leading to
texture fetches via UCHE getting out of sync values.
This mode should eliminate that. It's used in bypass
mode for coherent blending
(GL_KHR_blend_equation_advanced_coherent) as well as
non-coherent blending.
</doc>
<value value="0x1" name="FLUSH_PER_OVERLAP_AND_OVERWRITE"/>
<!--
The <doc> that follows describes FLUSH_PER_OVERLAP (0x3), the value
declared immediately after it.
-->
<doc>
Invalidate UCHE and wait for any pending work to finish
if there was possibly an overlapping primitive prior to
the current one. This is similar to a combination of
GRAS_SC_CONTROL::INJECT_L2_INVALIDATE_EVENT and
WAIT_RB_IDLE_ALL_TRI on a3xx. It's used in GMEM mode for
coherent blending
(GL_KHR_blend_equation_advanced_coherent).
</doc>
<value value="0x3" name="FLUSH_PER_OVERLAP"/>
</enum>