radv: Use L2 coherency on GFX9+.
Especially on GFX10 we can avoid pretty much all L2 flushes. However, instead of that we have to do L2_METADATA invalidations. We do that every time we could possibly be reading new DCC/HTILE info from the L2 cache in shaders. Benchmark results, basemark on high preset with a navi10 on profile_standard (which is slower than a navi10 on default settings, please don't compare to random navi10 results you find) before: 5932 5928 5937 after: 6011 6013 6009 So this looks like a >1% increase. Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7202>
This commit is contained in:

committed by
Marge Bot

parent
0af86341a2
commit
4a783a3c78
@@ -1103,12 +1103,9 @@ gfx10_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
|
||||
S_586_GLM_WB(1) | S_586_GLM_INV(1);
|
||||
|
||||
*sqtt_flush_bits |= RGP_FLUSH_FLUSH_L2;
|
||||
}
|
||||
|
||||
/* TODO: Implement this new flag for GFX9+.
|
||||
else if (flush_bits & RADV_CMD_FLAG_INV_L2_METADATA)
|
||||
} else if (flush_bits & RADV_CMD_FLAG_INV_L2_METADATA) {
|
||||
gcr_cntl |= S_586_GLM_INV(1) | S_586_GLM_WB(1);
|
||||
*/
|
||||
}
|
||||
|
||||
if (flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB)) {
|
||||
/* TODO: trigger on RADV_CMD_FLAG_FLUSH_AND_INV_CB_META */
|
||||
@@ -1356,11 +1353,18 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
|
||||
*sqtt_flush_bits |= RGP_FLUSH_CS_PARTIAL_FLUSH;
|
||||
}
|
||||
|
||||
if (chip_class == GFX9 && flush_cb_db) {
|
||||
if (chip_class == GFX9 &&
|
||||
(flush_cb_db || (flush_bits & RADV_CMD_FLAG_INV_L2_METADATA))) {
|
||||
unsigned cb_db_event, tc_flags;
|
||||
|
||||
/* Set the CB/DB flush event. */
|
||||
cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT;
|
||||
if (flush_cb_db) {
|
||||
cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT;
|
||||
} else {
|
||||
/* Besides the CB the only other thing writing HTILE
|
||||
* or DCC metadata are our meta compute shaders. */
|
||||
cb_db_event = V_028A90_CS_DONE;
|
||||
}
|
||||
|
||||
/* These are the only allowed combinations. If you need to
|
||||
* do multiple operations at once, do them separately.
|
||||
@@ -1374,11 +1378,12 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
|
||||
* TC | TC_MD = writeback & invalidate L2 metadata (DCC, etc.)
|
||||
* TCL1 = invalidate L1
|
||||
*/
|
||||
tc_flags = EVENT_TC_ACTION_ENA |
|
||||
EVENT_TC_MD_ACTION_ENA;
|
||||
tc_flags = 0;
|
||||
|
||||
*sqtt_flush_bits |= RGP_FLUSH_FLUSH_CB | RGP_FLUSH_INVAL_CB |
|
||||
RGP_FLUSH_FLUSH_DB | RGP_FLUSH_INVAL_DB;
|
||||
if (flush_cb_db) {
|
||||
*sqtt_flush_bits |= RGP_FLUSH_FLUSH_CB | RGP_FLUSH_INVAL_CB |
|
||||
RGP_FLUSH_FLUSH_DB | RGP_FLUSH_INVAL_DB;
|
||||
}
|
||||
|
||||
/* Ideally flush TC together with CB/DB. */
|
||||
if (flush_bits & RADV_CMD_FLAG_INV_L2) {
|
||||
@@ -1393,7 +1398,11 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
|
||||
RADV_CMD_FLAG_INV_VCACHE);
|
||||
|
||||
*sqtt_flush_bits |= RGP_FLUSH_INVAL_L2;
|
||||
} else if (flush_bits & RADV_CMD_FLAG_INV_L2_METADATA) {
|
||||
tc_flags = EVENT_TC_ACTION_ENA |
|
||||
EVENT_TC_MD_ACTION_ENA;
|
||||
}
|
||||
|
||||
assert(flush_cnt);
|
||||
(*flush_cnt)++;
|
||||
|
||||
|
Reference in New Issue
Block a user