radv: Enable NGG culling by default on GFX10.3, add nonggc debug flag.

This commit enables NGG culling on all GFX10.3 GPUs by default.

A new debug flag, RADV_DEBUG=nonggc, is added to disable this
feature on GPUs where it is enabled by default.

The previous perf test flag, RADV_PERFTEST=nggc, is no longer needed
on GFX10.3, but it can still be used to enable the feature on GPUs
where it isn't on by default.

Totals from 58239 (45.27% of 128647) affected shaders:
VGPRs: 1989752 -> 2049408 (+3.00%); split: -3.21%, +6.21%
SpillSGPRs: 675 -> 883 (+30.81%); split: -78.07%, +108.89%
CodeSize: 72205968 -> 153572764 (+112.69%)
LDS: 0 -> 227125248 (+inf%)
MaxWaves: 1614598 -> 1646934 (+2.00%); split: +3.08%, -1.08%
Instrs: 14202239 -> 29654042 (+108.80%)
Latency: 87986508 -> 136960419 (+55.66%); split: -0.23%, +55.89%
InvThroughput: 14444832 -> 21141875 (+46.36%); split: -0.01%, +46.37%
VClause: 340794 -> 493067 (+44.68%); split: -1.33%, +46.01%
SClause: 520983 -> 738636 (+41.78%); split: -0.25%, +42.03%
Copies: 775639 -> 2787382 (+259.37%)
Branches: 296911 -> 1225431 (+312.73%)
PreSGPRs: 1316896 -> 2057270 (+56.22%); split: -0.14%, +56.36%
PreVGPRs: 1473558 -> 1658432 (+12.55%); split: -1.44%, +13.99%

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13086>
This commit is contained in:
Timur Kristóf
2021-09-28 16:21:42 +02:00
committed by Marge Bot
parent 5317874f91
commit 52413a93af
8 changed files with 22 additions and 12 deletions

View File

@@ -657,6 +657,8 @@ RADV driver environment variables
disable memory shaders cache
``nongg``
disable NGG for GFX10+
``nonggc``
disable NGG culling on GPUs where it's enabled by default (GFX10.3+ only).
``nooutoforder``
disable out-of-order rasterization
``notccompatcmask``
@@ -712,7 +714,7 @@ RADV driver environment variables
``pswave32``
enable wave32 for pixel shaders (GFX10+)
``nggc``
enable NGG culling on GFX10+ GPUs.
enable NGG culling on GPUs where it's not enabled by default (GFX10.1 only).
``rt``
enable rt extensions whose implementation is still experimental.
``sam``

View File

@@ -16,3 +16,4 @@ VK_KHR_shader_subgroup_extended_types on lavapipe
VK_KHR_spirv_1_4 on lavapipe
Experimental raytracing support on RADV
VK_KHR_synchronization2 on Intel
NGG shader based culling is now enabled by default on GFX10.3 on RADV.

View File

@@ -5927,7 +5927,7 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct r
cmd_buffer->state.emitted_pipeline != cmd_buffer->state.pipeline)
radv_emit_rbplus_state(cmd_buffer);
if ((cmd_buffer->device->instance->perftest_flags & RADV_PERFTEST_NGGC) &&
if (cmd_buffer->device->physical_device->use_ngg_culling &&
cmd_buffer->state.pipeline->graphics.is_ngg)
radv_emit_ngg_culling_state(cmd_buffer, info);

View File

@@ -62,6 +62,7 @@ enum {
RADV_DEBUG_NO_TC_COMPAT_CMASK = 1ull << 31,
RADV_DEBUG_NO_VRS_FLAT_SHADING = 1ull << 32,
RADV_DEBUG_NO_ATOC_DITHERING = 1ull << 33,
RADV_DEBUG_NO_NGGC = 1ull << 34,
};
enum {

View File

@@ -705,6 +705,13 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm
device->rad_info.family != CHIP_NAVI14 &&
!(device->instance->debug_flags & RADV_DEBUG_NO_NGG);
device->use_ngg_culling =
device->use_ngg &&
device->rad_info.max_render_backends > 1 &&
(device->rad_info.chip_class >= GFX10_3 ||
(device->instance->perftest_flags & RADV_PERFTEST_NGGC)) &&
!(device->instance->debug_flags & RADV_DEBUG_NO_NGGC);
device->use_ngg_streamout = false;
/* Determine the number of threads per wave for all stages. */
@@ -841,6 +848,7 @@ static const struct debug_control radv_debug_options[] = {
{"notccompatcmask", RADV_DEBUG_NO_TC_COMPAT_CMASK},
{"novrsflatshading", RADV_DEBUG_NO_VRS_FLAT_SHADING},
{"noatocdithering", RADV_DEBUG_NO_ATOC_DITHERING},
{"nonggc", RADV_DEBUG_NO_NGGC},
{NULL, 0}};
const char *

View File

@@ -217,8 +217,8 @@ radv_get_hash_flags(const struct radv_device *device, bool stats)
{
uint32_t hash_flags = 0;
if (device->instance->perftest_flags & RADV_PERFTEST_NGGC)
hash_flags |= RADV_HASH_SHADER_FORCE_NGG_CULLING;
if (device->physical_device->use_ngg_culling)
hash_flags |= RADV_HASH_SHADER_USE_NGG_CULLING;
if (device->instance->perftest_flags & RADV_PERFTEST_FORCE_EMULATE_RT)
hash_flags |= RADV_HASH_SHADER_FORCE_EMULATE_RT;
if (device->physical_device->cs_wave_size == 32)

View File

@@ -262,6 +262,9 @@ struct radv_physical_device {
/* Whether to enable NGG. */
bool use_ngg;
/* Whether to enable NGG culling. */
bool use_ngg_culling;
/* Whether to enable NGG streamout. */
bool use_ngg_streamout;
@@ -1649,7 +1652,7 @@ struct radv_event {
#define RADV_HASH_SHADER_GE_WAVE32 (1 << 3)
#define RADV_HASH_SHADER_LLVM (1 << 4)
#define RADV_HASH_SHADER_KEEP_STATISTICS (1 << 8)
#define RADV_HASH_SHADER_FORCE_NGG_CULLING (1 << 13)
#define RADV_HASH_SHADER_USE_NGG_CULLING (1 << 13)
#define RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS (1 << 14)
#define RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS2 (1 << 15)
#define RADV_HASH_SHADER_FORCE_EMULATE_RT (1 << 16)

View File

@@ -899,10 +899,7 @@ radv_consider_culling(struct radv_device *device, struct nir_shader *nir,
if (nir->info.outputs_written & (VARYING_BIT_VIEWPORT | VARYING_BIT_VIEWPORT_MASK))
return false;
/* TODO: enable by default on GFX10.3 when we're confident about performance. */
bool culling_enabled = device->instance->perftest_flags & RADV_PERFTEST_NGGC;
if (!culling_enabled)
if (!device->physical_device->use_ngg_culling)
return false;
/* Shader based culling efficiency can depend on PS throughput.
@@ -912,9 +909,7 @@ radv_consider_culling(struct radv_device *device, struct nir_shader *nir,
unsigned max_render_backends = device->physical_device->rad_info.max_render_backends;
unsigned max_se = device->physical_device->rad_info.max_se;
if (max_render_backends < 2)
return false; /* Don't use NGG culling on 1 RB chips. */
else if (max_render_backends / max_se == 4)
if (max_render_backends / max_se == 4)
max_ps_params = 6; /* Sienna Cichlid and other GFX10.3 dGPUs. */
else
max_ps_params = 4; /* Navi 1x. */