radv: Enable NGG culling by default on GFX10.3, add nonggc debug flag.
This commit enables NGG culling on all GFX10.3 GPUs by default.
A new debug flag, RADV_DEBUG=nonggc, is added to
disable this feature on GPUs where it is enabled by default.
The previous perf test flag RADV_PERFTEST=nggc will not be needed on
GFX10.3 anymore, but it can still be used to enable the feature on
GPUs where it isn't on by default.
Totals from 58239 (45.27% of 128647) affected shaders:
VGPRs: 1989752 -> 2049408 (+3.00%); split: -3.21%, +6.21%
SpillSGPRs: 675 -> 883 (+30.81%); split: -78.07%, +108.89%
CodeSize: 72205968 -> 153572764 (+112.69%)
LDS: 0 -> 227125248 (+inf%)
MaxWaves: 1614598 -> 1646934 (+2.00%); split: +3.08%, -1.08%
Instrs: 14202239 -> 29654042 (+108.80%)
Latency: 87986508 -> 136960419 (+55.66%); split: -0.23%, +55.89%
InvThroughput: 14444832 -> 21141875 (+46.36%); split: -0.01%, +46.37%
VClause: 340794 -> 493067 (+44.68%); split: -1.33%, +46.01%
SClause: 520983 -> 738636 (+41.78%); split: -0.25%, +42.03%
Copies: 775639 -> 2787382 (+259.37%)
Branches: 296911 -> 1225431 (+312.73%)
PreSGPRs: 1316896 -> 2057270 (+56.22%); split: -0.14%, +56.36%
PreVGPRs: 1473558 -> 1658432 (+12.55%); split: -1.44%, +13.99%
Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13086>
This commit is contained in:
@@ -657,6 +657,8 @@ RADV driver environment variables
|
||||
disable memory shaders cache
|
||||
``nongg``
|
||||
disable NGG for GFX10+
|
||||
``nonggc``
|
||||
disable NGG culling on GPUs where it's enabled by default (GFX10.3+ only).
|
||||
``nooutoforder``
|
||||
disable out-of-order rasterization
|
||||
``notccompatcmask``
|
||||
@@ -712,7 +714,7 @@ RADV driver environment variables
|
||||
``pswave32``
|
||||
enable wave32 for pixel shaders (GFX10+)
|
||||
``nggc``
|
||||
enable NGG culling on GFX10+ GPUs.
|
||||
enable NGG culling on GPUs where it's not enabled by default (GFX10.1 only).
|
||||
``rt``
|
||||
enable rt extensions whose implementation is still experimental.
|
||||
``sam``
|
||||
|
@@ -16,3 +16,4 @@ VK_KHR_shader_subgroup_extended_types on lavapipe
|
||||
VK_KHR_spirv_1_4 on lavapipe
|
||||
Experimental raytracing support on RADV
|
||||
VK_KHR_synchronization2 on Intel
|
||||
NGG shader based culling is now enabled by default on GFX10.3 on RADV.
|
@@ -5927,7 +5927,7 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct r
|
||||
cmd_buffer->state.emitted_pipeline != cmd_buffer->state.pipeline)
|
||||
radv_emit_rbplus_state(cmd_buffer);
|
||||
|
||||
if ((cmd_buffer->device->instance->perftest_flags & RADV_PERFTEST_NGGC) &&
|
||||
if (cmd_buffer->device->physical_device->use_ngg_culling &&
|
||||
cmd_buffer->state.pipeline->graphics.is_ngg)
|
||||
radv_emit_ngg_culling_state(cmd_buffer, info);
|
||||
|
||||
|
@@ -62,6 +62,7 @@ enum {
|
||||
RADV_DEBUG_NO_TC_COMPAT_CMASK = 1ull << 31,
|
||||
RADV_DEBUG_NO_VRS_FLAT_SHADING = 1ull << 32,
|
||||
RADV_DEBUG_NO_ATOC_DITHERING = 1ull << 33,
|
||||
RADV_DEBUG_NO_NGGC = 1ull << 34,
|
||||
};
|
||||
|
||||
enum {
|
||||
|
@@ -705,6 +705,13 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm
|
||||
device->rad_info.family != CHIP_NAVI14 &&
|
||||
!(device->instance->debug_flags & RADV_DEBUG_NO_NGG);
|
||||
|
||||
device->use_ngg_culling =
|
||||
device->use_ngg &&
|
||||
device->rad_info.max_render_backends > 1 &&
|
||||
(device->rad_info.chip_class >= GFX10_3 ||
|
||||
(device->instance->perftest_flags & RADV_PERFTEST_NGGC)) &&
|
||||
!(device->instance->debug_flags & RADV_DEBUG_NO_NGGC);
|
||||
|
||||
device->use_ngg_streamout = false;
|
||||
|
||||
/* Determine the number of threads per wave for all stages. */
|
||||
@@ -841,6 +848,7 @@ static const struct debug_control radv_debug_options[] = {
|
||||
{"notccompatcmask", RADV_DEBUG_NO_TC_COMPAT_CMASK},
|
||||
{"novrsflatshading", RADV_DEBUG_NO_VRS_FLAT_SHADING},
|
||||
{"noatocdithering", RADV_DEBUG_NO_ATOC_DITHERING},
|
||||
{"nonggc", RADV_DEBUG_NO_NGGC},
|
||||
{NULL, 0}};
|
||||
|
||||
const char *
|
||||
|
@@ -217,8 +217,8 @@ radv_get_hash_flags(const struct radv_device *device, bool stats)
|
||||
{
|
||||
uint32_t hash_flags = 0;
|
||||
|
||||
if (device->instance->perftest_flags & RADV_PERFTEST_NGGC)
|
||||
hash_flags |= RADV_HASH_SHADER_FORCE_NGG_CULLING;
|
||||
if (device->physical_device->use_ngg_culling)
|
||||
hash_flags |= RADV_HASH_SHADER_USE_NGG_CULLING;
|
||||
if (device->instance->perftest_flags & RADV_PERFTEST_FORCE_EMULATE_RT)
|
||||
hash_flags |= RADV_HASH_SHADER_FORCE_EMULATE_RT;
|
||||
if (device->physical_device->cs_wave_size == 32)
|
||||
|
@@ -262,6 +262,9 @@ struct radv_physical_device {
|
||||
/* Whether to enable NGG. */
|
||||
bool use_ngg;
|
||||
|
||||
/* Whether to enable NGG culling. */
|
||||
bool use_ngg_culling;
|
||||
|
||||
/* Whether to enable NGG streamout. */
|
||||
bool use_ngg_streamout;
|
||||
|
||||
@@ -1649,7 +1652,7 @@ struct radv_event {
|
||||
#define RADV_HASH_SHADER_GE_WAVE32 (1 << 3)
|
||||
#define RADV_HASH_SHADER_LLVM (1 << 4)
|
||||
#define RADV_HASH_SHADER_KEEP_STATISTICS (1 << 8)
|
||||
#define RADV_HASH_SHADER_FORCE_NGG_CULLING (1 << 13)
|
||||
#define RADV_HASH_SHADER_USE_NGG_CULLING (1 << 13)
|
||||
#define RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS (1 << 14)
|
||||
#define RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS2 (1 << 15)
|
||||
#define RADV_HASH_SHADER_FORCE_EMULATE_RT (1 << 16)
|
||||
|
@@ -899,10 +899,7 @@ radv_consider_culling(struct radv_device *device, struct nir_shader *nir,
|
||||
if (nir->info.outputs_written & (VARYING_BIT_VIEWPORT | VARYING_BIT_VIEWPORT_MASK))
|
||||
return false;
|
||||
|
||||
/* TODO: enable by default on GFX10.3 when we're confident about performance. */
|
||||
bool culling_enabled = device->instance->perftest_flags & RADV_PERFTEST_NGGC;
|
||||
|
||||
if (!culling_enabled)
|
||||
if (!device->physical_device->use_ngg_culling)
|
||||
return false;
|
||||
|
||||
/* Shader based culling efficiency can depend on PS throughput.
|
||||
@@ -912,9 +909,7 @@ radv_consider_culling(struct radv_device *device, struct nir_shader *nir,
|
||||
unsigned max_render_backends = device->physical_device->rad_info.max_render_backends;
|
||||
unsigned max_se = device->physical_device->rad_info.max_se;
|
||||
|
||||
if (max_render_backends < 2)
|
||||
return false; /* Don't use NGG culling on 1 RB chips. */
|
||||
else if (max_render_backends / max_se == 4)
|
||||
if (max_render_backends / max_se == 4)
|
||||
max_ps_params = 6; /* Sienna Cichlid and other GFX10.3 dGPUs. */
|
||||
else
|
||||
max_ps_params = 4; /* Navi 1x. */
|
||||
|
Reference in New Issue
Block a user