radv: remove NGG streamout support for RDNA1-2

This was useful for experimenting with it on RDNA2 and during RDNA3 bringup,
but now the support is rock solid on RDNA3 and it's useless to keep the
RADV_PERFTEST=ngg_streamout option.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25903>
This commit is contained in:
Samuel Pitoiset
2023-10-26 09:56:37 +02:00
committed by Marge Bot
parent 7beddd4f5c
commit eb47e07782
7 changed files with 23 additions and 55 deletions

View File

@@ -1332,8 +1332,6 @@ RADV driver environment variables
disable optimizations that get enabled when all VRAM is CPU visible.
``pswave32``
enable wave32 for pixel shaders (GFX10+)
``ngg_streamout``
enable NGG streamout
``nggc``
enable NGG culling on GPUs where it's not enabled by default (GFX10.1 only).
``sam``

View File

@@ -6353,11 +6353,7 @@ radv_bind_pre_rast_shader(struct radv_cmd_buffer *cmd_buffer, const struct radv_
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER;
if (cmd_buffer->device->physical_device->use_ngg_streamout) {
/* GFX11 only needs GDS OA for streamout. */
if (cmd_buffer->device->physical_device->rad_info.gfx_level < GFX11) {
cmd_buffer->gds_needed = true;
}
/* GFX11 needs GDS OA for streamout. */
cmd_buffer->gds_oa_needed = true;
}
}
@@ -10961,7 +10957,6 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC
struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings;
struct radv_streamout_state *so = &cmd_buffer->state.streamout;
struct radv_shader_info *info = &cmd_buffer->state.last_vgt_shader->info;
unsigned last_target = util_last_bit(so->enabled_mask) - 1;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
@@ -11001,29 +10996,18 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC
}
if (cmd_buffer->device->physical_device->use_ngg_streamout) {
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
if (append) {
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(
cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) | COPY_DATA_WR_CONFIRM);
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
radeon_emit(cs, (R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i);
radeon_emit(cs, 0);
} else {
/* The PKT3 CAM bit workaround seems needed for initializing this GDS register to zero. */
radeon_set_perfctr_reg(cmd_buffer->device->physical_device->rad_info.gfx_level, cmd_buffer->qf, cs,
R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 + i * 4, 0);
}
} else {
radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
radeon_emit(cs, S_411_SRC_SEL(append ? V_411_SRC_ADDR_TC_L2 : V_411_DATA) | S_411_DST_SEL(V_411_GDS) |
S_411_CP_SYNC(i == last_target));
if (append) {
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(cs,
COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) | COPY_DATA_WR_CONFIRM);
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
radeon_emit(cs, 4 * i); /* destination in GDS */
radeon_emit(cs, (R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i);
radeon_emit(cs, 0);
radeon_emit(cs, S_415_BYTE_COUNT_GFX9(4) | S_415_DISABLE_WR_CONFIRM_GFX9(i != last_target));
} else {
/* The PKT3 CAM bit workaround seems needed for initializing this GDS register to zero. */
radeon_set_perfctr_reg(cmd_buffer->device->physical_device->rad_info.gfx_level, cmd_buffer->qf, cs,
R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 + i * 4, 0);
}
} else {
/* AMD GCN binds streamout buffers as shader resources.
@@ -11104,22 +11088,14 @@ radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCou
}
if (cmd_buffer->device->physical_device->use_ngg_streamout) {
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
if (append) {
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(
cs, COPY_DATA_SRC_SEL(COPY_DATA_REG) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM);
radeon_emit(cs, (R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i);
radeon_emit(cs, 0);
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
}
} else {
if (append) {
si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level, cmd_buffer->qf,
V_028A90_PS_DONE, 0, EOP_DST_SEL_TC_L2, EOP_DATA_SEL_GDS, va,
EOP_DATA_GDS(i, 1), 0);
}
if (append) {
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(cs,
COPY_DATA_SRC_SEL(COPY_DATA_REG) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM);
radeon_emit(cs, (R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i);
radeon_emit(cs, 0);
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
}
} else {
if (append) {

View File

@@ -85,9 +85,8 @@ enum {
RADV_PERFTEST_NGGC = 1u << 8,
RADV_PERFTEST_EMULATE_RT = 1u << 9,
RADV_PERFTEST_RT_WAVE_64 = 1u << 10,
RADV_PERFTEST_NGG_STREAMOUT = 1u << 11,
RADV_PERFTEST_VIDEO_DECODE = 1u << 12,
RADV_PERFTEST_DMA_SHADERS = 1u << 13,
RADV_PERFTEST_VIDEO_DECODE = 1u << 11,
RADV_PERFTEST_DMA_SHADERS = 1u << 12,
};
bool radv_init_trace(struct radv_device *device);

View File

@@ -96,7 +96,6 @@ static const struct debug_control radv_perftest_options[] = {{"localbos", RADV_P
{"nggc", RADV_PERFTEST_NGGC},
{"emulate_rt", RADV_PERFTEST_EMULATE_RT},
{"rtwave64", RADV_PERFTEST_RT_WAVE_64},
{"ngg_streamout", RADV_PERFTEST_NGG_STREAMOUT},
{"video_decode", RADV_PERFTEST_VIDEO_DECODE},
{"dmashaders", RADV_PERFTEST_DMA_SHADERS},
{NULL, 0}};

View File

@@ -1882,8 +1882,7 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm
(device->rad_info.gfx_level == GFX10_3 || (device->instance->perftest_flags & RADV_PERFTEST_NGGC)) &&
!(device->instance->debug_flags & RADV_DEBUG_NO_NGGC);
device->use_ngg_streamout = device->use_ngg && (device->rad_info.gfx_level >= GFX11 ||
(device->instance->perftest_flags & RADV_PERFTEST_NGG_STREAMOUT));
device->use_ngg_streamout = device->rad_info.gfx_level >= GFX11;
device->emulate_ngg_gs_query_pipeline_stat = device->use_ngg && device->rad_info.gfx_level < GFX11;

View File

@@ -242,8 +242,6 @@ radv_get_hash_flags(const struct radv_device *device, bool stats)
hash_flags |= RADV_HASH_SHADER_SPLIT_FMA;
if (device->instance->debug_flags & RADV_DEBUG_NO_FMASK)
hash_flags |= RADV_HASH_SHADER_NO_FMASK;
if (device->physical_device->use_ngg_streamout)
hash_flags |= RADV_HASH_SHADER_NGG_STREAMOUT;
if (device->instance->debug_flags & RADV_DEBUG_NO_RT)
hash_flags |= RADV_HASH_SHADER_NO_RT;
if (device->instance->dual_color_blend_by_location)

View File

@@ -2174,9 +2174,8 @@ struct radv_event {
#define RADV_HASH_SHADER_SPLIT_FMA (1 << 17)
#define RADV_HASH_SHADER_RT_WAVE64 (1 << 18)
#define RADV_HASH_SHADER_NO_FMASK (1 << 19)
#define RADV_HASH_SHADER_NGG_STREAMOUT (1 << 20)
#define RADV_HASH_SHADER_NO_RT (1 << 21)
#define RADV_HASH_SHADER_DUAL_BLEND_MRT1 (1 << 22)
#define RADV_HASH_SHADER_NO_RT (1 << 20)
#define RADV_HASH_SHADER_DUAL_BLEND_MRT1 (1 << 21)
struct radv_pipeline_key;
struct radv_ray_tracing_group;