radv: remove NGG streamout support for RDNA1-2
This was useful for experimenting with it on RDNA2 and during RDNA3 bringup, but now that the support is rock solid on RDNA3, it's useless to keep the RADV_PERFTEST=ngg_streamout option. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25903>
This commit is contained in:

committed by
Marge Bot

parent
7beddd4f5c
commit
eb47e07782
@@ -1332,8 +1332,6 @@ RADV driver environment variables
|
||||
disable optimizations that get enabled when all VRAM is CPU visible.
|
||||
``pswave32``
|
||||
enable wave32 for pixel shaders (GFX10+)
|
||||
``ngg_streamout``
|
||||
enable NGG streamout
|
||||
``nggc``
|
||||
enable NGG culling on GPUs where it's not enabled by default (GFX10.1 only).
|
||||
``sam``
|
||||
|
@@ -6353,11 +6353,7 @@ radv_bind_pre_rast_shader(struct radv_cmd_buffer *cmd_buffer, const struct radv_
|
||||
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER;
|
||||
|
||||
if (cmd_buffer->device->physical_device->use_ngg_streamout) {
|
||||
/* GFX11 only needs GDS OA for streamout. */
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level < GFX11) {
|
||||
cmd_buffer->gds_needed = true;
|
||||
}
|
||||
|
||||
/* GFX11 needs GDS OA for streamout. */
|
||||
cmd_buffer->gds_oa_needed = true;
|
||||
}
|
||||
}
|
||||
@@ -10961,7 +10957,6 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC
|
||||
struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings;
|
||||
struct radv_streamout_state *so = &cmd_buffer->state.streamout;
|
||||
struct radv_shader_info *info = &cmd_buffer->state.last_vgt_shader->info;
|
||||
unsigned last_target = util_last_bit(so->enabled_mask) - 1;
|
||||
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
||||
|
||||
assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
|
||||
@@ -11001,29 +10996,18 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC
|
||||
}
|
||||
|
||||
if (cmd_buffer->device->physical_device->use_ngg_streamout) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
|
||||
if (append) {
|
||||
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
|
||||
radeon_emit(
|
||||
cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) | COPY_DATA_WR_CONFIRM);
|
||||
radeon_emit(cs, va);
|
||||
radeon_emit(cs, va >> 32);
|
||||
radeon_emit(cs, (R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i);
|
||||
radeon_emit(cs, 0);
|
||||
} else {
|
||||
/* The PKT3 CAM bit workaround seems needed for initializing this GDS register to zero. */
|
||||
radeon_set_perfctr_reg(cmd_buffer->device->physical_device->rad_info.gfx_level, cmd_buffer->qf, cs,
|
||||
R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 + i * 4, 0);
|
||||
}
|
||||
} else {
|
||||
radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
|
||||
radeon_emit(cs, S_411_SRC_SEL(append ? V_411_SRC_ADDR_TC_L2 : V_411_DATA) | S_411_DST_SEL(V_411_GDS) |
|
||||
S_411_CP_SYNC(i == last_target));
|
||||
if (append) {
|
||||
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
|
||||
radeon_emit(cs,
|
||||
COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) | COPY_DATA_WR_CONFIRM);
|
||||
radeon_emit(cs, va);
|
||||
radeon_emit(cs, va >> 32);
|
||||
radeon_emit(cs, 4 * i); /* destination in GDS */
|
||||
radeon_emit(cs, (R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i);
|
||||
radeon_emit(cs, 0);
|
||||
radeon_emit(cs, S_415_BYTE_COUNT_GFX9(4) | S_415_DISABLE_WR_CONFIRM_GFX9(i != last_target));
|
||||
} else {
|
||||
/* The PKT3 CAM bit workaround seems needed for initializing this GDS register to zero. */
|
||||
radeon_set_perfctr_reg(cmd_buffer->device->physical_device->rad_info.gfx_level, cmd_buffer->qf, cs,
|
||||
R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 + i * 4, 0);
|
||||
}
|
||||
} else {
|
||||
/* AMD GCN binds streamout buffers as shader resources.
|
||||
@@ -11104,22 +11088,14 @@ radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCou
|
||||
}
|
||||
|
||||
if (cmd_buffer->device->physical_device->use_ngg_streamout) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
|
||||
if (append) {
|
||||
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
|
||||
radeon_emit(
|
||||
cs, COPY_DATA_SRC_SEL(COPY_DATA_REG) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM);
|
||||
radeon_emit(cs, (R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i);
|
||||
radeon_emit(cs, 0);
|
||||
radeon_emit(cs, va);
|
||||
radeon_emit(cs, va >> 32);
|
||||
}
|
||||
} else {
|
||||
if (append) {
|
||||
si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level, cmd_buffer->qf,
|
||||
V_028A90_PS_DONE, 0, EOP_DST_SEL_TC_L2, EOP_DATA_SEL_GDS, va,
|
||||
EOP_DATA_GDS(i, 1), 0);
|
||||
}
|
||||
if (append) {
|
||||
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
|
||||
radeon_emit(cs,
|
||||
COPY_DATA_SRC_SEL(COPY_DATA_REG) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM);
|
||||
radeon_emit(cs, (R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i);
|
||||
radeon_emit(cs, 0);
|
||||
radeon_emit(cs, va);
|
||||
radeon_emit(cs, va >> 32);
|
||||
}
|
||||
} else {
|
||||
if (append) {
|
||||
|
@@ -85,9 +85,8 @@ enum {
|
||||
RADV_PERFTEST_NGGC = 1u << 8,
|
||||
RADV_PERFTEST_EMULATE_RT = 1u << 9,
|
||||
RADV_PERFTEST_RT_WAVE_64 = 1u << 10,
|
||||
RADV_PERFTEST_NGG_STREAMOUT = 1u << 11,
|
||||
RADV_PERFTEST_VIDEO_DECODE = 1u << 12,
|
||||
RADV_PERFTEST_DMA_SHADERS = 1u << 13,
|
||||
RADV_PERFTEST_VIDEO_DECODE = 1u << 11,
|
||||
RADV_PERFTEST_DMA_SHADERS = 1u << 12,
|
||||
};
|
||||
|
||||
bool radv_init_trace(struct radv_device *device);
|
||||
|
@@ -96,7 +96,6 @@ static const struct debug_control radv_perftest_options[] = {{"localbos", RADV_P
|
||||
{"nggc", RADV_PERFTEST_NGGC},
|
||||
{"emulate_rt", RADV_PERFTEST_EMULATE_RT},
|
||||
{"rtwave64", RADV_PERFTEST_RT_WAVE_64},
|
||||
{"ngg_streamout", RADV_PERFTEST_NGG_STREAMOUT},
|
||||
{"video_decode", RADV_PERFTEST_VIDEO_DECODE},
|
||||
{"dmashaders", RADV_PERFTEST_DMA_SHADERS},
|
||||
{NULL, 0}};
|
||||
|
@@ -1882,8 +1882,7 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm
|
||||
(device->rad_info.gfx_level == GFX10_3 || (device->instance->perftest_flags & RADV_PERFTEST_NGGC)) &&
|
||||
!(device->instance->debug_flags & RADV_DEBUG_NO_NGGC);
|
||||
|
||||
device->use_ngg_streamout = device->use_ngg && (device->rad_info.gfx_level >= GFX11 ||
|
||||
(device->instance->perftest_flags & RADV_PERFTEST_NGG_STREAMOUT));
|
||||
device->use_ngg_streamout = device->rad_info.gfx_level >= GFX11;
|
||||
|
||||
device->emulate_ngg_gs_query_pipeline_stat = device->use_ngg && device->rad_info.gfx_level < GFX11;
|
||||
|
||||
|
@@ -242,8 +242,6 @@ radv_get_hash_flags(const struct radv_device *device, bool stats)
|
||||
hash_flags |= RADV_HASH_SHADER_SPLIT_FMA;
|
||||
if (device->instance->debug_flags & RADV_DEBUG_NO_FMASK)
|
||||
hash_flags |= RADV_HASH_SHADER_NO_FMASK;
|
||||
if (device->physical_device->use_ngg_streamout)
|
||||
hash_flags |= RADV_HASH_SHADER_NGG_STREAMOUT;
|
||||
if (device->instance->debug_flags & RADV_DEBUG_NO_RT)
|
||||
hash_flags |= RADV_HASH_SHADER_NO_RT;
|
||||
if (device->instance->dual_color_blend_by_location)
|
||||
|
@@ -2174,9 +2174,8 @@ struct radv_event {
|
||||
#define RADV_HASH_SHADER_SPLIT_FMA (1 << 17)
|
||||
#define RADV_HASH_SHADER_RT_WAVE64 (1 << 18)
|
||||
#define RADV_HASH_SHADER_NO_FMASK (1 << 19)
|
||||
#define RADV_HASH_SHADER_NGG_STREAMOUT (1 << 20)
|
||||
#define RADV_HASH_SHADER_NO_RT (1 << 21)
|
||||
#define RADV_HASH_SHADER_DUAL_BLEND_MRT1 (1 << 22)
|
||||
#define RADV_HASH_SHADER_NO_RT (1 << 20)
|
||||
#define RADV_HASH_SHADER_DUAL_BLEND_MRT1 (1 << 21)
|
||||
|
||||
struct radv_pipeline_key;
|
||||
struct radv_ray_tracing_group;
|
||||
|
Reference in New Issue
Block a user