diff --git a/docs/envvars.rst b/docs/envvars.rst index 4be7a1fe1f2..1584cc7f3c2 100644 --- a/docs/envvars.rst +++ b/docs/envvars.rst @@ -1332,8 +1332,6 @@ RADV driver environment variables disable optimizations that get enabled when all VRAM is CPU visible. ``pswave32`` enable wave32 for pixel shaders (GFX10+) - ``ngg_streamout`` - enable NGG streamout ``nggc`` enable NGG culling on GPUs where it's not enabled by default (GFX10.1 only). ``sam`` diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index d8a0ab5caf3..dfb011f10d7 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -6353,11 +6353,7 @@ radv_bind_pre_rast_shader(struct radv_cmd_buffer *cmd_buffer, const struct radv_ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER; if (cmd_buffer->device->physical_device->use_ngg_streamout) { - /* GFX11 only needs GDS OA for streamout. */ - if (cmd_buffer->device->physical_device->rad_info.gfx_level < GFX11) { - cmd_buffer->gds_needed = true; - } - + /* GFX11 needs GDS OA for streamout. */ cmd_buffer->gds_oa_needed = true; } } @@ -10961,7 +10957,6 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings; struct radv_streamout_state *so = &cmd_buffer->state.streamout; struct radv_shader_info *info = &cmd_buffer->state.last_vgt_shader->info; - unsigned last_target = util_last_bit(so->enabled_mask) - 1; struct radeon_cmdbuf *cs = cmd_buffer->cs; assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS); @@ -11001,29 +10996,18 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC } if (cmd_buffer->device->physical_device->use_ngg_streamout) { - if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) { - if (append) { - radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit( - cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) | COPY_DATA_WR_CONFIRM); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - radeon_emit(cs, (R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i); - radeon_emit(cs, 0); - } else { - /* The PKT3 CAM bit workaround seems needed for initializing this GDS register to zero. */ - radeon_set_perfctr_reg(cmd_buffer->device->physical_device->rad_info.gfx_level, cmd_buffer->qf, cs, - R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 + i * 4, 0); - } - } else { - radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0)); - radeon_emit(cs, S_411_SRC_SEL(append ? V_411_SRC_ADDR_TC_L2 : V_411_DATA) | S_411_DST_SEL(V_411_GDS) | - S_411_CP_SYNC(i == last_target)); + if (append) { + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); + radeon_emit(cs, + COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) | COPY_DATA_WR_CONFIRM); radeon_emit(cs, va); radeon_emit(cs, va >> 32); - radeon_emit(cs, 4 * i); /* destination in GDS */ + radeon_emit(cs, (R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i); radeon_emit(cs, 0); - radeon_emit(cs, S_415_BYTE_COUNT_GFX9(4) | S_415_DISABLE_WR_CONFIRM_GFX9(i != last_target)); + } else { + /* The PKT3 CAM bit workaround seems needed for initializing this GDS register to zero. */ + radeon_set_perfctr_reg(cmd_buffer->device->physical_device->rad_info.gfx_level, cmd_buffer->qf, cs, + R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 + i * 4, 0); } } else { /* AMD GCN binds streamout buffers as shader resources. @@ -11104,22 +11088,14 @@ radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCou } if (cmd_buffer->device->physical_device->use_ngg_streamout) { - if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) { - if (append) { - radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit( - cs, COPY_DATA_SRC_SEL(COPY_DATA_REG) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM); - radeon_emit(cs, (R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i); - radeon_emit(cs, 0); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - } - } else { - if (append) { - si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level, cmd_buffer->qf, - V_028A90_PS_DONE, 0, EOP_DST_SEL_TC_L2, EOP_DATA_SEL_GDS, va, - EOP_DATA_GDS(i, 1), 0); - } + if (append) { + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); + radeon_emit(cs, + COPY_DATA_SRC_SEL(COPY_DATA_REG) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM); + radeon_emit(cs, (R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 >> 2) + i); + radeon_emit(cs, 0); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); } } else { if (append) { diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h index fb18bf0c8cb..12b5d1a9bbc 100644 --- a/src/amd/vulkan/radv_debug.h +++ b/src/amd/vulkan/radv_debug.h @@ -85,9 +85,8 @@ enum { RADV_PERFTEST_NGGC = 1u << 8, RADV_PERFTEST_EMULATE_RT = 1u << 9, RADV_PERFTEST_RT_WAVE_64 = 1u << 10, - RADV_PERFTEST_NGG_STREAMOUT = 1u << 11, - RADV_PERFTEST_VIDEO_DECODE = 1u << 12, - RADV_PERFTEST_DMA_SHADERS = 1u << 13, + RADV_PERFTEST_VIDEO_DECODE = 1u << 11, + RADV_PERFTEST_DMA_SHADERS = 1u << 12, }; bool radv_init_trace(struct radv_device *device); diff --git a/src/amd/vulkan/radv_instance.c b/src/amd/vulkan/radv_instance.c index 03d647ad5a5..7e9a60e2929 100644 --- a/src/amd/vulkan/radv_instance.c +++ b/src/amd/vulkan/radv_instance.c @@ -96,7 +96,6 @@ static const struct debug_control radv_perftest_options[] = {{"localbos", RADV_P {"nggc", RADV_PERFTEST_NGGC}, {"emulate_rt", RADV_PERFTEST_EMULATE_RT}, {"rtwave64", RADV_PERFTEST_RT_WAVE_64}, - {"ngg_streamout", RADV_PERFTEST_NGG_STREAMOUT}, {"video_decode", RADV_PERFTEST_VIDEO_DECODE}, {"dmashaders", RADV_PERFTEST_DMA_SHADERS}, {NULL, 0}}; diff --git a/src/amd/vulkan/radv_physical_device.c b/src/amd/vulkan/radv_physical_device.c index 541e13da935..38dcf6ce1e4 100644 --- a/src/amd/vulkan/radv_physical_device.c +++ b/src/amd/vulkan/radv_physical_device.c @@ -1882,8 +1882,7 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm (device->rad_info.gfx_level == GFX10_3 || (device->instance->perftest_flags & RADV_PERFTEST_NGGC)) && !(device->instance->debug_flags & RADV_DEBUG_NO_NGGC); - device->use_ngg_streamout = device->use_ngg && (device->rad_info.gfx_level >= GFX11 || - (device->instance->perftest_flags & RADV_PERFTEST_NGG_STREAMOUT)); + device->use_ngg_streamout = device->rad_info.gfx_level >= GFX11; device->emulate_ngg_gs_query_pipeline_stat = device->use_ngg && device->rad_info.gfx_level < GFX11; diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index d18f5b04875..7ec93b7a73a 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -242,8 +242,6 @@ radv_get_hash_flags(const struct radv_device *device, bool stats) hash_flags |= RADV_HASH_SHADER_SPLIT_FMA; if (device->instance->debug_flags & RADV_DEBUG_NO_FMASK) hash_flags |= RADV_HASH_SHADER_NO_FMASK; - if (device->physical_device->use_ngg_streamout) - hash_flags |= RADV_HASH_SHADER_NGG_STREAMOUT; if (device->instance->debug_flags & RADV_DEBUG_NO_RT) hash_flags |= RADV_HASH_SHADER_NO_RT; if (device->instance->dual_color_blend_by_location) diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 5eb9102f8c0..c87cd06bebb 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -2174,9 +2174,8 @@ struct radv_event { #define RADV_HASH_SHADER_SPLIT_FMA (1 << 17) #define RADV_HASH_SHADER_RT_WAVE64 (1 << 18) #define RADV_HASH_SHADER_NO_FMASK (1 << 19) -#define RADV_HASH_SHADER_NGG_STREAMOUT (1 << 20) -#define RADV_HASH_SHADER_NO_RT (1 << 21) -#define RADV_HASH_SHADER_DUAL_BLEND_MRT1 (1 << 22) +#define RADV_HASH_SHADER_NO_RT (1 << 20) +#define RADV_HASH_SHADER_DUAL_BLEND_MRT1 (1 << 21) struct radv_pipeline_key; struct radv_ray_tracing_group;