diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 7baa0b3aa36..94329a2a500 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -222,7 +222,8 @@ radv_bind_streamout_state(struct radv_cmd_buffer *cmd_buffer, struct radv_streamout_state *so = &cmd_buffer->state.streamout; struct radv_shader_info *info; - if (!pipeline->streamout_shader) + if (!pipeline->streamout_shader || + cmd_buffer->device->physical_device->use_ngg_streamout) return; info = &pipeline->streamout_shader->info; @@ -5810,8 +5811,9 @@ radv_set_streamout_enable(struct radv_cmd_buffer *cmd_buffer, bool enable) (so->enabled_mask << 8) | (so->enabled_mask << 12); - if ((old_streamout_enabled != so->streamout_enabled) || - (old_hw_enabled_mask != so->hw_enabled_mask)) + if (!cmd_buffer->device->physical_device->use_ngg_streamout && + ((old_streamout_enabled != so->streamout_enabled) || + (old_hw_enabled_mask != so->hw_enabled_mask))) radv_emit_streamout_enable(cmd_buffer); } diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index bdc38a555de..53a08bcdc5a 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -364,6 +364,8 @@ radv_physical_device_init(struct radv_physical_device *device, device->use_shader_ballot = device->rad_info.chip_class >= GFX8 && device->instance->perftest_flags & RADV_PERFTEST_SHADER_BALLOT; + device->use_ngg_streamout = false; + /* Determine the number of threads per wave for all stages. */ device->cs_wave_size = 64; device->ps_wave_size = 64; diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 88c0c514eae..d9c91f0591b 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -771,6 +771,9 @@ declare_streamout_sgprs(struct radv_shader_context *ctx, gl_shader_stage stage, { int i; + if (ctx->options->use_ngg_streamout) + return; + /* Streamout SGPRs. */ if (ctx->shader_info->so.num_outputs) { assert(stage == MESA_SHADER_VERTEX || @@ -2786,7 +2789,8 @@ handle_vs_outputs_post(struct radv_shader_context *ctx, sizeof(outinfo->vs_output_param_offset)); outinfo->pos_exports = 0; - if (ctx->shader_info->so.num_outputs && + if (!ctx->options->use_ngg_streamout && + ctx->shader_info->so.num_outputs && !ctx->is_gs_copy_shader) { /* The GS copy shader emission already emits streamout. */ radv_emit_streamout(ctx, 0); @@ -4479,7 +4483,8 @@ ac_gs_copy_shader_emit(struct radv_shader_context *ctx) LLVMValueRef stream_id; /* Fetch the vertex stream ID. */ - if (ctx->shader_info->so.num_outputs) { + if (!ctx->options->use_ngg_streamout && + ctx->shader_info->so.num_outputs) { stream_id = ac_unpack_param(&ctx->ac, ctx->streamout_config, 24, 2); } else { @@ -4550,7 +4555,8 @@ ac_gs_copy_shader_emit(struct radv_shader_context *ctx) } } - if (ctx->shader_info->so.num_outputs) + if (!ctx->options->use_ngg_streamout && + ctx->shader_info->so.num_outputs) radv_emit_streamout(ctx, stream); if (stream == 0) { diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 054f6ac36f8..48ea2c03929 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -2350,20 +2350,21 @@ radv_fill_shader_keys(struct radv_device *device, keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = false; } - /* TODO: Implement streamout support for NGG. */ - gl_shader_stage last_xfb_stage = MESA_SHADER_VERTEX; + if (!device->physical_device->use_ngg_streamout) { + gl_shader_stage last_xfb_stage = MESA_SHADER_VERTEX; - for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { - if (nir[i]) - last_xfb_stage = i; - } + for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { + if (nir[i]) + last_xfb_stage = i; + } - if (nir[last_xfb_stage] && - radv_nir_stage_uses_xfb(nir[last_xfb_stage])) { - if (nir[MESA_SHADER_TESS_CTRL]) - keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = false; - else - keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = false; + if (nir[last_xfb_stage] && + radv_nir_stage_uses_xfb(nir[last_xfb_stage])) { + if (nir[MESA_SHADER_TESS_CTRL]) + keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = false; + else + keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = false; + } } } diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index d6c446abd06..8b612155621 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -288,6 +288,9 @@ struct radv_physical_device { /* Whether to enable the AMD_shader_ballot extension */ bool use_shader_ballot; + /* Whether to enable NGG streamout. */ + bool use_ngg_streamout; + /* Number of threads per wave. */ uint8_t ps_wave_size; uint8_t cs_wave_size; diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 473b6b0032f..c8dd54fae53 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -683,12 +683,15 @@ static void radv_postprocess_config(const struct radv_physical_device *pdevice, config_out->float_mode |= V_00B028_FP_64_DENORMS; config_out->rsrc2 = S_00B12C_USER_SGPR(info->num_user_sgprs) | - S_00B12C_SCRATCH_EN(scratch_enabled) | - S_00B12C_SO_BASE0_EN(!!info->so.strides[0]) | - S_00B12C_SO_BASE1_EN(!!info->so.strides[1]) | - S_00B12C_SO_BASE2_EN(!!info->so.strides[2]) | - S_00B12C_SO_BASE3_EN(!!info->so.strides[3]) | - S_00B12C_SO_EN(!!info->so.num_outputs); + S_00B12C_SCRATCH_EN(scratch_enabled); + + if (!pdevice->use_ngg_streamout) { + config_out->rsrc2 |= S_00B12C_SO_BASE0_EN(!!info->so.strides[0]) | + S_00B12C_SO_BASE1_EN(!!info->so.strides[1]) | + S_00B12C_SO_BASE2_EN(!!info->so.strides[2]) | + S_00B12C_SO_BASE3_EN(!!info->so.strides[3]) | + S_00B12C_SO_EN(!!info->so.num_outputs); + } config_out->rsrc1 = S_00B848_VGPRS((num_vgprs - 1) / (info->wave_size == 32 ? 8 : 4)) | @@ -1050,6 +1053,7 @@ shader_variant_compile(struct radv_device *device, options->tess_offchip_block_dw_size = device->tess_offchip_block_dw_size; options->address32_hi = device->physical_device->rad_info.address32_hi; options->has_ls_vgpr_init_bug = device->physical_device->rad_info.has_ls_vgpr_init_bug; + options->use_ngg_streamout = device->physical_device->use_ngg_streamout; if ((stage == MESA_SHADER_GEOMETRY && !options->key.vs_common_out.as_ngg) || gs_copy_shader) diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index 61431cc9683..874318e7dc4 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -126,6 +126,7 @@ struct radv_nir_compiler_options { bool record_llvm_ir; bool check_ir; bool has_ls_vgpr_init_bug; + bool use_ngg_streamout; enum radeon_family family; enum chip_class chip_class; uint32_t tess_offchip_block_dw_size;