diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 513c101bcdf..f6b4f846430 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1726,6 +1726,15 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader, bool *free_nir, if (sel->stage == MESA_SHADER_FRAGMENT && key->ps.mono.point_smoothing) NIR_PASS(progress, nir, nir_lower_point_smooth); + bool is_last_vgt_stage = + (sel->stage == MESA_SHADER_VERTEX || + sel->stage == MESA_SHADER_TESS_EVAL || + (sel->stage == MESA_SHADER_GEOMETRY && shader->key.ge.as_ngg)) && + !shader->key.ge.as_ls && !shader->key.ge.as_es; + + if (is_last_vgt_stage) + NIR_PASS(progress, nir, si_nir_clamp_vertex_color); + if (progress) si_nir_opts(sel->screen, nir, true); diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c index 0daea376085..363267119bc 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c @@ -375,6 +375,71 @@ void si_preload_gs_rings(struct si_shader_context *ctx) } } +/** + * Conditionally clamp the color outputs (COL0, COL1, BFC0, BFC1). + * + * The VS_STATE_CLAMP_VERTEX_COLOR state, loaded in a user data SGPR, guards an + * IF block that overwrites those outputs with clamped values when it is set. + * The values in outputs[] are updated in place; other outputs are untouched. + */ +static void si_vertex_color_clamping(struct si_shader_context *ctx, + struct si_shader_output_values *outputs, unsigned noutput) +{ + LLVMValueRef addr[SI_MAX_VS_OUTPUTS][4]; + bool has_colors = false; + + /* Store original colors to alloca variables. 
*/ + for (unsigned i = 0; i < noutput; i++) { + if (outputs[i].semantic != VARYING_SLOT_COL0 && + outputs[i].semantic != VARYING_SLOT_COL1 && + outputs[i].semantic != VARYING_SLOT_BFC0 && + outputs[i].semantic != VARYING_SLOT_BFC1) + continue; + + for (unsigned j = 0; j < 4; j++) + addr[i][j] = ac_build_alloca_init(&ctx->ac, outputs[i].values[j], ""); + + has_colors = true; + } + + if (!has_colors) + return; + + /* The state is in the first bit of the user SGPR. */ + LLVMValueRef cond = GET_FIELD(ctx, VS_STATE_CLAMP_VERTEX_COLOR); + cond = LLVMBuildTrunc(ctx->ac.builder, cond, ctx->ac.i1, ""); + + ac_build_ifcc(&ctx->ac, cond, 6502); + + /* Store clamped colors to alloca variables within the conditional block. */ + for (unsigned i = 0; i < noutput; i++) { + if (outputs[i].semantic != VARYING_SLOT_COL0 && + outputs[i].semantic != VARYING_SLOT_COL1 && + outputs[i].semantic != VARYING_SLOT_BFC0 && + outputs[i].semantic != VARYING_SLOT_BFC1) + continue; + + for (unsigned j = 0; j < 4; j++) { + LLVMBuildStore(ctx->ac.builder, ac_build_clamp(&ctx->ac, outputs[i].values[j]), + addr[i][j]); + } + } + ac_build_endif(&ctx->ac, 6502); + + /* Load the final colors back (clamped only if the branch above was taken). */ + for (unsigned i = 0; i < noutput; i++) { + if (outputs[i].semantic != VARYING_SLOT_COL0 && + outputs[i].semantic != VARYING_SLOT_COL1 && + outputs[i].semantic != VARYING_SLOT_BFC0 && + outputs[i].semantic != VARYING_SLOT_BFC1) + continue; + + for (unsigned j = 0; j < 4; j++) { + outputs[i].values[j] = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.f32, addr[i][j], ""); + } + } +} + /* Generate code for the hardware VS shader stage to go with a geometry shader */ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compiler, @@ -496,8 +561,10 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen, si_llvm_emit_streamout(&ctx, outputs, gsinfo->num_outputs, stream); } - if (stream == 0) + if (stream == 0) { + si_vertex_color_clamping(&ctx, outputs, 
gsinfo->num_outputs); si_llvm_build_vs_exports(&ctx, NULL, outputs, gsinfo->num_outputs); + } LLVMBuildBr(builder, end_bb); } diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c index 950daf49feb..7e0ce7c8d75 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c @@ -500,71 +500,6 @@ static void si_llvm_init_vs_export_args(struct si_shader_context *ctx, const LLV memcpy(&args->out[0], values, sizeof(values[0]) * 4); } -/** - * Vertex color clamping. - * - * This uses a state constant loaded in a user data SGPR and - * an IF statement is added that clamps all colors if the constant - * is true. - */ -static void si_vertex_color_clamping(struct si_shader_context *ctx, - struct si_shader_output_values *outputs, unsigned noutput) -{ - LLVMValueRef addr[SI_MAX_VS_OUTPUTS][4]; - bool has_colors = false; - - /* Store original colors to alloca variables. */ - for (unsigned i = 0; i < noutput; i++) { - if (outputs[i].semantic != VARYING_SLOT_COL0 && - outputs[i].semantic != VARYING_SLOT_COL1 && - outputs[i].semantic != VARYING_SLOT_BFC0 && - outputs[i].semantic != VARYING_SLOT_BFC1) - continue; - - for (unsigned j = 0; j < 4; j++) - addr[i][j] = ac_build_alloca_init(&ctx->ac, outputs[i].values[j], ""); - - has_colors = true; - } - - if (!has_colors) - return; - - /* The state is in the first bit of the user SGPR. */ - LLVMValueRef cond = GET_FIELD(ctx, VS_STATE_CLAMP_VERTEX_COLOR); - cond = LLVMBuildTrunc(ctx->ac.builder, cond, ctx->ac.i1, ""); - - ac_build_ifcc(&ctx->ac, cond, 6502); - - /* Store clamped colors to alloca variables within the conditional block. 
*/ - for (unsigned i = 0; i < noutput; i++) { - if (outputs[i].semantic != VARYING_SLOT_COL0 && - outputs[i].semantic != VARYING_SLOT_COL1 && - outputs[i].semantic != VARYING_SLOT_BFC0 && - outputs[i].semantic != VARYING_SLOT_BFC1) - continue; - - for (unsigned j = 0; j < 4; j++) { - LLVMBuildStore(ctx->ac.builder, ac_build_clamp(&ctx->ac, outputs[i].values[j]), - addr[i][j]); - } - } - ac_build_endif(&ctx->ac, 6502); - - /* Load clamped colors */ - for (unsigned i = 0; i < noutput; i++) { - if (outputs[i].semantic != VARYING_SLOT_COL0 && - outputs[i].semantic != VARYING_SLOT_COL1 && - outputs[i].semantic != VARYING_SLOT_BFC0 && - outputs[i].semantic != VARYING_SLOT_BFC1) - continue; - - for (unsigned j = 0; j < 4; j++) { - outputs[i].values[j] = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.f32, addr[i][j], ""); - } - } -} - /** * Generate export instructions for hardware VS shader stage or NGG GS stage * (position and parameter data only). @@ -584,8 +519,6 @@ void si_llvm_build_vs_exports(struct si_shader_context *ctx, LLVMValueRef num_ex shader->selector->info.culldist_mask; int i; - si_vertex_color_clamping(ctx, outputs, noutput); - /* Build position exports. */ for (i = 0; i < noutput; i++) { switch (outputs[i].semantic) {