diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index c6a45d82f31..b2eede8fece 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -5087,6 +5087,7 @@ nir_shader * nir_create_passthrough_tcs(const nir_shader_compiler_options *optio nir_shader * nir_create_passthrough_gs(const nir_shader_compiler_options *options, const nir_shader *prev_stage, enum shader_prim primitive_type, + int flat_interp_mask_offset, bool emulate_edgeflags, bool force_line_strip_out); diff --git a/src/compiler/nir/nir_passthrough_gs.c b/src/compiler/nir/nir_passthrough_gs.c index c6b24ff6dec..6ffdfe09857 100644 --- a/src/compiler/nir/nir_passthrough_gs.c +++ b/src/compiler/nir/nir_passthrough_gs.c @@ -129,6 +129,7 @@ nir_shader * nir_create_passthrough_gs(const nir_shader_compiler_options *options, const nir_shader *prev_stage, enum shader_prim primitive_type, + int flat_interp_mask_offset, bool emulate_edgeflags, bool force_line_strip_out) { @@ -217,15 +218,25 @@ nir_create_passthrough_gs(const nir_shader_compiler_options *options, } nir_variable *edge_var = nir_find_variable_with_location(nir, nir_var_shader_in, VARYING_SLOT_EDGE); + nir_ssa_def *flat_interp_mask_def = nir_load_ubo(&b, 1, 32, + nir_imm_int(&b, 0), nir_imm_int(&b, flat_interp_mask_offset), + .align_mul = 4, .align_offset = 0, .range_base = 0, .range = ~0); for (unsigned i = start_vert; i < end_vert || needs_closing; i += vert_step) { int idx = i < end_vert ? i : start_vert; /* Copy inputs to outputs. 
*/ - for (unsigned j = 0, oj = 0; j < num_inputs; ++j) { + for (unsigned j = 0, oj = 0, of = 0; j < num_inputs; ++j) { if (in_vars[j]->data.location == VARYING_SLOT_EDGE) { continue; } /* no need to use copy_var to save a lower pass */ - nir_ssa_def *value = nir_load_array_var_imm(&b, in_vars[j], idx); + nir_ssa_def *index; + if (in_vars[j]->data.location == VARYING_SLOT_POS) + index = nir_imm_int(&b, idx); + else { + unsigned mask = 1u << (of++); + index = nir_bcsel(&b, nir_ieq_imm(&b, nir_iand_imm(&b, flat_interp_mask_def, mask), 0), nir_imm_int(&b, idx), nir_imm_int(&b, start_vert)); + } + nir_ssa_def *value = nir_load_array_var(&b, in_vars[j], index); nir_store_var(&b, out_vars[oj], value, (1u << value->num_components) - 1); ++oj; diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index a2afd765180..407c72252f8 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -4490,7 +4490,7 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir, bool needs_size = analyze_io(ret, nir); NIR_PASS_V(nir, unbreak_bos, ret, needs_size); /* run in compile if there could be inlined uniforms */ - if (!screen->driconf.inline_uniforms) { + if (!screen->driconf.inline_uniforms && !nir->info.num_inlinable_uniforms) { NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared); NIR_PASS_V(nir, rewrite_bo_access, screen); NIR_PASS_V(nir, remove_bo_access, ret); diff --git a/src/gallium/drivers/zink/zink_compiler.h b/src/gallium/drivers/zink/zink_compiler.h index a4270a29db4..97fc5dd75b9 100644 --- a/src/gallium/drivers/zink/zink_compiler.h +++ b/src/gallium/drivers/zink/zink_compiler.h @@ -29,6 +29,8 @@ #define ZINK_WORKGROUP_SIZE_X 1 #define ZINK_WORKGROUP_SIZE_Y 2 #define ZINK_WORKGROUP_SIZE_Z 3 +#define ZINK_INLINE_VAL_FLAT_MASK 0 +#define ZINK_INLINE_VAL_PV_LAST_VERT 1 /* stop inlining shaders if they have >limit 
ssa vals after inlining: * recompile time isn't worth the inline diff --git a/src/gallium/drivers/zink/zink_context.c b/src/gallium/drivers/zink/zink_context.c index ab15ed048c2..a6b201752ec 100644 --- a/src/gallium/drivers/zink/zink_context.c +++ b/src/gallium/drivers/zink/zink_context.c @@ -1387,7 +1387,10 @@ zink_set_inlinable_constants(struct pipe_context *pctx, if (shader == MESA_SHADER_COMPUTE) { key = &ctx->compute_pipeline_state.key; } else { - assert(!zink_screen(pctx->screen)->optimal_keys); + assert(!zink_screen(pctx->screen)->optimal_keys || + (shader == MESA_SHADER_GEOMETRY && + ctx->gfx_stages[MESA_SHADER_GEOMETRY] && + ctx->gfx_stages[MESA_SHADER_GEOMETRY]->non_fs.is_generated)); key = &ctx->gfx_pipeline_state.shader_keys.key[shader]; } inlinable_uniforms = key->base.inlined_uniform_values; diff --git a/src/gallium/drivers/zink/zink_draw.cpp b/src/gallium/drivers/zink/zink_draw.cpp index 9bbd363ef1c..5ab37b24c7b 100644 --- a/src/gallium/drivers/zink/zink_draw.cpp +++ b/src/gallium/drivers/zink/zink_draw.cpp @@ -288,7 +288,7 @@ update_gfx_pipeline(struct zink_context *ctx, struct zink_batch_state *bs, enum { VkPipeline prev_pipeline = ctx->gfx_pipeline_state.pipeline; const struct zink_screen *screen = zink_screen(ctx->base.screen); - if (screen->optimal_keys) + if (screen->optimal_keys && !ctx->is_generated_gs_bound) zink_gfx_program_update_optimal(ctx); else zink_gfx_program_update(ctx); diff --git a/src/gallium/drivers/zink/zink_pipeline.c b/src/gallium/drivers/zink/zink_pipeline.c index 6d7f716e28b..9fed3c22aaa 100644 --- a/src/gallium/drivers/zink/zink_pipeline.c +++ b/src/gallium/drivers/zink/zink_pipeline.c @@ -881,4 +881,4 @@ zink_find_or_create_output(struct zink_context *ctx) he = _mesa_set_add_pre_hashed(&ctx->gfx_outputs, hash, okey); } return (struct zink_gfx_output_key*)he->key; -} \ No newline at end of file +} diff --git a/src/gallium/drivers/zink/zink_program.c b/src/gallium/drivers/zink/zink_program.c index ddea6996aee..b11a01b8b6a 
100644 --- a/src/gallium/drivers/zink/zink_program.c +++ b/src/gallium/drivers/zink/zink_program.c @@ -1575,6 +1578,9 @@ bind_gfx_stage(struct zink_context *ctx, gl_shader_stage stage, struct zink_shad static void unbind_generated_gs(struct zink_context *ctx, gl_shader_stage stage, struct zink_shader *shader) { + if (ctx->gfx_stages[stage]->non_fs.is_generated) + ctx->inlinable_uniforms_valid_mask &= ~BITFIELD_BIT(MESA_SHADER_GEOMETRY); + for (int i = 0; i < ARRAY_SIZE(shader->non_fs.generated_gs); i++) { for (int j = 0; j < ARRAY_SIZE(shader->non_fs.generated_gs[0]); j++) { if (ctx->gfx_stages[stage]->non_fs.generated_gs[i][j] && @@ -1582,6 +1585,8 @@ unbind_generated_gs(struct zink_context *ctx, gl_shader_stage stage, struct zink ctx->gfx_stages[stage]->non_fs.generated_gs[i][j]) { assert(stage != MESA_SHADER_GEOMETRY); /* let's not keep recursing! */ bind_gfx_stage(ctx, MESA_SHADER_GEOMETRY, NULL); + ctx->is_generated_gs_bound = false; + ctx->inlinable_uniforms_valid_mask &= ~BITFIELD_BIT(MESA_SHADER_GEOMETRY); } } } @@ -2220,6 +2225,25 @@ zink_rast_prim_for_pipe(enum pipe_prim_type prim) } } +static inline void +zink_add_inline_uniform(nir_shader *shader, int offset) +{ + shader->info.inlinable_uniform_dw_offsets[shader->info.num_inlinable_uniforms] = offset; + ++shader->info.num_inlinable_uniforms; +} + +static uint32_t +zink_flat_flags(struct nir_shader *shader) +{ + uint32_t flat_flags = 0, c = 0; + nir_foreach_shader_in_variable(var, shader) { + if (var->data.location != VARYING_SLOT_POS && var->data.interpolation == INTERP_MODE_FLAT) + flat_flags |= 1u << c; if (var->data.location != VARYING_SLOT_POS) c++; + } + + return flat_flags; +} + void zink_set_primitive_emulation_keys(struct zink_context *ctx) { @@ -2290,20 +2314,26 @@ zink_set_primitive_emulation_keys(struct zink_context *ctx) &screen->nir_options, ctx->gfx_stages[prev_vertex_stage]->nir, ctx->gfx_pipeline_state.gfx_prim_mode, + ZINK_INLINE_VAL_FLAT_MASK * sizeof(uint32_t), lower_edge_flags, lower_line_stipple || lower_quad_prim); } - struct zink_shader *shader = 
zink_shader_create(screen, nir, NULL); + zink_add_inline_uniform(nir, ZINK_INLINE_VAL_FLAT_MASK); + struct zink_shader *shader = zink_shader_create(screen, nir, &ctx->gfx_stages[prev_vertex_stage]->sinfo.so_info); shader->needs_inlining = true; ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs[ctx->gfx_pipeline_state.gfx_prim_mode][zink_prim_type] = shader; shader->non_fs.is_generated = true; + shader->can_inline = true; } bind_gfx_stage(ctx, MESA_SHADER_GEOMETRY, ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs[ctx->gfx_pipeline_state.gfx_prim_mode][zink_prim_type]); ctx->is_generated_gs_bound = true; } + + ctx->base.set_inlinable_constants(&ctx->base, MESA_SHADER_GEOMETRY, 1, + (uint32_t []){zink_flat_flags(ctx->gfx_stages[MESA_SHADER_FRAGMENT]->nir)}); } else if (ctx->gfx_stages[MESA_SHADER_GEOMETRY] && ctx->gfx_stages[MESA_SHADER_GEOMETRY]->non_fs.is_generated) bind_gfx_stage(ctx, MESA_SHADER_GEOMETRY, NULL); @@ -2344,20 +2374,26 @@ zink_create_primitive_emulation_gs(struct zink_context *ctx) &screen->nir_options, ctx->gfx_stages[prev_vertex_stage]->nir, ctx->gfx_pipeline_state.gfx_prim_mode, + ZINK_INLINE_VAL_FLAT_MASK * sizeof(uint32_t), lower_edge_flags, lower_quad_prim); } - struct zink_shader *shader = zink_shader_create(screen, nir, NULL); + zink_add_inline_uniform(nir, ZINK_INLINE_VAL_FLAT_MASK); + struct zink_shader *shader = zink_shader_create(screen, nir, &ctx->gfx_stages[prev_vertex_stage]->sinfo.so_info); shader->needs_inlining = true; ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs[ctx->gfx_pipeline_state.gfx_prim_mode][zink_prim_type] = shader; shader->non_fs.is_generated = true; + shader->can_inline = true; } bind_gfx_stage(ctx, MESA_SHADER_GEOMETRY, ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs[ctx->gfx_pipeline_state.gfx_prim_mode][zink_prim_type]); ctx->is_generated_gs_bound = true; } + + ctx->base.set_inlinable_constants(&ctx->base, MESA_SHADER_GEOMETRY, 1, + (uint32_t 
[]){zink_flat_flags(ctx->gfx_stages[MESA_SHADER_FRAGMENT]->nir)}); } else if (ctx->gfx_stages[MESA_SHADER_GEOMETRY] && ctx->gfx_stages[MESA_SHADER_GEOMETRY]->non_fs.is_generated) bind_gfx_stage(ctx, MESA_SHADER_GEOMETRY, NULL); diff --git a/src/gallium/drivers/zink/zink_program.h b/src/gallium/drivers/zink/zink_program.h index 7aa4cb8b97f..ce46a6de718 100644 --- a/src/gallium/drivers/zink/zink_program.h +++ b/src/gallium/drivers/zink/zink_program.h @@ -414,7 +414,8 @@ zink_can_use_pipeline_libs(const struct zink_context *ctx) !ctx->gfx_stages[MESA_SHADER_FRAGMENT]->nir->info.fs.uses_sample_shading && !zink_get_fs_base_key(ctx)->fbfetch_ms && !ctx->gfx_pipeline_state.force_persample_interp && - !ctx->gfx_pipeline_state.min_samples; + !ctx->gfx_pipeline_state.min_samples && + !ctx->is_generated_gs_bound; } bool diff --git a/src/gallium/drivers/zink/zink_program_state.hpp b/src/gallium/drivers/zink/zink_program_state.hpp index 17ed293179b..4137d3d7e94 100644 --- a/src/gallium/drivers/zink/zink_program_state.hpp +++ b/src/gallium/drivers/zink/zink_program_state.hpp @@ -124,7 +124,7 @@ zink_get_gfx_pipeline(struct zink_context *ctx, state->dirty = false; } /* extra safety asserts for optimal path to catch refactoring bugs */ - if (screen->optimal_keys) { + if (prog->optimal_keys) { ASSERTED const union zink_shader_key_optimal *opt = (union zink_shader_key_optimal*)&prog->last_variant_hash; assert(opt->val == state->shader_keys_optimal.key.val); assert(state->optimal_key == state->shader_keys_optimal.key.val);