From f23f7ef3160ab2fde22dd5f180d5d2b71d59121c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timur=20Krist=C3=B3f?=
Date: Sun, 31 Oct 2021 21:08:08 +0100
Subject: [PATCH] nir: Don't compact per-vertex and per-primitive outputs
 together.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Prevent nir_compact_varyings from putting per-vertex and per-primitive
output components in the same slot.

Signed-off-by: Timur Kristóf
Reviewed-by: Timothy Arceri
Part-of:
---
 src/compiler/nir/nir_linking_helpers.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/src/compiler/nir/nir_linking_helpers.c b/src/compiler/nir/nir_linking_helpers.c
index bc1ee4dd548..313a234a7f9 100644
--- a/src/compiler/nir/nir_linking_helpers.c
+++ b/src/compiler/nir/nir_linking_helpers.c
@@ -232,6 +232,8 @@ static uint8_t
 get_interp_type(nir_variable *var, const struct glsl_type *type,
                 bool default_to_smooth_interp)
 {
+   if (var->data.per_primitive)
+      return INTERP_MODE_NONE;
    if (glsl_type_is_integer(type))
       return INTERP_MODE_FLAT;
    else if (var->data.interpolation != INTERP_MODE_NONE)
@@ -276,6 +278,7 @@ struct assigned_comps
    uint8_t interp_loc;
    bool is_32bit;
    bool is_mediump;
+   bool is_per_primitive;
 };
 
 /* Packing arrays and dual slot varyings is difficult so to avoid complex
@@ -347,6 +350,7 @@ get_unmoveable_components_masks(nir_shader *shader,
             comps[location + i].is_mediump =
                var->data.precision == GLSL_PRECISION_MEDIUM ||
                var->data.precision == GLSL_PRECISION_LOW;
+            comps[location + i].is_per_primitive = var->data.per_primitive;
          }
       }
    }
@@ -465,6 +469,7 @@ struct varying_component {
    uint8_t interp_loc;
    bool is_32bit;
    bool is_patch;
+   bool is_per_primitive;
    bool is_mediump;
    bool is_intra_stage_only;
    bool initialised;
@@ -480,6 +485,12 @@ cmp_varying_component(const void *comp1_v, const void *comp2_v)
    if (comp1->is_patch != comp2->is_patch)
       return comp1->is_patch ? 1 : -1;
 
+   /* Sort per-primitive outputs after per-vertex ones to allow
+    * better compaction when they are mixed in the shader's source.
+    */
+   if (comp1->is_per_primitive != comp2->is_per_primitive)
+      return comp1->is_per_primitive ? 1 : -1;
+
    /* We want to try to group together TCS outputs that are only read by other
     * TCS invocations and not consumed by the follow stage.
     */
@@ -600,6 +611,7 @@ gather_varying_component_info(nir_shader *producer, nir_shader *consumer,
          vc_info->interp_loc = get_interp_loc(in_var);
          vc_info->is_32bit = glsl_type_is_32bit(type);
          vc_info->is_patch = in_var->data.patch;
+         vc_info->is_per_primitive = in_var->data.per_primitive;
          vc_info->is_mediump = !producer->options->linker_ignore_precision &&
            (in_var->data.precision == GLSL_PRECISION_MEDIUM ||
             in_var->data.precision == GLSL_PRECISION_LOW);
@@ -665,6 +677,7 @@ gather_varying_component_info(nir_shader *producer, nir_shader *consumer,
          vc_info->interp_loc = get_interp_loc(out_var);
          vc_info->is_32bit = glsl_type_is_32bit(type);
          vc_info->is_patch = out_var->data.patch;
+         vc_info->is_per_primitive = out_var->data.per_primitive;
          vc_info->is_mediump = !producer->options->linker_ignore_precision &&
            (out_var->data.precision == GLSL_PRECISION_MEDIUM ||
             out_var->data.precision == GLSL_PRECISION_LOW);
@@ -749,6 +762,12 @@ assign_remap_locations(struct varying_loc (*remap)[4],
    for (; tmp_cursor < max_location; tmp_cursor++) {
 
       if (assigned_comps[tmp_cursor].comps) {
+         /* Don't pack per-primitive and per-vertex varyings together. */
+         if (assigned_comps[tmp_cursor].is_per_primitive != info->is_per_primitive) {
+            tmp_comp = 0;
+            continue;
+         }
+
          /* We can only pack varyings with matching precision. */
          if (assigned_comps[tmp_cursor].is_mediump != info->is_mediump) {
             tmp_comp = 0;
@@ -802,6 +821,7 @@ assign_remap_locations(struct varying_loc (*remap)[4],
       assigned_comps[tmp_cursor].interp_loc = info->interp_loc;
       assigned_comps[tmp_cursor].is_32bit = info->is_32bit;
       assigned_comps[tmp_cursor].is_mediump = info->is_mediump;
+      assigned_comps[tmp_cursor].is_per_primitive = info->is_per_primitive;
 
       /* Assign remap location */
       remap[location][info->var->data.location_frac].component = tmp_comp++;
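For illustration, the ordering that the new comparator key establishes can be
demonstrated with a minimal standalone sketch. This is hypothetical plain C,
not Mesa code: the struct and names below are simplified stand-ins, and only
the two sort keys relevant here (is_patch and the new is_per_primitive) are
modeled.

/*
 * Standalone sketch (hypothetical, not Mesa code): demonstrates the
 * sort order cmp_varying_component establishes with the new
 * is_per_primitive key. Patch varyings still sort last; among the
 * rest, per-primitive outputs sort after per-vertex ones.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct component {
   const char *name;
   bool is_patch;
   bool is_per_primitive;
};

static int
cmp_component(const void *c1_v, const void *c2_v)
{
   const struct component *c1 = c1_v;
   const struct component *c2 = c2_v;

   /* Existing key: sort patch varyings last. */
   if (c1->is_patch != c2->is_patch)
      return c1->is_patch ? 1 : -1;

   /* Key added by this patch: per-primitive after per-vertex, so the
    * two kinds land in disjoint slot ranges during remapping.
    */
   if (c1->is_per_primitive != c2->is_per_primitive)
      return c1->is_per_primitive ? 1 : -1;

   return 0;
}

int
main(void)
{
   struct component comps[] = {
      { "prim_color (per-primitive)", false, true  },
      { "uv (per-vertex)",            false, false },
      { "layer (per-primitive)",      false, true  },
      { "normal (per-vertex)",        false, false },
   };

   qsort(comps, 4, sizeof(comps[0]), cmp_component);

   /* Prints the per-vertex components first, then the per-primitive
    * ones, mirroring the order in which the pass assigns slots.
    */
   for (unsigned i = 0; i < 4; i++)
      printf("%s\n", comps[i].name);

   return 0;
}

Together with the early-out added to assign_remap_locations, which resets
tmp_comp and skips any slot whose is_per_primitive flag doesn't match the
component being placed, this ordering ensures per-vertex and per-primitive
components are compacted into disjoint slots even when a shader declares
them interleaved.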