diff --git a/src/broadcom/common/v3d_limits.h b/src/broadcom/common/v3d_limits.h index 77684762203..d65edddab74 100644 --- a/src/broadcom/common/v3d_limits.h +++ b/src/broadcom/common/v3d_limits.h @@ -31,6 +31,7 @@ #define V3D_MAX_FS_INPUTS 64 #define V3D_MAX_VS_INPUTS 64 +#define V3D_MAX_ANY_STAGE_INPUTS MAX2(V3D_MAX_VS_INPUTS, V3D_MAX_FS_INPUTS) /* Not specifically a hardware limit, just coordination between compiler and * driver. diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index fbb4b64a365..e2a4665ed42 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -374,8 +374,8 @@ struct v3d_fs_key { struct v3d_vs_key { struct v3d_key base; - struct v3d_varying_slot fs_inputs[V3D_MAX_FS_INPUTS]; - uint8_t num_fs_inputs; + struct v3d_varying_slot used_outputs[V3D_MAX_ANY_STAGE_INPUTS]; + uint8_t num_used_outputs; bool is_coord; bool per_vertex_point_size; diff --git a/src/broadcom/compiler/v3d_nir_lower_io.c b/src/broadcom/compiler/v3d_nir_lower_io.c index 2a68efb7b6b..ee9b29d0f2d 100644 --- a/src/broadcom/compiler/v3d_nir_lower_io.c +++ b/src/broadcom/compiler/v3d_nir_lower_io.c @@ -45,7 +45,7 @@ struct v3d_nir_lower_io_state { int psiz_vpm_offset; int varyings_vpm_offset; - BITSET_WORD varyings_stored[BITSET_WORDS(V3D_MAX_FS_INPUTS)]; + BITSET_WORD varyings_stored[BITSET_WORDS(V3D_MAX_ANY_STAGE_INPUTS)]; nir_ssa_def *pos[4]; }; @@ -91,8 +91,8 @@ v3d_varying_slot_vpm_offset(struct v3d_compile *c, nir_variable *var, int chan) { int component = var->data.location_frac + chan; - for (int i = 0; i < c->vs_key->num_fs_inputs; i++) { - struct v3d_varying_slot slot = c->vs_key->fs_inputs[i]; + for (int i = 0; i < c->vs_key->num_used_outputs; i++) { + struct v3d_varying_slot slot = c->vs_key->used_outputs[i]; if (v3d_slot_get_slot(slot) == var->data.location && v3d_slot_get_component(slot) == component) { @@ -255,7 +255,7 @@ v3d_nir_setup_vpm_layout(struct v3d_compile *c, state->varyings_vpm_offset = vpm_offset; - c->vpm_output_size = vpm_offset + c->vs_key->num_fs_inputs; + c->vpm_output_size = vpm_offset + c->vs_key->num_used_outputs; } static void @@ -306,7 +306,7 @@ v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b, * This should be undefined behavior, but glsl-routing seems to rely * on it. */ - for (int i = 0; i < c->vs_key->num_fs_inputs; i++) { + for (int i = 0; i < c->vs_key->num_used_outputs; i++) { if (!BITSET_TEST(state->varyings_stored, i)) { v3d_nir_store_output(b, state->varyings_vpm_offset + i, nir_imm_int(b, 0)); diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index da4234042ea..12009002f9b 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -749,9 +749,9 @@ v3d_nir_lower_vs_early(struct v3d_compile *c) NIR_PASS_V(c->s, nir_lower_io_to_scalar_early, nir_var_shader_in | nir_var_shader_out); uint64_t used_outputs[4] = {0}; - for (int i = 0; i < c->vs_key->num_fs_inputs; i++) { - int slot = v3d_slot_get_slot(c->vs_key->fs_inputs[i]); - int comp = v3d_slot_get_component(c->vs_key->fs_inputs[i]); + for (int i = 0; i < c->vs_key->num_used_outputs; i++) { + int slot = v3d_slot_get_slot(c->vs_key->used_outputs[i]); + int comp = v3d_slot_get_component(c->vs_key->used_outputs[i]); used_outputs[comp] |= 1ull << slot; } NIR_PASS_V(c->s, nir_remove_unused_io_vars, diff --git a/src/gallium/drivers/v3d/v3d_program.c b/src/gallium/drivers/v3d/v3d_program.c index ff91b332cd2..75022d86c21 100644 --- a/src/gallium/drivers/v3d/v3d_program.c +++ b/src/gallium/drivers/v3d/v3d_program.c @@ -220,7 +220,7 @@ v3d_shader_precompile(struct v3d_context *v3d, int slot = var->data.location; for (int i = 0; i < glsl_get_components(var->type); i++) { int swiz = var->data.location_frac + i; - key.fs_inputs[key.num_fs_inputs++] = + key.used_outputs[key.num_used_outputs++] = v3d_slot_from_slot_and_component(slot, swiz); } @@ -230,9 +230,9 @@ v3d_shader_precompile(struct v3d_context *v3d, /* Compile VS bin shader: only position (XXX: include TF) */ key.is_coord = true; - key.num_fs_inputs = 0; + key.num_used_outputs = 0; for (int i = 0; i < 4; i++) { - key.fs_inputs[key.num_fs_inputs++] = + key.used_outputs[key.num_used_outputs++] = v3d_slot_from_slot_and_component(VARYING_SLOT_POS, i); } @@ -627,11 +627,11 @@ v3d_update_compiled_vs(struct v3d_context *v3d, uint8_t prim_mode) v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_VERTEX]); key->base.shader_state = v3d->prog.bind_vs; key->base.ucp_enables = v3d->rasterizer->base.clip_plane_enable; - key->num_fs_inputs = v3d->prog.fs->prog_data.fs->num_inputs; - STATIC_ASSERT(sizeof(key->fs_inputs) == + key->num_used_outputs = v3d->prog.fs->prog_data.fs->num_inputs; + STATIC_ASSERT(sizeof(key->used_outputs) == sizeof(v3d->prog.fs->prog_data.fs->input_slots)); - memcpy(key->fs_inputs, v3d->prog.fs->prog_data.fs->input_slots, - sizeof(key->fs_inputs)); + memcpy(key->used_outputs, v3d->prog.fs->prog_data.fs->input_slots, + sizeof(key->used_outputs)); key->clamp_color = v3d->rasterizer->base.clamp_vertex_color; key->per_vertex_point_size = @@ -648,15 +648,15 @@ v3d_update_compiled_vs(struct v3d_context *v3d, uint8_t prim_mode) key->is_coord = true; /* Coord shaders only output varyings used by transform feedback. */ struct v3d_uncompiled_shader *shader_state = key->base.shader_state; - memcpy(key->fs_inputs, shader_state->tf_outputs, - sizeof(*key->fs_inputs) * shader_state->num_tf_outputs); - if (shader_state->num_tf_outputs < key->num_fs_inputs) { - memset(&key->fs_inputs[shader_state->num_tf_outputs], + memcpy(key->used_outputs, shader_state->tf_outputs, + sizeof(*key->used_outputs) * shader_state->num_tf_outputs); + if (shader_state->num_tf_outputs < key->num_used_outputs) { + memset(&key->used_outputs[shader_state->num_tf_outputs], 0, - sizeof(*key->fs_inputs) * (key->num_fs_inputs - + sizeof(*key->used_outputs) * (key->num_used_outputs - shader_state->num_tf_outputs)); } - key->num_fs_inputs = shader_state->num_tf_outputs; + key->num_used_outputs = shader_state->num_tf_outputs; struct v3d_compiled_shader *cs = v3d_get_compiled_shader(v3d, &key->base, sizeof(*key));