radv: gather info about PS inputs in the shader info pass
It's the right place to do that.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
@@ -4023,11 +4023,11 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
                 }
         }
 
-        for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) {
+        for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.info.ps.input_mask; ++i) {
                 unsigned vs_offset;
                 bool flat_shade;
                 bool float16;
-                if (!(ps->info.fs.input_mask & (1u << i)))
+                if (!(ps->info.info.ps.input_mask & (1u << i)))
                         continue;
 
                 vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_VAR0 + i];
@@ -4037,8 +4037,8 @@ radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
                         continue;
                 }
 
-                flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << ps_offset));
-                float16 = !!(ps->info.fs.float16_shaded_mask & (1u << ps_offset));
+                flat_shade = !!(ps->info.info.ps.flat_shaded_mask & (1u << ps_offset));
+                float16 = !!(ps->info.info.ps.float16_shaded_mask & (1u << ps_offset));
 
                 ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade, float16);
                 ++ps_offset;
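
The two hunks above walk ps.input_mask and pack only the varyings the fragment shader actually reads into consecutive SPI_PS_INPUT_CNTL slots, so the hardware slot index (ps_offset) can differ from the VARYING_SLOT_VAR* index. A minimal standalone sketch of that packing pattern, using an illustrative mask value rather than anything RADV computes:

#include <stdint.h>
#include <stdio.h>

/* Sketch: pack the set bits of an input mask into consecutive slots,
 * mirroring how ps_offset advances only for inputs that are present. */
int main(void)
{
    uint32_t input_mask = 0x25;      /* illustrative: inputs 0, 2 and 5 are read */
    unsigned ps_offset = 0;

    for (unsigned i = 0; i < 32 && (1u << i) <= input_mask; ++i) {
        if (!(input_mask & (1u << i)))
            continue;                /* skip varyings the shader never reads */

        printf("VARYING_SLOT_VAR%u -> PS input slot %u\n", i, ps_offset);
        ++ps_offset;                 /* slots stay densely packed */
    }
    return 0;
}
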
@@ -4113,7 +4113,7 @@ radv_pipeline_generate_fragment_shader(struct radeon_cmdbuf *ctx_cs,
                                ps->config.spi_ps_input_addr);
 
         radeon_set_context_reg(ctx_cs, R_0286D8_SPI_PS_IN_CONTROL,
-                               S_0286D8_NUM_INTERP(ps->info.fs.num_interp) |
+                               S_0286D8_NUM_INTERP(ps->info.info.ps.num_interp) |
                                S_0286D8_PS_W32_EN(ps->info.info.wave_size == 32));
 
         radeon_set_context_reg(ctx_cs, R_0286E0_SPI_BARYC_CNTL, pipeline->graphics.spi_baryc_cntl);
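
The S_0286D8_*() helpers used above are generated shift-and-mask macros that pack fields into the 32-bit SPI_PS_IN_CONTROL value. A simplified sketch of that macro style, with placeholder shifts and widths that do not reflect the real register encoding:

#include <stdint.h>
#include <stdio.h>

/* Placeholder field packing in the style of the generated S_*() macros.
 * The shifts/widths below are made up for illustration only. */
#define FIELD(value, shift, bits)  (((uint32_t)(value) & ((1u << (bits)) - 1)) << (shift))
#define EXAMPLE_NUM_INTERP(x)      FIELD(x, 0, 6)
#define EXAMPLE_PS_W32_EN(x)       FIELD(x, 15, 1)

static uint32_t example_spi_ps_in_control(unsigned num_interp, unsigned wave_size)
{
    /* OR the packed fields together, exactly like the register helpers do. */
    return EXAMPLE_NUM_INTERP(num_interp) |
           EXAMPLE_PS_W32_EN(wave_size == 32);
}

int main(void)
{
    printf("0x%08x\n", example_spi_ps_in_control(4, 32));  /* 4 interpolants, wave32 */
    return 0;
}
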
@@ -453,53 +453,6 @@ radv_shader_compile_to_nir(struct radv_device *device,
         return nir;
 }
 
-static void mark_16bit_fs_input(struct radv_shader_variant_info *shader_info,
-                                const struct glsl_type *type,
-                                int location)
-{
-        if (glsl_type_is_scalar(type) || glsl_type_is_vector(type) || glsl_type_is_matrix(type)) {
-                unsigned attrib_count = glsl_count_attribute_slots(type, false);
-                if (glsl_type_is_16bit(type)) {
-                        shader_info->fs.float16_shaded_mask |= ((1ull << attrib_count) - 1) << location;
-                }
-        } else if (glsl_type_is_array(type)) {
-                unsigned stride = glsl_count_attribute_slots(glsl_get_array_element(type), false);
-                for (unsigned i = 0; i < glsl_get_length(type); ++i) {
-                        mark_16bit_fs_input(shader_info, glsl_get_array_element(type), location + i * stride);
-                }
-        } else {
-                assert(glsl_type_is_struct_or_ifc(type));
-                for (unsigned i = 0; i < glsl_get_length(type); i++) {
-                        mark_16bit_fs_input(shader_info, glsl_get_struct_field(type, i), location);
-                        location += glsl_count_attribute_slots(glsl_get_struct_field(type, i), false);
-                }
-        }
-}
-
-static void
-handle_fs_input_decl(struct radv_shader_variant_info *shader_info,
-                     struct nir_variable *variable)
-{
-        unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
-
-        if (variable->data.compact) {
-                unsigned component_count = variable->data.location_frac +
-                                           glsl_get_length(variable->type);
-                attrib_count = (component_count + 3) / 4;
-        } else {
-                mark_16bit_fs_input(shader_info, variable->type,
-                                    variable->data.driver_location);
-        }
-
-        uint64_t mask = ((1ull << attrib_count) - 1);
-
-        if (variable->data.interpolation == INTERP_MODE_FLAT)
-                shader_info->fs.flat_shaded_mask |= mask << variable->data.driver_location;
-
-        if (variable->data.location >= VARYING_SLOT_VAR0)
-                shader_info->fs.input_mask |= mask << (variable->data.location - VARYING_SLOT_VAR0);
-}
-
 static int
 type_size_vec4(const struct glsl_type *type, bool bindless)
 {
@@ -567,28 +520,13 @@ lower_view_index(nir_shader *nir)
         return progress;
 }
 
-/* Gather information needed to setup the vs<->ps linking registers in
- * radv_pipeline_generate_ps_inputs().
- */
-
 static void
-handle_fs_inputs(nir_shader *nir, struct radv_shader_variant_info *shader_info)
-{
-        shader_info->fs.num_interp = nir->num_inputs;
-
-        nir_foreach_variable(variable, &nir->inputs)
-                handle_fs_input_decl(shader_info, variable);
-}
-
-static void
-lower_fs_io(nir_shader *nir, struct radv_shader_variant_info *shader_info)
+lower_fs_io(nir_shader *nir)
 {
         NIR_PASS_V(nir, lower_view_index);
         nir_assign_io_var_locations(&nir->inputs, &nir->num_inputs,
                                     MESA_SHADER_FRAGMENT);
 
-        handle_fs_inputs(nir, shader_info);
-
         NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in, type_size_vec4, 0);
 
         /* This pass needs actual constants */
@@ -1135,7 +1073,7 @@ shader_variant_compile(struct radv_device *device,
         bool thread_compiler;
 
         if (shaders[0]->info.stage == MESA_SHADER_FRAGMENT)
-                lower_fs_io(shaders[0], &variant_info);
+                lower_fs_io(shaders[0]);
 
         options->family = chip_family;
         options->chip_class = device->physical_device->rad_info.chip_class;
@@ -1339,7 +1277,7 @@ radv_get_max_waves(struct radv_device *device,
 
         if (stage == MESA_SHADER_FRAGMENT) {
                 lds_per_wave = conf->lds_size * lds_increment +
-                               align(variant->info.fs.num_interp * 48,
+                               align(variant->info.info.ps.num_interp * 48,
                                lds_increment);
         } else if (stage == MESA_SHADER_COMPUTE) {
                 unsigned max_workgroup_size =
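
The radv_get_max_waves() hunk keeps the same LDS estimate and only changes where num_interp is read from: each interpolated PS input is charged 48 bytes of parameter storage, rounded up to the LDS allocation granularity. A small worked sketch, assuming a 512-byte granularity purely for illustration:

#include <stdint.h>
#include <stdio.h>

/* Round 'x' up to a multiple of 'a' (a power of two). */
static unsigned align_pot(unsigned x, unsigned a)
{
    return (x + a - 1) & ~(a - 1);
}

int main(void)
{
    const unsigned lds_increment = 512;   /* assumed allocation granularity */
    unsigned lds_size = 2;                /* LDS blocks from the compiler config (example) */
    unsigned num_interp = 5;              /* PS inputs gathered by the info pass (example) */

    /* Per-wave LDS: compiler-requested blocks plus interpolant parameters,
     * 48 bytes per input, rounded up to the granularity. */
    unsigned lds_per_wave = lds_size * lds_increment +
                            align_pot(num_interp * 48, lds_increment);

    printf("lds_per_wave = %u bytes\n", lds_per_wave);  /* 2*512 + 512 = 1536 */
    return 0;
}
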
@@ -210,6 +210,10 @@ struct radv_shader_info {
                 bool prim_id_input;
                 bool layer_input;
                 uint8_t num_input_clips_culls;
+                uint32_t input_mask;
+                uint32_t flat_shaded_mask;
+                uint32_t float16_shaded_mask;
+                uint32_t num_interp;
         } ps;
         struct {
                 bool uses_grid_size;
@@ -270,10 +274,6 @@ struct radv_shader_variant_info {
                 bool export_prim_id;
         } vs;
         struct {
-                unsigned num_interp;
-                uint32_t input_mask;
-                uint32_t flat_shaded_mask;
-                uint32_t float16_shaded_mask;
         bool can_discard;
         bool early_fragment_test;
         bool post_depth_coverage;
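
For readers puzzled by the info.info.ps chains in the other hunks: radv_shader_variant_info embeds a radv_shader_info member named info, so moving these fields changes only the access path at each call site. A schematic of the nesting as implied by this diff, trimmed to the members involved:

/* Schematic only: trimmed to the members this commit touches. */
struct radv_shader_info {
        struct {
                uint32_t input_mask;          /* VARYING_SLOT_VAR* read by the PS */
                uint32_t flat_shaded_mask;    /* inputs using flat interpolation */
                uint32_t float16_shaded_mask; /* inputs declared with 16-bit types */
                uint32_t num_interp;          /* number of interpolated inputs */
        } ps;
};

struct radv_shader_variant_info {
        struct radv_shader_info info;         /* filled by the NIR shader info pass */
        /* the old fs.input_mask/flat_shaded_mask/... duplicates are removed */
};

/* Old access path:  variant->info.fs.num_interp
 * New access path:  variant->info.info.ps.num_interp */
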
@@ -393,6 +393,28 @@ gather_info_input_decl_vs(const nir_shader *nir, const nir_variable *var,
         }
 }
 
+static void
+mark_16bit_ps_input(struct radv_shader_info *info, const struct glsl_type *type,
+                    int location)
+{
+        if (glsl_type_is_scalar(type) || glsl_type_is_vector(type) || glsl_type_is_matrix(type)) {
+                unsigned attrib_count = glsl_count_attribute_slots(type, false);
+                if (glsl_type_is_16bit(type)) {
+                        info->ps.float16_shaded_mask |= ((1ull << attrib_count) - 1) << location;
+                }
+        } else if (glsl_type_is_array(type)) {
+                unsigned stride = glsl_count_attribute_slots(glsl_get_array_element(type), false);
+                for (unsigned i = 0; i < glsl_get_length(type); ++i) {
+                        mark_16bit_ps_input(info, glsl_get_array_element(type), location + i * stride);
+                }
+        } else {
+                assert(glsl_type_is_struct_or_ifc(type));
+                for (unsigned i = 0; i < glsl_get_length(type); i++) {
+                        mark_16bit_ps_input(info, glsl_get_struct_field(type, i), location);
+                        location += glsl_count_attribute_slots(glsl_get_struct_field(type, i), false);
+                }
+        }
+}
 static void
 gather_info_input_decl_ps(const nir_shader *nir, const nir_variable *var,
                           struct radv_shader_info *info)
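
mark_16bit_ps_input() recurses through arrays and structs so that every attribute slot covered by a 16-bit input gets its bit set in ps.float16_shaded_mask. A toy trace of the slot arithmetic for an assumed array of three 16-bit vec4s starting at driver location 2, where each element occupies one slot:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t float16_shaded_mask = 0;
    unsigned location = 2;       /* driver location of the array (example) */
    unsigned array_len = 3;      /* three elements */
    unsigned stride = 1;         /* one attribute slot per f16vec4 element */

    for (unsigned i = 0; i < array_len; ++i) {
        unsigned attrib_count = 1;   /* slots taken by the element type */
        float16_shaded_mask |= ((1ull << attrib_count) - 1) << (location + i * stride);
    }

    /* Bits 2, 3 and 4 are now set. */
    printf("float16_shaded_mask = 0x%x\n", (unsigned)float16_shaded_mask);  /* 0x1c */
    assert(float16_shaded_mask == 0x1c);
    return 0;
}
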
@@ -423,6 +445,22 @@ gather_info_input_decl_ps(const nir_shader *nir, const nir_variable *var,
                 if (var->data.sample)
                         info->ps.force_persample = true;
         }
+
+        if (var->data.compact) {
+                unsigned component_count = var->data.location_frac +
+                                           glsl_get_length(var->type);
+                attrib_count = (component_count + 3) / 4;
+        } else {
+                mark_16bit_ps_input(info, var->type, var->data.driver_location);
+        }
+
+        uint64_t mask = ((1ull << attrib_count) - 1);
+
+        if (var->data.interpolation == INTERP_MODE_FLAT)
+                info->ps.flat_shaded_mask |= mask << var->data.driver_location;
+
+        if (var->data.location >= VARYING_SLOT_VAR0)
+                info->ps.input_mask |= mask << (var->data.location - VARYING_SLOT_VAR0);
 }
 
 static void
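
Two computations in the hunk above are easy to misread: compact inputs (typically clip/cull distance arrays) convert a component count into whole vec4 slots, while regular inputs build a contiguous bit mask that is shifted to the input's driver location. A worked example with made-up values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Compact case, e.g. a 6-element clip-distance array starting at
     * component 0: the component count is rounded up to whole vec4 slots. */
    unsigned location_frac = 0, array_len = 6;
    unsigned component_count = location_frac + array_len;
    unsigned attrib_count = (component_count + 3) / 4;        /* (6 + 3) / 4 = 2 slots */
    printf("compact attrib_count = %u\n", attrib_count);

    /* Regular case: a one-slot flat-shaded varying at driver location 3. */
    unsigned driver_location = 3;
    attrib_count = 1;
    uint64_t mask = (1ull << attrib_count) - 1;               /* 0b1 */
    uint32_t flat_shaded_mask = mask << driver_location;      /* bit 3 -> 0x8 */
    printf("flat_shaded_mask = 0x%x\n", (unsigned)flat_shaded_mask);
    return 0;
}
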
@@ -597,4 +635,7 @@ radv_nir_shader_info_pass(const struct nir_shader *nir,
                         break;
                 }
         }
+
+        if (nir->info.stage == MESA_SHADER_FRAGMENT)
+                info->ps.num_interp = nir->num_inputs;
 }