radv,aco: implement GS copy shaders using NIR
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18898>
This commit is contained in:
@@ -7810,10 +7810,9 @@ visit_emit_vertex_with_counter(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
|
||||
unsigned offset = 0;
|
||||
for (unsigned i = 0; i <= VARYING_SLOT_VAR31; i++) {
|
||||
if (ctx->program->info.gs.output_streams[i] != stream)
|
||||
continue;
|
||||
|
||||
for (unsigned j = 0; j < 4; j++) {
|
||||
if (((ctx->program->info.gs.output_streams[i] >> (j * 2)) & 0x3) != stream)
|
||||
continue;
|
||||
if (!(ctx->program->info.gs.output_usage_mask[i] & (1 << j)))
|
||||
continue;
|
||||
|
||||
@@ -12179,7 +12178,7 @@ select_gs_copy_shader(Program* program, struct nir_shader* gs_shader, ac_shader_
|
||||
|
||||
unsigned offset = 0;
|
||||
for (unsigned i = 0; i <= VARYING_SLOT_VAR31; ++i) {
|
||||
if (program->info.gs.output_streams[i] != stream)
|
||||
if ((program->info.gs.output_streams[i] & 0x3) != stream)
|
||||
continue;
|
||||
|
||||
unsigned output_usage_mask = program->info.gs.output_usage_mask[i];
|
||||
|
@@ -230,9 +230,7 @@ aco_compile_shader(const struct aco_compiler_options* options,
|
||||
program->debug.private_data = options->debug.private_data;
|
||||
|
||||
/* Instruction Selection */
|
||||
if (args->is_gs_copy_shader)
|
||||
aco::select_gs_copy_shader(program.get(), shaders[0], &config, options, info, args);
|
||||
else if (args->is_trap_handler_shader)
|
||||
if (args->is_trap_handler_shader)
|
||||
aco::select_trap_handler_shader(program.get(), shaders[0], &config, options, info, args);
|
||||
else
|
||||
aco::select_program(program.get(), shader_count, shaders, &config, options, info, args);
|
||||
|
@@ -205,10 +205,12 @@ visit_emit_vertex_with_counter(struct ac_shader_abi *abi, unsigned stream, LLVMV
|
||||
bool *is_16bit_ptr = &abi->is_16bit[i * 4];
|
||||
int length = util_last_bit(output_usage_mask);
|
||||
|
||||
if (!(ctx->output_mask & (1ull << i)) || output_stream != stream)
|
||||
if (!(ctx->output_mask & (1ull << i)))
|
||||
continue;
|
||||
|
||||
for (unsigned j = 0; j < length; j++) {
|
||||
if (((output_stream >> (j * 2)) & 0x3) != stream)
|
||||
continue;
|
||||
if (!(output_usage_mask & (1 << j)))
|
||||
continue;
|
||||
|
||||
@@ -1029,11 +1031,11 @@ handle_vs_outputs_post(struct radv_shader_context *ctx)
|
||||
outputs[noutput].slot_name = i;
|
||||
outputs[noutput].slot_index = i == VARYING_SLOT_CLIP_DIST1;
|
||||
|
||||
if (ctx->stage == MESA_SHADER_VERTEX && !ctx->args->is_gs_copy_shader) {
|
||||
if (ctx->stage == MESA_SHADER_VERTEX) {
|
||||
outputs[noutput].usage_mask = ctx->shader_info->vs.output_usage_mask[i];
|
||||
} else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
|
||||
outputs[noutput].usage_mask = ctx->shader_info->tes.output_usage_mask[i];
|
||||
} else if (ctx->args->is_gs_copy_shader|| ctx->stage == MESA_SHADER_GEOMETRY) {
|
||||
} else if (ctx->stage == MESA_SHADER_GEOMETRY) {
|
||||
outputs[noutput].usage_mask = ctx->shader_info->gs.output_usage_mask[i];
|
||||
}
|
||||
|
||||
@@ -1661,7 +1663,7 @@ ac_gs_copy_shader_emit(struct radv_shader_context *ctx)
|
||||
offset = 0;
|
||||
for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
|
||||
unsigned output_usage_mask = ctx->shader_info->gs.output_usage_mask[i];
|
||||
unsigned output_stream = ctx->shader_info->gs.output_streams[i];
|
||||
unsigned output_stream = ctx->shader_info->gs.output_streams[i] & 0x3;
|
||||
int length = util_last_bit(output_usage_mask);
|
||||
|
||||
if (!(ctx->output_mask & (1ull << i)) || output_stream != stream)
|
||||
@@ -1764,9 +1766,5 @@ llvm_compile_shader(const struct radv_nir_compiler_options *options,
|
||||
|
||||
radv_init_llvm_compiler(&ac_llvm, options->family, tm_options, info->wave_size);
|
||||
|
||||
if (args->is_gs_copy_shader) {
|
||||
radv_compile_gs_copy_shader(&ac_llvm, options, info, *shaders, binary, args);
|
||||
} else {
|
||||
radv_compile_nir_shader(&ac_llvm, options, info, binary, args, shaders, shader_count);
|
||||
}
|
||||
}
|
||||
|
@@ -3597,15 +3597,21 @@ radv_pipeline_create_gs_copy_shader(struct radv_pipeline *pipeline,
|
||||
struct radv_shader_binary **gs_copy_binary)
|
||||
{
|
||||
struct radv_device *device = pipeline->device;
|
||||
struct radv_shader_info info = {0};
|
||||
|
||||
radv_nir_shader_info_pass(device, stages[MESA_SHADER_GEOMETRY].nir, pipeline_layout, pipeline_key,
|
||||
&info);
|
||||
const struct radv_shader_info *gs_info = &stages[MESA_SHADER_GEOMETRY].info;
|
||||
nir_shader *nir =
|
||||
ac_nir_create_gs_copy_shader(stages[MESA_SHADER_GEOMETRY].nir, false, VARYING_SLOT_MAX,
|
||||
gs_info->gs.output_usage_mask, gs_info->gs.output_streams, NULL);
|
||||
nir_validate_shader(nir, "after ac_nir_create_gs_copy_shader");
|
||||
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
|
||||
|
||||
struct radv_shader_info info = {0};
|
||||
radv_nir_shader_info_pass(device, nir, pipeline_layout, pipeline_key, &info);
|
||||
info.wave_size = 64; /* Wave32 not supported. */
|
||||
info.workgroup_size = 64; /* HW VS: separate waves, no workgroups */
|
||||
info.ballot_bit_size = 64;
|
||||
info.so = gs_info->so;
|
||||
|
||||
if (stages[MESA_SHADER_GEOMETRY].info.outinfo.export_clip_dists) {
|
||||
if (gs_info->outinfo.export_clip_dists) {
|
||||
if (stages[MESA_SHADER_GEOMETRY].nir->info.outputs_written & VARYING_BIT_CLIP_DIST0)
|
||||
info.outinfo.vs_output_param_offset[VARYING_SLOT_CLIP_DIST0] = info.outinfo.param_exports++;
|
||||
if (stages[MESA_SHADER_GEOMETRY].nir->info.outputs_written & VARYING_BIT_CLIP_DIST1)
|
||||
@@ -3622,8 +3628,12 @@ radv_pipeline_create_gs_copy_shader(struct radv_pipeline *pipeline,
|
||||
info.user_sgprs_locs = gs_copy_args.user_sgprs_locs;
|
||||
info.inline_push_constant_mask = gs_copy_args.ac.inline_push_const_mask;
|
||||
|
||||
return radv_create_gs_copy_shader(device, stages[MESA_SHADER_GEOMETRY].nir, &info, &gs_copy_args,
|
||||
gs_copy_binary, keep_executable_info, keep_statistic_info,
|
||||
NIR_PASS_V(nir, radv_nir_lower_abi, device->physical_device->rad_info.gfx_level, &info,
|
||||
&gs_copy_args, pipeline_key, radv_use_llvm_for_stage(device, MESA_SHADER_VERTEX),
|
||||
device->physical_device->rad_info.address32_hi);
|
||||
|
||||
return radv_create_gs_copy_shader(device, nir, &info, &gs_copy_args, gs_copy_binary,
|
||||
keep_executable_info, keep_statistic_info,
|
||||
pipeline_key->optimisations_disabled);
|
||||
}
|
||||
|
||||
|
@@ -2380,14 +2380,14 @@ static struct radv_shader *
|
||||
shader_compile(struct radv_device *device, struct nir_shader *const *shaders, int shader_count,
|
||||
gl_shader_stage stage, const struct radv_shader_info *info,
|
||||
const struct radv_shader_args *args, const struct radv_pipeline_key *key,
|
||||
bool gs_copy_shader, bool trap_handler_shader, bool keep_shader_info,
|
||||
bool keep_statistic_info, struct radv_shader_binary **binary_out)
|
||||
bool trap_handler_shader, bool keep_shader_info, bool keep_statistic_info,
|
||||
struct radv_shader_binary **binary_out)
|
||||
{
|
||||
struct radv_nir_compiler_options options = {0};
|
||||
radv_fill_nir_compiler_options(
|
||||
&options, device, key, radv_should_use_wgp_mode(device, stage, info),
|
||||
radv_can_dump_shader(device, shaders[0], gs_copy_shader || trap_handler_shader),
|
||||
is_meta_shader(shaders[0]), keep_shader_info, keep_statistic_info);
|
||||
radv_can_dump_shader(device, shaders[0], trap_handler_shader), is_meta_shader(shaders[0]),
|
||||
keep_shader_info, keep_statistic_info);
|
||||
|
||||
struct radv_shader_debug_data debug_data = {
|
||||
.device = device,
|
||||
@@ -2451,7 +2451,7 @@ radv_shader_nir_to_asm(struct radv_device *device, struct radv_pipeline_stage *p
|
||||
gl_shader_stage stage = shaders[shader_count - 1]->info.stage;
|
||||
|
||||
return shader_compile(device, shaders, shader_count, stage, &pl_stage->info, &pl_stage->args,
|
||||
key, false, false, keep_shader_info, keep_statistic_info, binary_out);
|
||||
key, false, keep_shader_info, keep_statistic_info, binary_out);
|
||||
}
|
||||
|
||||
struct radv_shader *
|
||||
@@ -2466,7 +2466,7 @@ radv_create_gs_copy_shader(struct radv_device *device, struct nir_shader *shader
|
||||
.optimisations_disabled = disable_optimizations,
|
||||
};
|
||||
|
||||
return shader_compile(device, &shader, 1, stage, info, args, &key, true, false, keep_shader_info,
|
||||
return shader_compile(device, &shader, 1, stage, info, args, &key, false, keep_shader_info,
|
||||
keep_statistic_info, binary_out);
|
||||
}
|
||||
|
||||
@@ -2494,8 +2494,8 @@ radv_create_trap_handler_shader(struct radv_device *device)
|
||||
radv_declare_shader_args(device->physical_device->rad_info.gfx_level, &key, &info, stage, false,
|
||||
MESA_SHADER_VERTEX, &args);
|
||||
|
||||
shader = shader_compile(device, &b.shader, 1, stage, &info, &args, &key, false, true, false,
|
||||
false, &binary);
|
||||
shader =
|
||||
shader_compile(device, &b.shader, 1, stage, &info, &args, &key, true, false, false, &binary);
|
||||
|
||||
trap->alloc = radv_alloc_shader_memory(device, shader->code_size, NULL);
|
||||
|
||||
|
@@ -463,7 +463,7 @@ gather_shader_info_gs(const nir_shader *nir, struct radv_shader_info *info)
|
||||
assert(stream < 4);
|
||||
|
||||
info->gs.num_stream_output_components[stream] += num_components;
|
||||
info->gs.output_streams[idx] = stream;
|
||||
info->gs.output_streams[idx] = stream | (stream << 2) | (stream << 4) | (stream << 6);
|
||||
}
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user