diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c index 516d89dca4f..ea5971feeb8 100644 --- a/src/intel/compiler/brw_compiler.c +++ b/src/intel/compiler/brw_compiler.c @@ -245,6 +245,8 @@ brw_prog_data_size(gl_shader_stage stage) [MESA_SHADER_GEOMETRY] = sizeof(struct brw_gs_prog_data), [MESA_SHADER_FRAGMENT] = sizeof(struct brw_wm_prog_data), [MESA_SHADER_COMPUTE] = sizeof(struct brw_cs_prog_data), + [MESA_SHADER_TASK] = sizeof(struct brw_task_prog_data), + [MESA_SHADER_MESH] = sizeof(struct brw_mesh_prog_data), [MESA_SHADER_RAYGEN] = sizeof(struct brw_bs_prog_data), [MESA_SHADER_ANY_HIT] = sizeof(struct brw_bs_prog_data), [MESA_SHADER_CLOSEST_HIT] = sizeof(struct brw_bs_prog_data), @@ -267,6 +269,8 @@ brw_prog_key_size(gl_shader_stage stage) [MESA_SHADER_GEOMETRY] = sizeof(struct brw_gs_prog_key), [MESA_SHADER_FRAGMENT] = sizeof(struct brw_wm_prog_key), [MESA_SHADER_COMPUTE] = sizeof(struct brw_cs_prog_key), + [MESA_SHADER_TASK] = sizeof(struct brw_task_prog_key), + [MESA_SHADER_MESH] = sizeof(struct brw_mesh_prog_key), [MESA_SHADER_RAYGEN] = sizeof(struct brw_bs_prog_key), [MESA_SHADER_ANY_HIT] = sizeof(struct brw_bs_prog_key), [MESA_SHADER_CLOSEST_HIT] = sizeof(struct brw_bs_prog_key), diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index 9e6dab8b6b3..12700a63b2c 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -387,6 +387,16 @@ struct brw_gs_prog_key unsigned nr_userclip_plane_consts:4; }; +struct brw_task_prog_key +{ + struct brw_base_prog_key base; +}; + +struct brw_mesh_prog_key +{ + struct brw_base_prog_key base; +}; + enum brw_sf_primitive { BRW_SF_PRIM_POINTS = 0, BRW_SF_PRIM_LINES = 1, @@ -547,6 +557,8 @@ union brw_any_prog_key { struct brw_wm_prog_key wm; struct brw_cs_prog_key cs; struct brw_bs_prog_key bs; + struct brw_task_prog_key task; + struct brw_mesh_prog_key mesh; }; /* @@ -1444,6 +1456,24 @@ struct brw_mue_map { uint32_t per_vertex_pitch_dw; }; +struct brw_task_prog_data { + struct brw_cs_prog_data base; + struct brw_tue_map map; +}; + +enum brw_mesh_index_format { + BRW_INDEX_FORMAT_U32, +}; + +struct brw_mesh_prog_data { + struct brw_cs_prog_data base; + struct brw_mue_map map; + + uint16_t primitive_type; + + enum brw_mesh_index_format index_format; +}; + /* brw_any_prog_data is prog_data for any stage that maps to an API stage */ union brw_any_prog_data { struct brw_stage_prog_data base; @@ -1455,6 +1485,8 @@ union brw_any_prog_data { struct brw_wm_prog_data wm; struct brw_cs_prog_data cs; struct brw_bs_prog_data bs; + struct brw_task_prog_data task; + struct brw_mesh_prog_data mesh; }; #define DEFINE_PROG_DATA_DOWNCAST(STAGE, CHECK) \ @@ -1486,6 +1518,9 @@ DEFINE_PROG_DATA_DOWNCAST(vue, prog_data->stage == MESA_SHADER_VERTEX || prog_data->stage == MESA_SHADER_TESS_EVAL || prog_data->stage == MESA_SHADER_GEOMETRY) +DEFINE_PROG_DATA_DOWNCAST(task, prog_data->stage == MESA_SHADER_TASK) +DEFINE_PROG_DATA_DOWNCAST(mesh, prog_data->stage == MESA_SHADER_MESH) + /* These are not really brw_stage_prog_data. */ DEFINE_PROG_DATA_DOWNCAST(ff_gs, true) DEFINE_PROG_DATA_DOWNCAST(clip, true) @@ -1642,6 +1677,41 @@ brw_compile_clip(const struct brw_compiler *compiler, struct brw_vue_map *vue_map, unsigned *final_assembly_size); +struct brw_compile_task_params { + struct nir_shader *nir; + + const struct brw_task_prog_key *key; + struct brw_task_prog_data *prog_data; + + struct brw_compile_stats *stats; + + char *error_str; + void *log_data; +}; + +const unsigned * +brw_compile_task(const struct brw_compiler *compiler, + void *mem_ctx, + struct brw_compile_task_params *params); + +struct brw_compile_mesh_params { + struct nir_shader *nir; + + const struct brw_mesh_prog_key *key; + struct brw_mesh_prog_data *prog_data; + const struct brw_tue_map *tue_map; + + struct brw_compile_stats *stats; + + char *error_str; + void *log_data; +}; + +const unsigned * +brw_compile_mesh(const struct brw_compiler *compiler, + void *mem_ctx, + struct brw_compile_mesh_params *params); + /** * Parameters for compiling a fragment shader. * diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index d8c61d17d8f..2cf1923b555 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -9567,6 +9567,112 @@ fs_visitor::run_bs(bool allow_spilling) return !failed; } +bool +fs_visitor::run_task(bool allow_spilling) +{ + assert(stage == MESA_SHADER_TASK); + + /* Task Shader Payloads (SIMD8 and SIMD16) + * + * R0: Header + * R1: Local_ID.X[0-7 or 0-15] + * R2: Inline Parameter + * + * Task Shader Payloads (SIMD32) + * + * R0: Header + * R1: Local_ID.X[0-15] + * R2: Local_ID.X[16-31] + * R3: Inline Parameter + * + * Local_ID.X values are 16 bits. + * + * Inline parameter is optional but always present since we use it to pass + * the address to descriptors. + */ + payload.num_regs = dispatch_width == 32 ? 4 : 3; + + if (shader_time_index >= 0) + emit_shader_time_begin(); + + emit_nir_code(); + + if (failed) + return false; + + emit_cs_terminate(); + + if (shader_time_index >= 0) + emit_shader_time_end(); + + calculate_cfg(); + + optimize(); + + assign_curb_setup(); + + fixup_3src_null_dest(); + allocate_registers(allow_spilling); + + if (failed) + return false; + + return !failed; +} + +bool +fs_visitor::run_mesh(bool allow_spilling) +{ + assert(stage == MESA_SHADER_MESH); + + /* Mesh Shader Payloads (SIMD8 and SIMD16) + * + * R0: Header + * R1: Local_ID.X[0-7 or 0-15] + * R2: Inline Parameter + * + * Mesh Shader Payloads (SIMD32) + * + * R0: Header + * R1: Local_ID.X[0-15] + * R2: Local_ID.X[16-31] + * R3: Inline Parameter + * + * Local_ID.X values are 16 bits. + * + * Inline parameter is optional but always present since we use it to pass + * the address to descriptors. + */ + payload.num_regs = dispatch_width == 32 ? 4 : 3; + + if (shader_time_index >= 0) + emit_shader_time_begin(); + + emit_nir_code(); + + if (failed) + return false; + + emit_cs_terminate(); + + if (shader_time_index >= 0) + emit_shader_time_end(); + + calculate_cfg(); + + optimize(); + + assign_curb_setup(); + + fixup_3src_null_dest(); + allocate_registers(allow_spilling); + + if (failed) + return false; + + return !failed; +} + static bool is_used_in_not_interp_frag_coord(nir_ssa_def *def) { diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 281ce0456ce..3fbfd613d07 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -127,6 +127,8 @@ public: bool run_gs(); bool run_cs(bool allow_spilling); bool run_bs(bool allow_spilling); + bool run_task(bool allow_spilling); + bool run_mesh(bool allow_spilling); void optimize(); void allocate_registers(bool allow_spilling); void setup_fs_payload_gfx4(); @@ -254,6 +256,12 @@ public: nir_intrinsic_instr *instr); void nir_emit_bs_intrinsic(const brw::fs_builder &bld, nir_intrinsic_instr *instr); + void nir_emit_task_intrinsic(const brw::fs_builder &bld, + nir_intrinsic_instr *instr); + void nir_emit_mesh_intrinsic(const brw::fs_builder &bld, + nir_intrinsic_instr *instr); + void nir_emit_task_mesh_intrinsic(const brw::fs_builder &bld, + nir_intrinsic_instr *instr); fs_reg get_nir_image_intrinsic_image(const brw::fs_builder &bld, nir_intrinsic_instr *instr); fs_reg get_nir_ssbo_intrinsic_index(const brw::fs_builder &bld, diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index c69c73f0c9f..5b17721db9a 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -453,6 +453,12 @@ fs_visitor::nir_emit_instr(nir_instr *instr) case MESA_SHADER_CALLABLE: nir_emit_bs_intrinsic(abld, nir_instr_as_intrinsic(instr)); break; + case MESA_SHADER_TASK: + nir_emit_task_intrinsic(abld, nir_instr_as_intrinsic(instr)); + break; + case MESA_SHADER_MESH: + nir_emit_mesh_intrinsic(abld, nir_instr_as_intrinsic(instr)); + break; default: unreachable("unsupported shader stage"); } diff --git a/src/intel/compiler/brw_mesh.cpp b/src/intel/compiler/brw_mesh.cpp new file mode 100644 index 00000000000..bec636dceee --- /dev/null +++ b/src/intel/compiler/brw_mesh.cpp @@ -0,0 +1,263 @@ +/* + * Copyright © 2021 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_compiler.h" +#include "brw_fs.h" +#include "brw_nir.h" +#include "brw_private.h" +#include "compiler/nir/nir_builder.h" +#include "dev/intel_debug.h" + +using namespace brw; + +const unsigned * +brw_compile_task(const struct brw_compiler *compiler, + void *mem_ctx, + struct brw_compile_task_params *params) +{ + struct nir_shader *nir = params->nir; + const struct brw_task_prog_key *key = params->key; + struct brw_task_prog_data *prog_data = params->prog_data; + const bool debug_enabled = INTEL_DEBUG(DEBUG_TASK); + + prog_data->base.base.stage = MESA_SHADER_TASK; + prog_data->base.base.total_shared = nir->info.shared_size; + + prog_data->base.local_size[0] = nir->info.workgroup_size[0]; + prog_data->base.local_size[1] = nir->info.workgroup_size[1]; + prog_data->base.local_size[2] = nir->info.workgroup_size[2]; + + const unsigned required_dispatch_width = + brw_required_dispatch_width(&nir->info, key->base.subgroup_size_type); + + fs_visitor *v[3] = {0}; + const char *error[3] = {0}; + + for (unsigned simd = 0; simd < 3; simd++) { + if (!brw_simd_should_compile(mem_ctx, simd, compiler->devinfo, &prog_data->base, + required_dispatch_width, &error[simd])) + continue; + + const unsigned dispatch_width = 8 << simd; + + nir_shader *shader = nir_shader_clone(mem_ctx, nir); + brw_nir_apply_key(shader, compiler, &key->base, dispatch_width, true /* is_scalar */); + + NIR_PASS_V(shader, brw_nir_lower_simd, dispatch_width); + + brw_postprocess_nir(shader, compiler, true /* is_scalar */, debug_enabled, + key->base.robust_buffer_access); + + v[simd] = new fs_visitor(compiler, params->log_data, mem_ctx, &key->base, + &prog_data->base.base, shader, dispatch_width, + -1 /* shader_time_index */, debug_enabled); + + if (prog_data->base.prog_mask) { + unsigned first = ffs(prog_data->base.prog_mask) - 1; + v[simd]->import_uniforms(v[first]); + } + + const bool allow_spilling = !prog_data->base.prog_mask; + + if (v[simd]->run_task(allow_spilling)) + brw_simd_mark_compiled(simd, &prog_data->base, v[simd]->spilled_any_registers); + else + error[simd] = ralloc_strdup(mem_ctx, v[simd]->fail_msg); + } + + int selected_simd = brw_simd_select(&prog_data->base); + if (selected_simd < 0) { + params->error_str = ralloc_asprintf(mem_ctx, "Can't compile shader: %s, %s and %s.\n", + error[0], error[1], error[2]);; + return NULL; + } + + fs_visitor *selected = v[selected_simd]; + prog_data->base.prog_mask = 1 << selected_simd; + + fs_generator g(compiler, params->log_data, mem_ctx, + &prog_data->base.base, false, MESA_SHADER_TASK); + if (unlikely(debug_enabled)) { + g.enable_debug(ralloc_asprintf(mem_ctx, + "%s task shader %s", + nir->info.label ? nir->info.label + : "unnamed", + nir->info.name)); + } + + g.generate_code(selected->cfg, selected->dispatch_width, selected->shader_stats, + selected->performance_analysis.require(), params->stats); + + delete v[0]; + delete v[1]; + delete v[2]; + + return g.get_assembly(); +} + +const unsigned * +brw_compile_mesh(const struct brw_compiler *compiler, + void *mem_ctx, + struct brw_compile_mesh_params *params) +{ + struct nir_shader *nir = params->nir; + const struct brw_mesh_prog_key *key = params->key; + struct brw_mesh_prog_data *prog_data = params->prog_data; + const bool debug_enabled = INTEL_DEBUG(DEBUG_MESH); + + prog_data->base.base.stage = MESA_SHADER_MESH; + prog_data->base.base.total_shared = nir->info.shared_size; + + prog_data->base.local_size[0] = nir->info.workgroup_size[0]; + prog_data->base.local_size[1] = nir->info.workgroup_size[1]; + prog_data->base.local_size[2] = nir->info.workgroup_size[2]; + + prog_data->primitive_type = nir->info.mesh.primitive_type; + + /* TODO(mesh): Use other index formats (that are more compact) for optimization. */ + prog_data->index_format = BRW_INDEX_FORMAT_U32; + + const unsigned required_dispatch_width = + brw_required_dispatch_width(&nir->info, key->base.subgroup_size_type); + + fs_visitor *v[3] = {0}; + const char *error[3] = {0}; + + for (int simd = 0; simd < 3; simd++) { + if (!brw_simd_should_compile(mem_ctx, simd, compiler->devinfo, &prog_data->base, + required_dispatch_width, &error[simd])) + continue; + + const unsigned dispatch_width = 8 << simd; + + nir_shader *shader = nir_shader_clone(mem_ctx, nir); + brw_nir_apply_key(shader, compiler, &key->base, dispatch_width, true /* is_scalar */); + + NIR_PASS_V(shader, brw_nir_lower_simd, dispatch_width); + + brw_postprocess_nir(shader, compiler, true /* is_scalar */, debug_enabled, + key->base.robust_buffer_access); + + v[simd] = new fs_visitor(compiler, params->log_data, mem_ctx, &key->base, + &prog_data->base.base, shader, dispatch_width, + -1 /* shader_time_index */, debug_enabled); + + if (prog_data->base.prog_mask) { + unsigned first = ffs(prog_data->base.prog_mask) - 1; + v[simd]->import_uniforms(v[first]); + } + + const bool allow_spilling = !prog_data->base.prog_mask; + + if (v[simd]->run_mesh(allow_spilling)) + brw_simd_mark_compiled(simd, &prog_data->base, v[simd]->spilled_any_registers); + else + error[simd] = ralloc_strdup(mem_ctx, v[simd]->fail_msg); + } + + int selected_simd = brw_simd_select(&prog_data->base); + if (selected_simd < 0) { + params->error_str = ralloc_asprintf(mem_ctx, "Can't compile shader: %s, %s and %s.\n", + error[0], error[1], error[2]);; + return NULL; + } + + fs_visitor *selected = v[selected_simd]; + prog_data->base.prog_mask = 1 << selected_simd; + + fs_generator g(compiler, params->log_data, mem_ctx, + &prog_data->base.base, false, MESA_SHADER_MESH); + if (unlikely(debug_enabled)) { + g.enable_debug(ralloc_asprintf(mem_ctx, + "%s mesh shader %s", + nir->info.label ? nir->info.label + : "unnamed", + nir->info.name)); + } + + g.generate_code(selected->cfg, selected->dispatch_width, selected->shader_stats, + selected->performance_analysis.require(), params->stats); + + delete v[0]; + delete v[1]; + delete v[2]; + + return g.get_assembly(); +} + +void +fs_visitor::nir_emit_task_intrinsic(const fs_builder &bld, + nir_intrinsic_instr *instr) +{ + assert(stage == MESA_SHADER_TASK); + + switch (instr->intrinsic) { + case nir_intrinsic_store_output: + case nir_intrinsic_load_output: + /* TODO(mesh): Task Output. */ + break; + + default: + nir_emit_task_mesh_intrinsic(bld, instr); + break; + } +} + +void +fs_visitor::nir_emit_mesh_intrinsic(const fs_builder &bld, + nir_intrinsic_instr *instr) +{ + assert(stage == MESA_SHADER_MESH); + + switch (instr->intrinsic) { + case nir_intrinsic_load_input: + /* TODO(mesh): Mesh Input. */ + break; + + case nir_intrinsic_store_per_primitive_output: + case nir_intrinsic_store_per_vertex_output: + case nir_intrinsic_store_output: + case nir_intrinsic_load_per_vertex_output: + case nir_intrinsic_load_per_primitive_output: + case nir_intrinsic_load_output: + /* TODO(mesh): Mesh Output. */ + break; + + default: + nir_emit_task_mesh_intrinsic(bld, instr); + break; + } +} + +void +fs_visitor::nir_emit_task_mesh_intrinsic(const fs_builder &bld, + nir_intrinsic_instr *instr) +{ + assert(stage == MESA_SHADER_MESH || stage == MESA_SHADER_TASK); + + switch (instr->intrinsic) { + default: + nir_emit_cs_intrinsic(bld, instr); + break; + } +} diff --git a/src/intel/compiler/meson.build b/src/intel/compiler/meson.build index dadb75d43e5..a180d8cd2a2 100644 --- a/src/intel/compiler/meson.build +++ b/src/intel/compiler/meson.build @@ -76,6 +76,7 @@ libintel_compiler_files = files( 'brw_ir_performance.h', 'brw_ir_performance.cpp', 'brw_ir_vec4.h', + 'brw_mesh.cpp', 'brw_nir.h', 'brw_nir.c', 'brw_nir_analyze_boolean_resolves.c',